From 6e60bb513a6472fce7f2d5015007bfba02e16f08 Mon Sep 17 00:00:00 2001 From: Kfir Dayan Date: Tue, 30 May 2023 20:19:53 +0300 Subject: [PATCH] work in progress --- README.md | 66 +--------------- app/Http/Controllers/WebCrawlController.php | 79 +++++++++++++++++-- app/Providers/CrawlerServiceProvider.php | 35 ++++++++ ...3_05_30_121851_create_web_crawls_table.php | 7 +- docker-compose.yaml | 6 +- routes/api.php | 40 ++++++++-- routes/channels.php | 18 ----- 7 files changed, 152 insertions(+), 99 deletions(-) create mode 100644 app/Providers/CrawlerServiceProvider.php delete mode 100644 routes/channels.php diff --git a/README.md b/README.md index 3ed385a..cbdb580 100644 --- a/README.md +++ b/README.md @@ -1,66 +1,8 @@ -

Laravel Logo

+Run the server - php artisan serve -

-Build Status -Total Downloads -Latest Stable Version -License -

+run mongo - run docker-compose up -d -## About Laravel +migrate - php artisan migrate -Laravel is a web application framework with expressive, elegant syntax. We believe development must be an enjoyable and creative experience to be truly fulfilling. Laravel takes the pain out of development by easing common tasks used in many web projects, such as: +use .env file to set up the database connection -- [Simple, fast routing engine](https://laravel.com/docs/routing). -- [Powerful dependency injection container](https://laravel.com/docs/container). -- Multiple back-ends for [session](https://laravel.com/docs/session) and [cache](https://laravel.com/docs/cache) storage. -- Expressive, intuitive [database ORM](https://laravel.com/docs/eloquent). -- Database agnostic [schema migrations](https://laravel.com/docs/migrations). -- [Robust background job processing](https://laravel.com/docs/queues). -- [Real-time event broadcasting](https://laravel.com/docs/broadcasting). - -Laravel is accessible, powerful, and provides tools required for large, robust applications. - -## Learning Laravel - -Laravel has the most extensive and thorough [documentation](https://laravel.com/docs) and video tutorial library of all modern web application frameworks, making it a breeze to get started with the framework. - -You may also try the [Laravel Bootcamp](https://bootcamp.laravel.com), where you will be guided through building a modern Laravel application from scratch. - -If you don't feel like reading, [Laracasts](https://laracasts.com) can help. Laracasts contains over 2000 video tutorials on a range of topics including Laravel, modern PHP, unit testing, and JavaScript. Boost your skills by digging into our comprehensive video library. - -## Laravel Sponsors - -We would like to extend our thanks to the following sponsors for funding Laravel development. If you are interested in becoming a sponsor, please visit the Laravel [Patreon page](https://patreon.com/taylorotwell). 
- -### Premium Partners - -- **[Vehikl](https://vehikl.com/)** -- **[Tighten Co.](https://tighten.co)** -- **[Kirschbaum Development Group](https://kirschbaumdevelopment.com)** -- **[64 Robots](https://64robots.com)** -- **[Cubet Techno Labs](https://cubettech.com)** -- **[Cyber-Duck](https://cyber-duck.co.uk)** -- **[Many](https://www.many.co.uk)** -- **[Webdock, Fast VPS Hosting](https://www.webdock.io/en)** -- **[DevSquad](https://devsquad.com)** -- **[Curotec](https://www.curotec.com/services/technologies/laravel/)** -- **[OP.GG](https://op.gg)** -- **[WebReinvent](https://webreinvent.com/?utm_source=laravel&utm_medium=github&utm_campaign=patreon-sponsors)** -- **[Lendio](https://lendio.com)** - -## Contributing - -Thank you for considering contributing to the Laravel framework! The contribution guide can be found in the [Laravel documentation](https://laravel.com/docs/contributions). - -## Code of Conduct - -In order to ensure that the Laravel community is welcoming to all, please review and abide by the [Code of Conduct](https://laravel.com/docs/contributions#code-of-conduct). - -## Security Vulnerabilities - -If you discover a security vulnerability within Laravel, please send an e-mail to Taylor Otwell via [taylor@laravel.com](mailto:taylor@laravel.com). All security vulnerabilities will be promptly addressed. - -## License - -The Laravel framework is open-sourced software licensed under the [MIT license](https://opensource.org/licenses/MIT). diff --git a/app/Http/Controllers/WebCrawlController.php b/app/Http/Controllers/WebCrawlController.php index 563856b..8e3f8dc 100644 --- a/app/Http/Controllers/WebCrawlController.php +++ b/app/Http/Controllers/WebCrawlController.php @@ -4,19 +4,66 @@ use App\Models\WebCrawl; use Illuminate\Http\Request; +use GuzzleHttp\Client; class WebCrawlController extends Controller { + protected $webCrawl; /** * Display a listing of the resource. 
* * @return \Illuminate\Http\Response */ public function index() - { + { + $allCrawls = WebCrawl::all(); + //Return the results in JSON format - // return response()->json($webCrawl); + return response()->json($allCrawls); + } + + public function crawlWebsite($url, $depth) { + + + // // Use GuzzleHttp client to send HTTP requests + $client = new Client(); + $response = $client->get($url); + if ($response->getStatusCode() >= 200 && $response->getStatusCode() < 300) { + + $body = $response->getBody()->getContents(); + // get: + // links from the page + // full content + // depth + // url + // visitedUrls + + + } + + // // Check if the HTTP response is successful (status code 2xx) + + // // Insert a page info the database if the HTTP response satus is successful + // $webCrawl = new WebCrawl(); + // $webCrawl->url = $url; + // $webCrawl->content = $response->getBody()->getContents(); + // $webCrawl->save(); + // } + // Crawl the links on the page + + echo 'Crawling completed!'; + } + + + public function getOne($url) + { + $webCrawl = WebCrawl::where('url', $url)->first(); + echo 'here!';die; + if ($webCrawl) { + return $webCrawl; + } + return false; } /** @@ -24,9 +71,21 @@ public function index() * * @return \Illuminate\Http\Response */ - public function create() + private function create($response, $url, $depth, $visitedUrls, $links) { - // + $webCrawl = new WebCrawl(); + $webCrawl->url = $url; + $webCrawl->content = $response->getBody()->getContents(); + $webCrawl->depth = $depth; + $webCrawl->visited_urls = $visitedUrls; + $webCrawl->status_code = $response->getStatusCode(); + $webCrawl->status = $response->getReasonPhrase(); + $webCrawl->created_at = $response->getHeader('Date')[0]; + $webCrawl->updated_at = $response->getHeader('Last-Modified')[0]; + $webCrawl->links = $links; + $webCrawl->save(); + return $webCrawl; + } /** @@ -80,8 +139,16 @@ public function update(Request $request, WebCrawl $webCrawl) * @param \App\Models\WebCrawl $webCrawl * @return 
\Illuminate\Http\Response */ - public function destroy(WebCrawl $webCrawl) + public function destroy($id) { - // + $webCrawl = WebCrawl::where("_id", $id); + echo '
';
+        echo 'fff';
+        print_r($webCrawl);die;
+        if ($webCrawl) {
+            $webCrawl->delete();
+            return true;
+        }
+        return false;
     }
 }
diff --git a/app/Providers/CrawlerServiceProvider.php b/app/Providers/CrawlerServiceProvider.php
new file mode 100644
index 0000000..8924dda
--- /dev/null
+++ b/app/Providers/CrawlerServiceProvider.php
@@ -0,0 +1,35 @@
+id();
             $table->string('url');
             $table->string('content');
-            $table->string('depth');
-            $table->string('visited_urls');
-            $table->string('status_code');
-            $table->string('status');
             $table->string('created_at');
             $table->string('updated_at');
-            $table->string('links');
             $table->timestamps();
         });
     }
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 2fd0181..5b13b55 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -6,12 +6,14 @@ services:
     ports:
       - 27017:27017
     volumes:
-      - mongodb_data:/data/db
+      - data:/data/db
       - ./init-scripts/init.js:/docker-entrypoint-initdb.d/mongo-init.js
     environment:
       - MONGO_INITDB_DATABASE=${DB_DATABASE}
       - MONGO_INITDB_ROOT_USERNAME=${MONGO_INITDB_ROOT_USERNAME}
       - MONGO_INITDB_ROOT_PASSWORD=${MONGO_INITDB_ROOT_PASSWORD}
     platform: linux/arm64/v8
+    expose:
+      - 27017
 volumes:
-  mongodb_data:
\ No newline at end of file
+  data:
\ No newline at end of file
diff --git a/routes/api.php b/routes/api.php
index 2fcbd3c..20670aa 100644
--- a/routes/api.php
+++ b/routes/api.php
@@ -4,12 +4,42 @@
 use Illuminate\Support\Facades\Route;
 use GuzzleHttp\Client;
 use App\Http\Controllers\WebCrawlController;
+use GuzzleHttp\Psr7\Response;
 
+Route::get('/crawl', function (Request $request) {
+  // Crawl the page given by `url` via WebCrawlController, unless getOne() finds a stored record for it.
+  $url = $request->input('url');
+  // Reject the request when `url` is absent or does not pass PHP's URL validation filter.
+  if (!$url || !filter_var($url, FILTER_VALIDATE_URL)) {
+    return response()->json([
+      'error' => 'Missing or invalid required parameter `url`'
+    ], 400);
+  }
+  $depth = (int) $request->input('depth', 3); // query values arrive as strings; cast so it matches the int default of 3
+  // NOTE(review): getOne() as written in this patch echoes and calls die, so execution stops at the lookup below.
+  $crawlerController = new WebCrawlController();
+  $isAlreadyDone = $crawlerController->getOne($url);
+  if(!$isAlreadyDone){
+    $crawlerController->crawlWebsite($url, $depth);
+  } else {
+    return response()->json([
+      'error' => 'This URL has already been crawled',
+      'data' => $isAlreadyDone
+    ], 400);
+  }
+});
 
-
-// Route::get('/crawl', function (Request $request) {
-//    // invode WebCrawlController index method
-    
+// Route::post('/crawl/{id}', function (String $id, Request $request, Response $response) {
+//   $id = $request->input('id');
+//   $crawlerController = new WebCrawlController();
+//   if(!$crawlerController->destroy($id)) {
+//     return response()->json([
+//       'error' => 'Url Not Found',
+//     ], 404);
+//   } else {
+//     return response()->json([
+//       'success' => 'This URL has been deleted',
+//     ], 200);
+//   }
 // });
 
-Route::get('/crawl', [WebCrawlController::class, 'index']);
\ No newline at end of file
diff --git a/routes/channels.php b/routes/channels.php
deleted file mode 100644
index 5d451e1..0000000
--- a/routes/channels.php
+++ /dev/null
@@ -1,18 +0,0 @@
-id === (int) $id;
-});