done with migration file
This commit is contained in:
parent
c883218ad4
commit
a2df1212b1
11 changed files with 134 additions and 232 deletions
87
app/Http/Controllers/WebCrawlController.php
Normal file
87
app/Http/Controllers/WebCrawlController.php
Normal file
|
@ -0,0 +1,87 @@
|
|||
<?php
|
||||
|
||||
namespace App\Http\Controllers;
|
||||
|
||||
use App\Models\WebCrawl;
|
||||
use Illuminate\Http\Request;
|
||||
|
||||
class WebCrawlController extends Controller
{
    /**
     * List the stored crawl documents as JSON.
     *
     * Returns every WebCrawl document; there is no pagination or filtering.
     * NOTE(review): if the collection holds full page bodies this response
     * can become large — consider paginating once the data shape is settled.
     *
     * @return \Illuminate\Http\JsonResponse
     */
    public function index()
    {
        return response()->json(WebCrawl::all());
    }

    /**
     * Show the form for creating a new crawl.
     *
     * Not implemented yet: the body is empty, so nothing is returned.
     *
     * @return void
     */
    public function create()
    {
        //
    }

    /**
     * Store a newly created crawl.
     *
     * Not implemented yet: the body is empty, so nothing is returned.
     *
     * @param  \Illuminate\Http\Request  $request
     * @return void
     */
    public function store(Request $request)
    {
        //
    }

    /**
     * Display a single crawl.
     *
     * Not implemented yet: the body is empty, so nothing is returned.
     *
     * @param  \App\Models\WebCrawl  $webCrawl
     * @return void
     */
    public function show(WebCrawl $webCrawl)
    {
        //
    }

    /**
     * Show the form for editing a crawl.
     *
     * Not implemented yet: the body is empty, so nothing is returned.
     *
     * @param  \App\Models\WebCrawl  $webCrawl
     * @return void
     */
    public function edit(WebCrawl $webCrawl)
    {
        //
    }

    /**
     * Update a stored crawl.
     *
     * Not implemented yet: the body is empty, so nothing is returned.
     *
     * @param  \Illuminate\Http\Request  $request
     * @param  \App\Models\WebCrawl  $webCrawl
     * @return void
     */
    public function update(Request $request, WebCrawl $webCrawl)
    {
        //
    }

    /**
     * Remove a stored crawl.
     *
     * Not implemented yet: the body is empty, so nothing is returned.
     *
     * @param  \App\Models\WebCrawl  $webCrawl
     * @return void
     */
    public function destroy(WebCrawl $webCrawl)
    {
        //
    }
}
|
|
@ -1,44 +0,0 @@
|
|||
<?php
|
||||
|
||||
namespace App\Models;
|
||||
|
||||
// use Illuminate\Contracts\Auth\MustVerifyEmail;
|
||||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
||||
use Illuminate\Foundation\Auth\User as Authenticatable;
|
||||
use Illuminate\Notifications\Notifiable;
|
||||
use Laravel\Sanctum\HasApiTokens;
|
||||
|
||||
class User extends Authenticatable
{
    use HasApiTokens;
    use HasFactory;
    use Notifiable;

    /**
     * Attributes that may be filled through mass assignment.
     *
     * @var array<int, string>
     */
    protected $fillable = ['name', 'email', 'password'];

    /**
     * Attributes left out when the model is serialized.
     *
     * @var array<int, string>
     */
    protected $hidden = ['password', 'remember_token'];

    /**
     * Attribute name => cast type applied when the attribute is read.
     *
     * @var array<string, string>
     */
    protected $casts = ['email_verified_at' => 'datetime'];
}
|
12
app/Models/WebCrawl.php
Normal file
12
app/Models/WebCrawl.php
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
namespace App\Models;
|
||||
|
||||
use Jenssegers\Mongodb\Eloquent\Model;
|
||||
|
||||
class WebCrawl extends Model
{
    /**
     * Collection this model reads from and writes to.
     *
     * NOTE(review): the migration in this commit creates `web_crawls`,
     * while this model points at `webcrawl` — confirm which name is intended.
     *
     * @var string
     */
    protected $collection = 'webcrawl';

    /**
     * Name of the database connection used by this model.
     *
     * @var string
     */
    protected $connection = 'mongodb';
}
|
|
@ -10,7 +10,8 @@
|
|||
"jenssegers/mongodb": "^3.9",
|
||||
"laravel/framework": "^9.19",
|
||||
"laravel/sanctum": "^3.0",
|
||||
"laravel/tinker": "^2.7"
|
||||
"laravel/tinker": "^2.7",
|
||||
"mongodb/mongodb": "^1.15"
|
||||
},
|
||||
"require-dev": {
|
||||
"fakerphp/faker": "^1.9.1",
|
||||
|
@ -26,6 +27,9 @@
|
|||
"App\\": "app/",
|
||||
"Database\\Factories\\": "database/factories/",
|
||||
"Database\\Seeders\\": "database/seeders/"
|
||||
},
|
||||
"psr-0": {
|
||||
"MongoDB\\": "vendor/mongodb/mongodb/src/"
|
||||
}
|
||||
},
|
||||
"autoload-dev": {
|
||||
|
|
2
composer.lock
generated
2
composer.lock
generated
|
@ -4,7 +4,7 @@
|
|||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
||||
"This file is @generated automatically"
|
||||
],
|
||||
"content-hash": "ac7eef60dd12024b114a9f511ed7057b",
|
||||
"content-hash": "c07b5ed598f10682dc4f14a3c8bb0809",
|
||||
"packages": [
|
||||
{
|
||||
"name": "brick/math",
|
||||
|
|
|
@ -32,19 +32,19 @@
|
|||
| choice installed on your machine before you begin development.
|
||||
|
|
||||
*/
|
||||
'mongodb' => [
|
||||
'driver' => 'mongodb',
|
||||
'host' => env('DB_HOST', 'localhost'),
|
||||
'port' => env('DB_PORT', 27017),
|
||||
'database' => env('DB_DATABASE'),
|
||||
'username' => env('DB_USERNAME'),
|
||||
'password' => env('DB_PASSWORD'),
|
||||
'options' => [
|
||||
'database' => env('DB_AUTHENTICATION_DATABASE', 'admin'),
|
||||
],
|
||||
],
|
||||
|
||||
'connections' => [
|
||||
|
||||
'mongodb' => [
|
||||
'driver' => 'mongodb',
|
||||
'host' => env('DB_HOST', 'localhost'),
|
||||
'port' => env('DB_PORT', 27017),
|
||||
'database' => env('DB_DATABASE'),
|
||||
'username' => env('DB_USERNAME'),
|
||||
'password' => env('DB_PASSWORD'),
|
||||
'options' => [
|
||||
'database' => env('DB_AUTHENTICATION_DATABASE', 'admin'),
|
||||
],
|
||||
],
|
||||
],
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,36 +0,0 @@
|
|||
<?php
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
{
    /**
     * Create the `users` table.
     *
     * @return void
     */
    public function up()
    {
        // Column definitions are kept in declaration order.
        $defineColumns = function (Blueprint $users) {
            $users->id();
            $users->string('name');
            $users->string('email')->unique();
            $users->timestamp('email_verified_at')->nullable();
            $users->string('password');
            $users->rememberToken();
            $users->timestamps();
        };

        Schema::create('users', $defineColumns);
    }

    /**
     * Drop the `users` table if it exists.
     *
     * @return void
     */
    public function down()
    {
        Schema::dropIfExists('users');
    }
};
|
|
@ -1,32 +0,0 @@
|
|||
<?php
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
{
    /**
     * Create the `password_resets` table, keyed by e-mail address.
     *
     * @return void
     */
    public function up()
    {
        Schema::create('password_resets', function (Blueprint $resets) {
            $resets->string('email')->primary();
            $resets->string('token');
            $resets->timestamp('created_at')->nullable();
        });
    }

    /**
     * Drop the `password_resets` table if it exists.
     *
     * @return void
     */
    public function down()
    {
        Schema::dropIfExists('password_resets');
    }
};
|
|
@ -1,36 +0,0 @@
|
|||
<?php
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
{
    /**
     * Create the `failed_jobs` table.
     *
     * @return void
     */
    public function up()
    {
        Schema::create('failed_jobs', function (Blueprint $failedJobs) {
            $failedJobs->id();
            $failedJobs->string('uuid')->unique();
            $failedJobs->text('connection');
            $failedJobs->text('queue');
            $failedJobs->longText('payload');
            $failedJobs->longText('exception');
            $failedJobs->timestamp('failed_at')->useCurrent();
        });
    }

    /**
     * Drop the `failed_jobs` table if it exists.
     *
     * @return void
     */
    public function down()
    {
        Schema::dropIfExists('failed_jobs');
    }
};
|
|
@ -13,14 +13,17 @@
|
|||
*/
|
||||
public function up()
{
    // Create the `web_crawls` table/collection holding one row per crawled page.
    // NOTE(review): every field is declared as a plain string, including
    // `content` and `depth` — confirm these types match the data being stored.
    Schema::create('web_crawls', function (Blueprint $table) {
        $table->id();
        $table->string('url');
        $table->string('content');
        $table->string('depth');
        $table->string('visited_urls');
        $table->string('status_code');
        $table->string('status');
        $table->string('links');
        // timestamps() already adds `created_at` and `updated_at`, so they
        // are not declared by hand as well (that would define each column twice).
        $table->timestamps();
    });
}
|
||||
|
@ -32,6 +35,6 @@ public function up()
|
|||
*/
|
||||
public function down()
{
    // Reverse up(): drop the table this migration creates.
    Schema::dropIfExists('web_crawls');
}
|
||||
};
|
|
@ -3,69 +3,13 @@
|
|||
use Illuminate\Http\Request;
|
||||
use Illuminate\Support\Facades\Route;
|
||||
use GuzzleHttp\Client;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
use MongoDB\Client as MongoClient;
|
||||
|
||||
// GET /crawl?url=...&depth=N — crawl starting from `url`.
Route::get('/crawl', function (Request $request) {
    $url = $request->input('url');

    // Validate first: check_connection_to_mongodb() ends the script, so a
    // check placed after it could never produce this 400 response.
    if (!$url) {
        return response()->json([
            'error' => 'Missing required parameter `url`'
        ], 400);
    }

    check_connection_to_mongodb();

    $depth = $request->input('depth', 3); // default depth is 3 if not provided
    $visitedUrls = []; // filled by reference with every URL the crawl touches
    crawlWebsite($url, $depth, $visitedUrls);
});
|
||||
use App\Http\Controllers\WebCrawlController;
|
||||
|
||||
|
||||
/**
 * Debug helper: build a MongoDB client with default arguments, print a
 * marker and terminate the script.
 *
 * Nothing after a call to this function runs, because it always ends in
 * `die`. The client is created only so that a failure while constructing
 * it surfaces here; the instance itself is not used.
 *
 * NOTE(review): constructing the client presumably does not contact the
 * server by itself — confirm before relying on this as a connectivity check.
 *
 * @return void
 */
function check_connection_to_mongodb() {
    new MongoClient();

    echo '<pre>';
    echo "IT WORKS";
    die;
}
|
||||
/**
 * Fetch a single URL once and record it as visited.
 *
 * A URL already present in $visitedUrls is skipped without any request.
 * Otherwise it is appended to $visitedUrls and fetched with an HTTP GET.
 * Storing the page and following its links are not implemented, so $depth
 * is currently unused.
 *
 * @param  string  $url          URL to fetch
 * @param  mixed   $depth        remaining link depth (not used yet)
 * @param  array   $visitedUrls  URLs seen so far; updated by reference
 * @return void
 */
function crawlWebsite($url, $depth, &$visitedUrls)
{
    // Strict comparison: with loose comparison numeric-looking strings such
    // as '1e3' and '1000' would be treated as the same URL.
    if (in_array($url, $visitedUrls, true)) {
        return;
    }

    $visitedUrls[] = $url;

    // Use GuzzleHttp client to send HTTP requests
    $response = (new Client())->get($url);

    // Anything outside 2xx is not processed further.
    $status = $response->getStatusCode();
    if ($status < 200 || $status >= 300) {
        return;
    }

    // TODO: persist the page (url + body) and, while $depth > 0, follow the
    // absolute links found in the body with $depth - 1.
}

// Route::get('/crawl', function (Request $request) {
//     // invoke WebCrawlController index method
// });
|
||||
// GET /crawl is handled by WebCrawlController::index().
Route::get('/crawl', [WebCrawlController::class, 'index']);
|
Loading…
Reference in a new issue