done with migration file
This commit is contained in:
parent
c883218ad4
commit
a2df1212b1
11 changed files with 134 additions and 232 deletions
87
app/Http/Controllers/WebCrawlController.php
Normal file
87
app/Http/Controllers/WebCrawlController.php
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Http\Controllers;
|
||||||
|
|
||||||
|
use App\Models\WebCrawl;
|
||||||
|
use Illuminate\Http\Request;
|
||||||
|
|
||||||
|
class WebCrawlController extends Controller
{
    /**
     * Return the stored crawl records as JSON.
     *
     * This action is registered for GET /crawl in the routes file, so it must
     * produce a response; previously the body contained only commented-out code.
     *
     * NOTE(review): WebCrawl::all() loads every document in the collection.
     * Confirm whether pagination or filtering is needed before exposing this.
     *
     * @return \Illuminate\Http\JsonResponse
     */
    public function index(): \Illuminate\Http\JsonResponse
    {
        return response()->json(WebCrawl::all());
    }

    /**
     * Show the form for creating a new resource.
     *
     * Not implemented yet: the body is empty, so nothing is returned.
     *
     * @return \Illuminate\Http\Response|null
     */
    public function create()
    {
        // Intentionally empty until the action is implemented.
    }

    /**
     * Store a newly created resource in storage.
     *
     * Not implemented yet: the body is empty, so nothing is returned.
     *
     * @param  \Illuminate\Http\Request  $request
     * @return \Illuminate\Http\Response|null
     */
    public function store(Request $request)
    {
        // Intentionally empty until the action is implemented.
    }

    /**
     * Display the specified resource.
     *
     * Not implemented yet: the body is empty, so nothing is returned.
     *
     * @param  \App\Models\WebCrawl  $webCrawl
     * @return \Illuminate\Http\Response|null
     */
    public function show(WebCrawl $webCrawl)
    {
        // Intentionally empty until the action is implemented.
    }

    /**
     * Show the form for editing the specified resource.
     *
     * Not implemented yet: the body is empty, so nothing is returned.
     *
     * @param  \App\Models\WebCrawl  $webCrawl
     * @return \Illuminate\Http\Response|null
     */
    public function edit(WebCrawl $webCrawl)
    {
        // Intentionally empty until the action is implemented.
    }

    /**
     * Update the specified resource in storage.
     *
     * Not implemented yet: the body is empty, so nothing is returned.
     *
     * @param  \Illuminate\Http\Request  $request
     * @param  \App\Models\WebCrawl  $webCrawl
     * @return \Illuminate\Http\Response|null
     */
    public function update(Request $request, WebCrawl $webCrawl)
    {
        // Intentionally empty until the action is implemented.
    }

    /**
     * Remove the specified resource from storage.
     *
     * Not implemented yet: the body is empty, so nothing is returned.
     *
     * @param  \App\Models\WebCrawl  $webCrawl
     * @return \Illuminate\Http\Response|null
     */
    public function destroy(WebCrawl $webCrawl)
    {
        // Intentionally empty until the action is implemented.
    }
}
|
|
@ -1,44 +0,0 @@
|
||||||
<?php
|
|
||||||
|
|
||||||
namespace App\Models;
|
|
||||||
|
|
||||||
// use Illuminate\Contracts\Auth\MustVerifyEmail;
|
|
||||||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
|
||||||
use Illuminate\Foundation\Auth\User as Authenticatable;
|
|
||||||
use Illuminate\Notifications\Notifiable;
|
|
||||||
use Laravel\Sanctum\HasApiTokens;
|
|
||||||
|
|
||||||
class User extends Authenticatable
|
|
||||||
{
|
|
||||||
use HasApiTokens, HasFactory, Notifiable;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The attributes that are mass assignable.
|
|
||||||
*
|
|
||||||
* @var array<int, string>
|
|
||||||
*/
|
|
||||||
protected $fillable = [
|
|
||||||
'name',
|
|
||||||
'email',
|
|
||||||
'password',
|
|
||||||
];
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The attributes that should be hidden for serialization.
|
|
||||||
*
|
|
||||||
* @var array<int, string>
|
|
||||||
*/
|
|
||||||
protected $hidden = [
|
|
||||||
'password',
|
|
||||||
'remember_token',
|
|
||||||
];
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The attributes that should be cast.
|
|
||||||
*
|
|
||||||
* @var array<string, string>
|
|
||||||
*/
|
|
||||||
protected $casts = [
|
|
||||||
'email_verified_at' => 'datetime',
|
|
||||||
];
|
|
||||||
}
|
|
12
app/Models/WebCrawl.php
Normal file
12
app/Models/WebCrawl.php
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace App\Models;
|
||||||
|
|
||||||
|
use Jenssegers\Mongodb\Eloquent\Model;
|
||||||
|
|
||||||
|
class WebCrawl extends Model
{
    /**
     * Name of the collection this model reads from and writes to.
     *
     * NOTE(review): the migration in this change creates 'web_crawls', while
     * this model points at 'webcrawl' — confirm which name is intended.
     *
     * @var string
     */
    protected $collection = 'webcrawl';

    /**
     * Name of the database connection used by this model.
     *
     * @var string
     */
    protected $connection = 'mongodb';
}
|
|
@ -10,7 +10,8 @@
|
||||||
"jenssegers/mongodb": "^3.9",
|
"jenssegers/mongodb": "^3.9",
|
||||||
"laravel/framework": "^9.19",
|
"laravel/framework": "^9.19",
|
||||||
"laravel/sanctum": "^3.0",
|
"laravel/sanctum": "^3.0",
|
||||||
"laravel/tinker": "^2.7"
|
"laravel/tinker": "^2.7",
|
||||||
|
"mongodb/mongodb": "^1.15"
|
||||||
},
|
},
|
||||||
"require-dev": {
|
"require-dev": {
|
||||||
"fakerphp/faker": "^1.9.1",
|
"fakerphp/faker": "^1.9.1",
|
||||||
|
@ -26,6 +27,9 @@
|
||||||
"App\\": "app/",
|
"App\\": "app/",
|
||||||
"Database\\Factories\\": "database/factories/",
|
"Database\\Factories\\": "database/factories/",
|
||||||
"Database\\Seeders\\": "database/seeders/"
|
"Database\\Seeders\\": "database/seeders/"
|
||||||
|
},
|
||||||
|
"psr-0": {
|
||||||
|
"MongoDB\\": "vendor/mongodb/mongodb/src/"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"autoload-dev": {
|
"autoload-dev": {
|
||||||
|
|
2
composer.lock
generated
2
composer.lock
generated
|
@ -4,7 +4,7 @@
|
||||||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
||||||
"This file is @generated automatically"
|
"This file is @generated automatically"
|
||||||
],
|
],
|
||||||
"content-hash": "ac7eef60dd12024b114a9f511ed7057b",
|
"content-hash": "c07b5ed598f10682dc4f14a3c8bb0809",
|
||||||
"packages": [
|
"packages": [
|
||||||
{
|
{
|
||||||
"name": "brick/math",
|
"name": "brick/math",
|
||||||
|
|
|
@ -32,19 +32,19 @@
|
||||||
| choice installed on your machine before you begin development.
|
| choice installed on your machine before you begin development.
|
||||||
|
|
|
|
||||||
*/
|
*/
|
||||||
'mongodb' => [
|
|
||||||
'driver' => 'mongodb',
|
|
||||||
'host' => env('DB_HOST', 'localhost'),
|
|
||||||
'port' => env('DB_PORT', 27017),
|
|
||||||
'database' => env('DB_DATABASE'),
|
|
||||||
'username' => env('DB_USERNAME'),
|
|
||||||
'password' => env('DB_PASSWORD'),
|
|
||||||
'options' => [
|
|
||||||
'database' => env('DB_AUTHENTICATION_DATABASE', 'admin'),
|
|
||||||
],
|
|
||||||
],
|
|
||||||
'connections' => [
|
|
||||||
|
|
||||||
|
'connections' => [
|
||||||
|
'mongodb' => [
|
||||||
|
'driver' => 'mongodb',
|
||||||
|
'host' => env('DB_HOST', 'localhost'),
|
||||||
|
'port' => env('DB_PORT', 27017),
|
||||||
|
'database' => env('DB_DATABASE'),
|
||||||
|
'username' => env('DB_USERNAME'),
|
||||||
|
'password' => env('DB_PASSWORD'),
|
||||||
|
'options' => [
|
||||||
|
'database' => env('DB_AUTHENTICATION_DATABASE', 'admin'),
|
||||||
|
],
|
||||||
|
],
|
||||||
],
|
],
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -1,36 +0,0 @@
|
||||||
<?php
|
|
||||||
|
|
||||||
use Illuminate\Database\Migrations\Migration;
|
|
||||||
use Illuminate\Database\Schema\Blueprint;
|
|
||||||
use Illuminate\Support\Facades\Schema;
|
|
||||||
|
|
||||||
return new class extends Migration
|
|
||||||
{
|
|
||||||
/**
|
|
||||||
* Run the migrations.
|
|
||||||
*
|
|
||||||
* @return void
|
|
||||||
*/
|
|
||||||
public function up()
|
|
||||||
{
|
|
||||||
Schema::create('users', function (Blueprint $table) {
|
|
||||||
$table->id();
|
|
||||||
$table->string('name');
|
|
||||||
$table->string('email')->unique();
|
|
||||||
$table->timestamp('email_verified_at')->nullable();
|
|
||||||
$table->string('password');
|
|
||||||
$table->rememberToken();
|
|
||||||
$table->timestamps();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reverse the migrations.
|
|
||||||
*
|
|
||||||
* @return void
|
|
||||||
*/
|
|
||||||
public function down()
|
|
||||||
{
|
|
||||||
Schema::dropIfExists('users');
|
|
||||||
}
|
|
||||||
};
|
|
|
@ -1,32 +0,0 @@
|
||||||
<?php
|
|
||||||
|
|
||||||
use Illuminate\Database\Migrations\Migration;
|
|
||||||
use Illuminate\Database\Schema\Blueprint;
|
|
||||||
use Illuminate\Support\Facades\Schema;
|
|
||||||
|
|
||||||
return new class extends Migration
|
|
||||||
{
|
|
||||||
/**
|
|
||||||
* Run the migrations.
|
|
||||||
*
|
|
||||||
* @return void
|
|
||||||
*/
|
|
||||||
public function up()
|
|
||||||
{
|
|
||||||
Schema::create('password_resets', function (Blueprint $table) {
|
|
||||||
$table->string('email')->primary();
|
|
||||||
$table->string('token');
|
|
||||||
$table->timestamp('created_at')->nullable();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reverse the migrations.
|
|
||||||
*
|
|
||||||
* @return void
|
|
||||||
*/
|
|
||||||
public function down()
|
|
||||||
{
|
|
||||||
Schema::dropIfExists('password_resets');
|
|
||||||
}
|
|
||||||
};
|
|
|
@ -1,36 +0,0 @@
|
||||||
<?php
|
|
||||||
|
|
||||||
use Illuminate\Database\Migrations\Migration;
|
|
||||||
use Illuminate\Database\Schema\Blueprint;
|
|
||||||
use Illuminate\Support\Facades\Schema;
|
|
||||||
|
|
||||||
return new class extends Migration
|
|
||||||
{
|
|
||||||
/**
|
|
||||||
* Run the migrations.
|
|
||||||
*
|
|
||||||
* @return void
|
|
||||||
*/
|
|
||||||
public function up()
|
|
||||||
{
|
|
||||||
Schema::create('failed_jobs', function (Blueprint $table) {
|
|
||||||
$table->id();
|
|
||||||
$table->string('uuid')->unique();
|
|
||||||
$table->text('connection');
|
|
||||||
$table->text('queue');
|
|
||||||
$table->longText('payload');
|
|
||||||
$table->longText('exception');
|
|
||||||
$table->timestamp('failed_at')->useCurrent();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reverse the migrations.
|
|
||||||
*
|
|
||||||
* @return void
|
|
||||||
*/
|
|
||||||
public function down()
|
|
||||||
{
|
|
||||||
Schema::dropIfExists('failed_jobs');
|
|
||||||
}
|
|
||||||
};
|
|
|
@ -13,14 +13,17 @@
|
||||||
*/
|
*/
|
||||||
public function up()
{
    // Create the 'web_crawls' table with the crawl columns.
    // NOTE(review): string() is a short VARCHAR on SQL drivers; confirm it is
    // large enough for 'content', 'visited_urls' and 'links'.
    Schema::create('web_crawls', function (Blueprint $table) {
        $table->id();
        $table->string('url');
        $table->string('content');
        $table->string('depth');
        $table->string('visited_urls');
        $table->string('status_code');
        $table->string('status');
        $table->string('links');

        // timestamps() already defines created_at and updated_at, so they must
        // not also be declared as string columns (duplicate column definitions).
        $table->timestamps();
    });
}
|
||||||
|
@ -32,6 +35,6 @@ public function up()
|
||||||
*/
|
*/
|
||||||
public function down()
{
    // Drop the 'web_crawls' table; dropIfExists() is a no-op when it is absent.
    Schema::dropIfExists('web_crawls');
}
|
||||||
};
|
};
|
|
@ -3,69 +3,13 @@
|
||||||
use Illuminate\Http\Request;
|
use Illuminate\Http\Request;
|
||||||
use Illuminate\Support\Facades\Route;
|
use Illuminate\Support\Facades\Route;
|
||||||
use GuzzleHttp\Client;
|
use GuzzleHttp\Client;
|
||||||
use Illuminate\Support\Facades\DB;
|
use App\Http\Controllers\WebCrawlController;
|
||||||
use MongoDB\Client as MongoClient;
|
|
||||||
|
|
||||||
Route::get('/crawl', function (Request $request) {
|
|
||||||
|
|
||||||
$url = $request->input('url');
|
|
||||||
check_connection_to_mongodb();
|
|
||||||
|
|
||||||
if (!$url) {
|
|
||||||
return response()->json([
|
|
||||||
'error' => 'Missing required parameter `url`'
|
|
||||||
], 400);
|
|
||||||
}
|
|
||||||
$depth = $request->input('depth', 3); // default depth is 3 if not provided
|
|
||||||
$visitedUrls = [];
|
|
||||||
crawlWebsite($url, $depth, $visitedUrls);
|
|
||||||
});
|
|
||||||
|
|
||||||
|
|
||||||
function check_connection_to_mongodb() {
|
|
||||||
$connection = new MongoClient();
|
|
||||||
echo '<pre>';
|
|
||||||
echo "IT WORKS";
|
|
||||||
die;
|
|
||||||
print_r($connection);
|
|
||||||
die;
|
|
||||||
}
|
|
||||||
function crawlWebsite($url, $depth, &$visitedUrls)
|
|
||||||
{
|
|
||||||
// Check if URL has already been visited
|
|
||||||
if (in_array($url, $visitedUrls)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
$visitedUrls[] = $url;
|
// Route::get('/crawl', function (Request $request) {
|
||||||
|
// // invoke WebCrawlController index method
|
||||||
|
|
||||||
// Use GuzzleHttp client to send HTTP requests
|
// });
|
||||||
$client = new Client();
|
|
||||||
$response = $client->get($url);
|
|
||||||
|
|
||||||
// Check if the HTTP response is successful (status code 2xx)
|
// GET /crawl is dispatched to WebCrawlController::index().
Route::get('/crawl', [WebCrawlController::class, 'index']);
|
||||||
if ($response->getStatusCode() >= 200 && $response->getStatusCode() < 300) {
|
|
||||||
|
|
||||||
// echo $response->getBody()->getContents();
|
|
||||||
|
|
||||||
// Insert page info into the database
|
|
||||||
// DB::table('pages')->insert([
|
|
||||||
// 'url' => $url,
|
|
||||||
// 'content' => $response->getBody()->getContents()
|
|
||||||
// ]);
|
|
||||||
// Crawl the links on the page
|
|
||||||
// if ($depth > 0) {
|
|
||||||
// $body = $response->getBody()->getContents();
|
|
||||||
// $dom = new DOMDocument();
|
|
||||||
// @$dom->loadHTML($body);
|
|
||||||
|
|
||||||
// $links = $dom->getElementsByTagName('a');
|
|
||||||
// foreach ($links as $link) {
|
|
||||||
// $href = $link->getAttribute('href');
|
|
||||||
// if (filter_var($href, FILTER_VALIDATE_URL)) {
|
|
||||||
// crawlWebsite($href, $depth - 1, $visitedUrls);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in a new issue