2023-05-30 14:29:46 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
namespace App\Http\Controllers;
|
|
|
|
|
|
|
|
use App\Models\WebCrawl;
|
|
|
|
use Illuminate\Http\Request;
|
2023-05-30 17:19:53 +00:00
|
|
|
use GuzzleHttp\Client;
|
2023-05-30 14:29:46 +00:00
|
|
|
|
|
|
|
class WebCrawlController extends Controller
{
    /**
     * Crawl record holder.
     *
     * NOTE(review): this property is not assigned or read anywhere in this
     * class; it is kept in case subclasses rely on it — confirm before removing.
     *
     * @var mixed
     */
    protected $webCrawl;

    /**
     * Return every stored crawl record as JSON.
     *
     * @return \Illuminate\Http\JsonResponse
     */
    public function index()
    {
        return response()->json(WebCrawl::all());
    }

    /**
     * Fetch a page over HTTP and print a completion message.
     *
     * Work in progress: the page body is read but nothing is extracted from it
     * or persisted yet, and $depth is not used.
     *
     * @param  string  $url    Address of the page to fetch.
     * @param  mixed   $depth  Intended crawl depth (currently unused).
     * @return void
     *
     * @throws \GuzzleHttp\Exception\GuzzleException When the HTTP request fails.
     */
    public function crawlWebsite($url, $depth)
    {
        $client = new Client();
        $response = $client->get($url);

        // Only successful (2xx) responses are candidates for processing.
        $statusCode = $response->getStatusCode();
        if ($statusCode >= 200 && $statusCode < 300) {
            // TODO: extract the links from $body and store the page
            // (url, full content, depth, visited URLs, links).
            $body = $response->getBody()->getContents();
        }

        // TODO: crawl the links found on the page.

        echo 'Crawling completed!';
    }

    /**
     * Look up a stored crawl record by its URL.
     *
     * @param  string  $url  Value matched against the "url" field.
     * @return \App\Models\WebCrawl|false  The first matching record, or false when none exists.
     */
    public function getOne($url)
    {
        // first() yields null when nothing matches; callers expect false.
        return WebCrawl::where('url', $url)->first() ?? false;
    }

    /**
     * Persist a crawled page together with metadata taken from its HTTP response.
     *
     * @param  object  $response     Response for $url; must offer getBody(), getStatusCode(),
     *                               getReasonPhrase() and getHeader() (presumably a PSR-7
     *                               response as returned by Guzzle — confirm against callers).
     * @param  mixed   $url          Stored as the record's "url".
     * @param  mixed   $depth        Stored as the record's "depth".
     * @param  mixed   $visitedUrls  Stored as the record's "visited_urls".
     * @param  mixed   $links        Stored as the record's "links".
     * @return \App\Models\WebCrawl  The saved record.
     */
    private function create($response, $url, $depth, $visitedUrls, $links)
    {
        $webCrawl = new WebCrawl();
        $webCrawl->url = $url;

        // Casting the body to string reads it from the start, so the full
        // content is stored even if the stream was already consumed elsewhere
        // (getContents() would only return the unread remainder).
        $webCrawl->content = (string) $response->getBody();

        $webCrawl->depth = $depth;
        $webCrawl->visited_urls = $visitedUrls;
        $webCrawl->status_code = $response->getStatusCode();
        $webCrawl->status = $response->getReasonPhrase();

        // Both headers are optional; getHeader() returns an empty array when a
        // header is missing, so only override the timestamps when a value exists.
        $dateHeader = $response->getHeader('Date')[0] ?? null;
        if ($dateHeader !== null) {
            $webCrawl->created_at = $dateHeader;
        }

        $lastModifiedHeader = $response->getHeader('Last-Modified')[0] ?? null;
        if ($lastModifiedHeader !== null) {
            $webCrawl->updated_at = $lastModifiedHeader;
        }

        $webCrawl->links = $links;
        $webCrawl->save();

        return $webCrawl;
    }

    /**
     * Store a newly created resource in storage.
     *
     * Not implemented yet.
     *
     * @param  \Illuminate\Http\Request  $request
     * @return void
     */
    public function store(Request $request)
    {
        //
    }

    /**
     * Display the specified resource.
     *
     * Not implemented yet.
     *
     * @param  \App\Models\WebCrawl  $webCrawl
     * @return void
     */
    public function show(WebCrawl $webCrawl)
    {
        //
    }

    /**
     * Show the form for editing the specified resource.
     *
     * Not implemented yet.
     *
     * @param  \App\Models\WebCrawl  $webCrawl
     * @return void
     */
    public function edit(WebCrawl $webCrawl)
    {
        //
    }

    /**
     * Update the specified resource in storage.
     *
     * Not implemented yet.
     *
     * @param  \Illuminate\Http\Request  $request
     * @param  \App\Models\WebCrawl  $webCrawl
     * @return void
     */
    public function update(Request $request, WebCrawl $webCrawl)
    {
        //
    }

    /**
     * Remove the crawl record with the given identifier from storage.
     *
     * @param  mixed  $id  Value matched against the "_id" field.
     * @return bool  True when a record was found and deleted, false otherwise.
     */
    public function destroy($id)
    {
        // where() alone only builds a query (always truthy); first() is needed
        // to learn whether a matching record actually exists.
        $webCrawl = WebCrawl::where('_id', $id)->first();

        if ($webCrawl === null) {
            return false;
        }

        return (bool) $webCrawl->delete();
    }
}
|