Web_Crawler_API/app/Http/Controllers/WebCrawlController.php

155 lines
3.8 KiB
PHP
Raw Normal View History

2023-05-30 14:29:46 +00:00
<?php
namespace App\Http\Controllers;
use App\Models\WebCrawl;
use Illuminate\Http\Request;
2023-05-30 17:19:53 +00:00
use GuzzleHttp\Client;
2023-05-30 14:29:46 +00:00
class WebCrawlController extends Controller
{
2023-05-30 17:19:53 +00:00
/**
 * Declared but never read or assigned by the methods of this class, which
 * all work with local variables instead. Presumably meant to hold a
 * WebCrawl instance -- TODO confirm or remove.
 *
 * @var mixed
 */
protected $webCrawl;
2023-05-30 14:29:46 +00:00
/**
 * List every stored crawl record.
 *
 * Loads all WebCrawl models and serialises the resulting collection as JSON.
 *
 * @return \Illuminate\Http\JsonResponse
 */
public function index()
{
    $allCrawls = WebCrawl::all();

    // Return the results in JSON format
    return response()->json($allCrawls);
}
/**
 * Fetch a page over HTTP as the first step of a crawl.
 *
 * Sends a GET request with a fresh Guzzle client and, when the status code
 * is in the 2xx range, reads the response body. Nothing is stored or
 * returned yet; the method only prints a completion message.
 *
 * @param mixed $url   Target handed straight to Client::get() (presumably a URL string).
 * @param mixed $depth Not used by the current implementation.
 * @return void
 */
public function crawlWebsite($url, $depth)
{
    $httpClient = new Client();
    $pageResponse = $httpClient->get($url);

    $statusCode = $pageResponse->getStatusCode();
    $isSuccess = $statusCode >= 200 && $statusCode < 300;

    if ($isSuccess) {
        // The body is read here but not used yet. Still to be derived for a
        // successful page: its links, full content, depth, url and the
        // visited URLs.
        $pageBody = $pageResponse->getBody()->getContents();
    }

    // Still to do: insert a WebCrawl record (url + content) into the
    // database for successful responses, then crawl the links on the page.
    echo 'Crawling completed!';
}
/**
 * Look up a single crawl record by its URL.
 *
 * @param mixed $url Value matched against the "url" field.
 * @return \App\Models\WebCrawl|false The first matching record, or false when none exists.
 */
public function getOne($url)
{
    // first() yields null when no row matches; callers get false in that case.
    $webCrawl = WebCrawl::where('url', $url)->first();

    return $webCrawl ?? false;
}
/**
 * Persist the outcome of one crawled page as a WebCrawl record.
 *
 * @param mixed $response    HTTP response object; only getBody(), getStatusCode(),
 *                           getReasonPhrase() and getHeader() are called on it
 *                           (presumably the Guzzle/PSR-7 response -- confirm with caller).
 * @param mixed $url         Stored in the record's "url" attribute.
 * @param mixed $depth       Stored in the record's "depth" attribute.
 * @param mixed $visitedUrls Stored in the record's "visited_urls" attribute.
 * @param mixed $links       Stored in the record's "links" attribute.
 * @return \App\Models\WebCrawl The saved record.
 */
private function create($response, $url, $depth, $visitedUrls, $links)
{
    $webCrawl = new WebCrawl();
    $webCrawl->url = $url;

    // Casting the body to a string reads it from the beginning when the
    // stream is seekable, so the full content is stored even if the caller
    // has already read the body. getContents() would only return the
    // unread remainder.
    $webCrawl->content = (string) $response->getBody();

    $webCrawl->depth = $depth;
    $webCrawl->visited_urls = $visitedUrls;
    $webCrawl->status_code = $response->getStatusCode();
    $webCrawl->status = $response->getReasonPhrase();

    // "Date" and "Last-Modified" are optional response headers. Indexing [0]
    // on a missing header raised an undefined-offset error, so each value is
    // only assigned when present; otherwise the attribute is left untouched
    // for the model's own timestamp handling (if enabled) to fill in.
    $createdAt = $response->getHeader('Date')[0] ?? null;
    if ($createdAt !== null) {
        $webCrawl->created_at = $createdAt;
    }

    $updatedAt = $response->getHeader('Last-Modified')[0] ?? null;
    if ($updatedAt !== null) {
        $webCrawl->updated_at = $updatedAt;
    }

    $webCrawl->links = $links;
    $webCrawl->save();

    return $webCrawl;
}
/**
 * Placeholder for persisting a new crawl record from an incoming request.
 *
 * The body is empty: the request is ignored and nothing is returned.
 *
 * @param  \Illuminate\Http\Request  $request
 * @return void
 */
public function store(Request $request)
{
    // No implementation yet.
}
/**
 * Placeholder for displaying a single crawl record.
 *
 * The body is empty: the given model is ignored and nothing is returned.
 *
 * @param  \App\Models\WebCrawl  $webCrawl
 * @return void
 */
public function show(WebCrawl $webCrawl)
{
    // No implementation yet.
}
/**
 * Placeholder for showing the edit form of a crawl record.
 *
 * The body is empty: the given model is ignored and nothing is returned.
 *
 * @param  \App\Models\WebCrawl  $webCrawl
 * @return void
 */
public function edit(WebCrawl $webCrawl)
{
    // No implementation yet.
}
/**
 * Placeholder for updating a stored crawl record from an incoming request.
 *
 * The body is empty: both arguments are ignored and nothing is returned.
 *
 * @param  \Illuminate\Http\Request  $request
 * @param  \App\Models\WebCrawl  $webCrawl
 * @return void
 */
public function update(Request $request, WebCrawl $webCrawl)
{
    // No implementation yet.
}
/**
 * Remove the crawl record(s) whose "_id" equals the given id.
 *
 * @param mixed $id Value matched against the "_id" field.
 * @return bool True when at least one record was deleted, false otherwise.
 */
public function destroy($id)
{
    // where() yields a query builder, which is always truthy, so success
    // has to be judged by the number of records the delete actually removed.
    $deletedCount = WebCrawl::where('_id', $id)->delete();

    return $deletedCount > 0;
}
}