improved - naming convention and spaces
This commit is contained in:
parent
231cd17902
commit
a9e810b8fb
3 changed files with 33 additions and 34 deletions
|
@ -52,8 +52,8 @@ # GET /api/crawl:
|
||||||
Crawls a website and stores the crawled data in the database. Required query parameter: url. Optional query parameter: depth (default: 1).
|
Crawls a website and stores the crawled data in the database. Required query parameter: url. Optional query parameter: depth (default: 1).
|
||||||
Parameters:
|
Parameters:
|
||||||
- `url` (required): The URL of the website to crawl.
|
- `url` (required): The URL of the website to crawl.
|
||||||
- `depth` (optional): The depth of the crawling process (default: 1).
|
- `depth` (optional): The depth of the crawling process (default: 0).
|
||||||
- `refresh` (optional): If set to true, the crawler will refresh the results for an existing URL (default: false).
|
- `refresh` (optional): If set to 1, the crawler will refresh the results for an existing URL (default: false).
|
||||||
# GET /api:
|
# GET /api:
|
||||||
Retrieves all crawled data from the database.
|
Retrieves all crawled data from the database.
|
||||||
# DELETE /api/crawl/{id}:
|
# DELETE /api/crawl/{id}:
|
||||||
|
|
|
@ -23,7 +23,7 @@ public function crawlWebsite(WebCrawlRequest $request)
|
||||||
{
|
{
|
||||||
$url = $request->query('url');
|
$url = $request->query('url');
|
||||||
$depth = $request->query('depth', 0);
|
$depth = $request->query('depth', 0);
|
||||||
$refresh = $request->query('refresh', false);
|
$refresh = $request->query('refresh', 0);
|
||||||
|
|
||||||
return $this->webCrawlerService->crawlWebsite($url, $depth, $refresh);
|
return $this->webCrawlerService->crawlWebsite($url, $depth, $refresh);
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,10 +23,9 @@ public function getAllCrawls()
|
||||||
|
|
||||||
public function crawlWebsite($url, $depth, $refresh)
|
public function crawlWebsite($url, $depth, $refresh)
|
||||||
{
|
{
|
||||||
// Check if the URL is already in the database
|
|
||||||
$webCrawl = WebCrawl::where('url', $url)->first();
|
$webCrawl = WebCrawl::where('url', $url)->first();
|
||||||
if ($webCrawl && !$refresh) {
|
if ($webCrawl && !$refresh) {
|
||||||
Log::error("This URL already exists in the database $url");
|
Log::error("This URL already exists in the database: $url");
|
||||||
return response()->json([
|
return response()->json([
|
||||||
'error' => 'This URL already exists in the database',
|
'error' => 'This URL already exists in the database',
|
||||||
], 400);
|
], 400);
|
||||||
|
@ -57,13 +56,13 @@ public function crawlWebsite($url, $depth, $refresh)
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Log::error("Failed to retrieve the URL $url");
|
Log::error("Failed to retrieve the URL: $url");
|
||||||
return response()->json([
|
return response()->json([
|
||||||
'error' => 'Failed to retrieve the URL',
|
'error' => 'Failed to retrieve the URL',
|
||||||
], 500);
|
], 500);
|
||||||
}
|
}
|
||||||
|
|
||||||
Log::info("Crawling completed successfully For URL $url");
|
Log::info("Crawling completed successfully for URL: $url");
|
||||||
return response()->json([
|
return response()->json([
|
||||||
'message' => 'Crawling completed successfully',
|
'message' => 'Crawling completed successfully',
|
||||||
]);
|
]);
|
||||||
|
@ -74,12 +73,12 @@ public function deleteCrawl($id)
|
||||||
$webCrawl = WebCrawl::find($id);
|
$webCrawl = WebCrawl::find($id);
|
||||||
if ($webCrawl) {
|
if ($webCrawl) {
|
||||||
$webCrawl->delete();
|
$webCrawl->delete();
|
||||||
Log::info("Web crawl deleted successfully For ID $id");
|
Log::info("Web crawl deleted successfully for ID: $id");
|
||||||
return response()->json([
|
return response()->json([
|
||||||
'message' => 'Web crawl deleted successfully',
|
'message' => 'Web crawl deleted successfully',
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
Log::error("Web crawl not found For ID $id");
|
Log::error("Web crawl not found for ID: $id");
|
||||||
return response()->json([
|
return response()->json([
|
||||||
'error' => 'Web crawl not found',
|
'error' => 'Web crawl not found',
|
||||||
], 404);
|
], 404);
|
||||||
|
@ -109,9 +108,9 @@ protected function crawlWebsiteRecursive($url, $depth)
|
||||||
$linksFromPage = $this->getLinksFromPage($crawler->content);
|
$linksFromPage = $this->getLinksFromPage($crawler->content);
|
||||||
try {
|
try {
|
||||||
$crawler->save();
|
$crawler->save();
|
||||||
Log::info("URL saved to the database $url");
|
Log::info("URL saved to the database: $url");
|
||||||
} catch (\Exception $e) {
|
} catch (\Exception $e) {
|
||||||
Log::error("Can't save the URL to the database $url");
|
Log::error("Can't save the URL to the database: $url");
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
if ($depth > 0 && count($linksFromPage) > 0) {
|
if ($depth > 0 && count($linksFromPage) > 0) {
|
||||||
|
|
Loading…
Reference in a new issue