From b8965e2e3ede702c624849cb01fbc3ddee272fa3 Mon Sep 17 00:00:00 2001 From: Kfir Dayan Date: Tue, 18 Apr 2023 11:46:57 +0300 Subject: [PATCH] fixing bugs --- src/crawler/crawler.service.ts | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/crawler/crawler.service.ts b/src/crawler/crawler.service.ts index 73a276a..0d55431 100644 --- a/src/crawler/crawler.service.ts +++ b/src/crawler/crawler.service.ts @@ -5,10 +5,8 @@ import puppeteer from 'puppeteer'; import { URL } from 'url'; import axios from 'axios'; - @Injectable() export class CrawlerService { - async crawl(url: string){ const browser = await puppeteer.launch(); const page = await browser.newPage(); @@ -21,13 +19,13 @@ export class CrawlerService { // STYLESHEETS // const stylesheetsUrls = await page.$$eval('link[rel="stylesheet"]', links => links.map(link => link.href)); let cssDir = `${directory}/css/` - await this.downloadFile(stylesheetsUrls, cssDir); + await this.downloadFiles(stylesheetsUrls, cssDir); // STYLESHEETS // // SCRIPTS // const scriptsUrls = await page.$$eval('script', scripts => scripts.map(script => script.src)); let scriptsDir = `${directory}/scripts/` - await this.downloadFile(scriptsUrls, scriptsDir); + await this.downloadFiles(scriptsUrls, scriptsDir); // SCRIPTS // // SCREENSHOT // @@ -43,17 +41,25 @@ export class CrawlerService { await browser.close(); } - async downloadFile(urls: string[], path: string) { + async downloadFiles(urls: string[], path: string) { if (!fs.existsSync(path)) { mkdirSync(path); } - console.log(urls) urls.forEach(async (url) => { if (!url.startsWith('http')) return; const response = await axios.get(url); const content = response.data; - fs.writeFileSync(`${path}${url.split('/').pop()}`, content); + // trim / from end of url string + url = url.replace(/\/$/, ""); + // get last part of url + url = url.substring(url.lastIndexOf('/') + 1); + // save file + if(url.length > 10) { + url = url.substring(0, 10); + } + console.log(`Saving file ${path}${url}`); + fs.writeFileSync(`${path}${url}`, content); }); }