diff --git a/src/crawler/crawler.service.ts b/src/crawler/crawler.service.ts index 0895d45..ba9c2e2 100644 --- a/src/crawler/crawler.service.ts +++ b/src/crawler/crawler.service.ts @@ -7,7 +7,8 @@ import axios from 'axios'; @Injectable() export class CrawlerService { - async crawl(url: string){ + async crawl(url: string) { + const browser = await puppeteer.launch(); const page = await browser.newPage(); const domain = this.extractDomain(url); @@ -20,21 +21,21 @@ export class CrawlerService { const stylesheetsUrls = await page.$$eval('link[rel="stylesheet"]', links => links.map(link => link.href)); let cssDir = `${directory}/css/` const cssSheetsLocation = await this.downloadFiles(stylesheetsUrls, cssDir); - console.log(`cssSheetsLocation: `, cssSheetsLocation); + // console.log(`cssSheetsLocation: `, cssSheetsLocation); // STYLESHEETS // // SCRIPTS // const scriptsUrls = await page.$$eval('script', scripts => scripts.map(script => script.src)); let scriptsDir = `${directory}/scripts/` const scriptsSheetsLocation = await this.downloadFiles(scriptsUrls, scriptsDir); - console.log(`scriptsSheetsLocation: `, scriptsSheetsLocation) + // console.log(`scriptsSheetsLocation: `, scriptsSheetsLocation) // SCRIPTS // // SCREENSHOT // const screenshotBuffer: Buffer = await page.screenshot({ fullPage: true }); fs.writeFile(`${directory}screenshot.png`, screenshotBuffer, (err) => { if (err) throw err; - console.log(`Screenshot saved! ${directory}screenshot.png`); + // console.log(`Screenshot saved! ${directory}screenshot.png`); }); // SCREENSHOT // @@ -44,27 +45,31 @@ export class CrawlerService { } async downloadFiles(urls: string[], path: string) { - const finalUrls = []; + const finalUrls: string[] = []; if (!fs.existsSync(path)) { mkdirSync(path); } - urls.forEach(async (url) => { - if (!url.startsWith('http')) return; - const response = await axios.get(url); - const content = response.data; - // trim / from end of url string - url = url.replace(/\/$/, ""); - // get last part of url - url = url.substring(url.lastIndexOf('/') + 1); - // save file - if(url.length > 10) { - url = url.substring(0, 10); - } - finalUrls.push(url); - console.log(`Saving file ${path}${url}`); - fs.writeFileSync(`${path}${url}`, content); - }); + await Promise.all( + urls.map(async (url) => { + if (!url.startsWith('http')) return; + const response = await axios.get(url); + const content = response.data; + // trim / from end of url string + let fileLocation = url.replace(/\/$/, ""); + // get last part of url + fileLocation = fileLocation.substring(fileLocation.lastIndexOf('/') + 1); + // save file + if (fileLocation.length > 10) { + fileLocation = fileLocation.substring(0, 10); + } + console.log("fileLocation: " + fileLocation) + finalUrls.push(fileLocation); + // console.log(`Saving file ${path}${url}`); + fs.writeFileSync(`${path}${fileLocation}`, content); + }) + ); + console.log(`finalUrls: `, finalUrls); return finalUrls; }