diff --git a/src/crawler/crawler.service.ts b/src/crawler/crawler.service.ts index 283b4a6..b88760d 100644 --- a/src/crawler/crawler.service.ts +++ b/src/crawler/crawler.service.ts @@ -10,6 +10,7 @@ const environment = process.env.NODE_ENV || 'development'; @Injectable() export class CrawlerService { async crawl(url: string): Promise { + console.log("start crawl", url); const browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox'] }); const page = await browser.newPage(); const domain = this.extractDomain(url); @@ -19,6 +20,7 @@ export class CrawlerService { mkdirSync(directory); } // STYLESHEETS // + console.log("start stylesheets") const stylesheetsUrls = await page.$$eval('link[rel="stylesheet"]', links => links.map(link => link.href)); let cssDir = `${directory}/css/` const cssSheetsLocation = await this.downloadFiles(stylesheetsUrls, cssDir); @@ -42,10 +44,14 @@ export class CrawlerService { // URLS // const urls = await page.$$eval('a', links => links.map(link => link.href)); + const urlsList = urls.filter((url) => url.startsWith('http')); + console.log(urlsList); + await browser.close(); return { cssSheetsLocation, - scriptsSheetsLocation + scriptsSheetsLocation, + urlsList } }