adding urlsList to the crawl result
This commit is contained in:
parent
9d3e39683c
commit
75dc340ec5
1 changed file with 7 additions and 1 deletion
|
@ -10,6 +10,7 @@ const environment = process.env.NODE_ENV || 'development';
|
|||
@Injectable()
|
||||
export class CrawlerService {
|
||||
async crawl(url: string): Promise<any> {
|
||||
console.log("start crawl", url);
|
||||
const browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox'] });
|
||||
const page = await browser.newPage();
|
||||
const domain = this.extractDomain(url);
|
||||
|
@ -19,6 +20,7 @@ export class CrawlerService {
|
|||
mkdirSync(directory);
|
||||
}
|
||||
// STYLESHEETS //
|
||||
console.log("start stylesheets")
|
||||
const stylesheetsUrls = await page.$$eval('link[rel="stylesheet"]', links => links.map(link => link.href));
|
||||
let cssDir = `${directory}/css/`
|
||||
const cssSheetsLocation = await this.downloadFiles(stylesheetsUrls, cssDir);
|
||||
|
@ -42,10 +44,14 @@ export class CrawlerService {
|
|||
|
||||
// URLS //
|
||||
const urls = await page.$$eval('a', links => links.map(link => link.href));
|
||||
const urlsList = urls.filter((url) => url.startsWith('http'));
|
||||
console.log(urlsList);
|
||||
|
||||
await browser.close();
|
||||
return {
|
||||
cssSheetsLocation,
|
||||
scriptsSheetsLocation
|
||||
scriptsSheetsLocation,
|
||||
urlsList
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue