wip
This commit is contained in:
parent
daa1f363c8
commit
d81f21ee64
1 changed file with 42 additions and 3 deletions
|
@ -12,16 +12,55 @@ export class CrawlerService {
|
||||||
const page = await browser.newPage();
|
const page = await browser.newPage();
|
||||||
await page.goto(url);
|
await page.goto(url);
|
||||||
const directory = `crawler_assets/${this.extractDomain(url)}/`;
|
const directory = `crawler_assets/${this.extractDomain(url)}/`;
|
||||||
console.log(directory)
|
|
||||||
if (!fs.existsSync(directory)) {
|
if (!fs.existsSync(directory)) {
|
||||||
mkdirSync(directory);
|
mkdirSync(directory);
|
||||||
}
|
}
|
||||||
await page.pdf({path: `${directory}/page.pdf`, format: 'A4'});
|
await page.pdf({path: `${directory}/page.pdf`});
|
||||||
|
|
||||||
|
// extract stylesheets
|
||||||
|
// save all stylesheets to disk
|
||||||
|
|
||||||
|
const stylesheetsUrls = await page.$$eval('link[rel="stylesheet"]', links => links.map(link => link.href));
|
||||||
|
console.log(stylesheetsUrls)
|
||||||
|
stylesheetsUrls.forEach(async (stylesheetUrl) => {
|
||||||
|
if(!stylesheetUrl.startsWith('http')) return;
|
||||||
|
// console.log(stylesheetUrl)
|
||||||
|
const stylesheet = await page.goto(stylesheetUrl);
|
||||||
|
// const content = await stylesheet.text();
|
||||||
|
// console.log(content)
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// const content = await page.content();
|
||||||
|
// console.log(content)
|
||||||
|
// console.log(content)
|
||||||
|
// stylesheets.forEach(async (stylesheetUrl) => {
|
||||||
|
// if(!stylesheetUrl.startsWith('http')) return;
|
||||||
|
|
||||||
|
// const stylesheet = await page.goto(stylesheetUrl);
|
||||||
|
// const content = await stylesheet.text();
|
||||||
|
// console.log(content)
|
||||||
|
// console.log(stylesheet)
|
||||||
|
// const response = await page.goto(stylesheet);
|
||||||
|
// const content = await response.text();
|
||||||
|
// console.log(content)
|
||||||
|
// });
|
||||||
|
|
||||||
|
// extract scripts
|
||||||
|
// save all scripts to disk
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
await page.screenshot({path: `${directory}/screenshot.png`});
|
await page.screenshot({path: `${directory}/screenshot.png`});
|
||||||
await browser.close();
|
await browser.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
extractDomain(urlString) {
|
extractDomain(urlString: string) {
|
||||||
const url = new URL(urlString);
|
const url = new URL(urlString);
|
||||||
return url.hostname;
|
return url.hostname;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue