wip
This commit is contained in:
parent
daa1f363c8
commit
d81f21ee64
1 changed files with 42 additions and 3 deletions
|
@ -12,16 +12,55 @@ export class CrawlerService {
|
|||
const page = await browser.newPage();
|
||||
await page.goto(url);
|
||||
const directory = `crawler_assets/${this.extractDomain(url)}/`;
|
||||
console.log(directory)
|
||||
if (!fs.existsSync(directory)) {
|
||||
mkdirSync(directory);
|
||||
}
|
||||
await page.pdf({path: `${directory}/page.pdf`, format: 'A4'});
|
||||
await page.pdf({path: `${directory}/page.pdf`});
|
||||
|
||||
// extract stylesheets
|
||||
// save all stylesheets to disk
|
||||
|
||||
const stylesheetsUrls = await page.$$eval('link[rel="stylesheet"]', links => links.map(link => link.href));
|
||||
console.log(stylesheetsUrls)
|
||||
stylesheetsUrls.forEach(async (stylesheetUrl) => {
|
||||
if(!stylesheetUrl.startsWith('http')) return;
|
||||
// console.log(stylesheetUrl)
|
||||
const stylesheet = await page.goto(stylesheetUrl);
|
||||
// const content = await stylesheet.text();
|
||||
// console.log(content)
|
||||
|
||||
});
|
||||
|
||||
|
||||
|
||||
|
||||
// const content = await page.content();
|
||||
// console.log(content)
|
||||
// console.log(content)
|
||||
// stylesheets.forEach(async (stylesheetUrl) => {
|
||||
// if(!stylesheetUrl.startsWith('http')) return;
|
||||
|
||||
// const stylesheet = await page.goto(stylesheetUrl);
|
||||
// const content = await stylesheet.text();
|
||||
// console.log(content)
|
||||
// console.log(stylesheet)
|
||||
// const response = await page.goto(stylesheet);
|
||||
// const content = await response.text();
|
||||
// console.log(content)
|
||||
// });
|
||||
|
||||
// extract scripts
|
||||
// save all scripts to disk
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
await page.screenshot({path: `${directory}/screenshot.png`});
|
||||
await browser.close();
|
||||
}
|
||||
|
||||
extractDomain(urlString) {
|
||||
extractDomain(urlString: string) {
|
||||
const url = new URL(urlString);
|
||||
return url.hostname;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue