downloading scripts
This commit is contained in:
parent
7385d5b6ab
commit
e4d982891f
1 changed files with 35 additions and 27 deletions
|
@ -5,22 +5,24 @@ import puppeteer from 'puppeteer';
|
|||
import { URL } from 'url';
|
||||
import axios from 'axios';
|
||||
|
||||
interface ScreenshotResponse {
|
||||
buffer: Buffer;
|
||||
path?: string;
|
||||
}
|
||||
|
||||
@Injectable()
|
||||
export class CrawlerService {
|
||||
|
||||
async crawl(url: string){
|
||||
const browser = await puppeteer.launch();
|
||||
const page = await browser.newPage();
|
||||
const domain = this.extractDomain(url);
|
||||
await page.goto(url);
|
||||
const directory = `crawler_assets/${this.extractDomain(url)}/`;
|
||||
const directory = `crawler_assets/${domain}/`;
|
||||
if (!fs.existsSync(directory)) {
|
||||
mkdirSync(directory);
|
||||
}
|
||||
await page.pdf({path: `${directory}/page.pdf`});
|
||||
|
||||
// extract stylesheets
|
||||
// save all stylesheets to disk
|
||||
|
||||
// STYLESHEETS //
|
||||
const stylesheetsUrls = await page.$$eval('link[rel="stylesheet"]', links => links.map(link => link.href));
|
||||
let cssDir = `${directory}/css/`
|
||||
if (!fs.existsSync(cssDir)) {
|
||||
|
@ -32,33 +34,39 @@ export class CrawlerService {
|
|||
const content = response.data;
|
||||
fs.writeFileSync(`${cssDir}${stylesheetUrl.split('/').pop()}`, content);
|
||||
});
|
||||
// STYLESHEETS //
|
||||
|
||||
// SCRIPTS //
|
||||
const scriptsUrls = await page.$$eval('script', scripts => scripts.map(script => script.src));
|
||||
let scriptsDir = `${directory}/scripts/`
|
||||
if (!fs.existsSync(scriptsDir)) {
|
||||
mkdirSync(scriptsDir);
|
||||
}
|
||||
console.log(scriptsUrls)
|
||||
|
||||
scriptsUrls.forEach(async (scriptUrl) => {
|
||||
if (!scriptUrl.startsWith('http')) return;
|
||||
const response = await axios.get(scriptUrl);
|
||||
const content = response.data;
|
||||
fs.writeFileSync(`${scriptsDir}${scriptUrl.split('/').pop()}`, content);
|
||||
});
|
||||
// SCRIPTS //
|
||||
|
||||
// SCREENSHOT //
|
||||
const screenshotBuffer: Buffer = await page.screenshot();
|
||||
fs.writeFile(`${directory}screenshot.png`, screenshotBuffer, (err) => {
|
||||
if (err) throw err;
|
||||
console.log(`Screenshot saved! ${directory}screenshot.png`);
|
||||
});
|
||||
// SCREENSHOT //
|
||||
|
||||
// const content = await page.content();
|
||||
// console.log(content)
|
||||
// console.log(content)
|
||||
// stylesheets.forEach(async (stylesheetUrl) => {
|
||||
// if(!stylesheetUrl.startsWith('http')) return;
|
||||
|
||||
// const stylesheet = await page.goto(stylesheetUrl);
|
||||
// const content = await stylesheet.text();
|
||||
// console.log(content)
|
||||
// console.log(stylesheet)
|
||||
// const response = await page.goto(stylesheet);
|
||||
// const content = await response.text();
|
||||
// console.log(content)
|
||||
// });
|
||||
|
||||
// extract scripts
|
||||
// save all scripts to disk
|
||||
|
||||
|
||||
|
||||
// URLS //
|
||||
const urls = await page.$$eval('a', links => links.map(link => link.href));
|
||||
console.log(urls);
|
||||
|
||||
|
||||
await page.screenshot({path: `${directory}/screenshot.png`});
|
||||
|
||||
|
||||
await browser.close();
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue