fixing bugs
This commit is contained in:
parent
10af68eec7
commit
b8965e2e3e
1 changed file with 13 additions and 7 deletions
|
@ -5,10 +5,8 @@ import puppeteer from 'puppeteer';
|
|||
import { URL } from 'url';
|
||||
import axios from 'axios';
|
||||
|
||||
|
||||
@Injectable()
|
||||
export class CrawlerService {
|
||||
|
||||
async crawl(url: string){
|
||||
const browser = await puppeteer.launch();
|
||||
const page = await browser.newPage();
|
||||
|
@ -21,13 +19,13 @@ export class CrawlerService {
|
|||
// STYLESHEETS //
|
||||
const stylesheetsUrls = await page.$$eval('link[rel="stylesheet"]', links => links.map(link => link.href));
|
||||
let cssDir = `${directory}/css/`
|
||||
await this.downloadFile(stylesheetsUrls, cssDir);
|
||||
await this.downloadFiles(stylesheetsUrls, cssDir);
|
||||
// STYLESHEETS //
|
||||
|
||||
// SCRIPTS //
|
||||
const scriptsUrls = await page.$$eval('script', scripts => scripts.map(script => script.src));
|
||||
let scriptsDir = `${directory}/scripts/`
|
||||
await this.downloadFile(scriptsUrls, scriptsDir);
|
||||
await this.downloadFiles(scriptsUrls, scriptsDir);
|
||||
// SCRIPTS //
|
||||
|
||||
// SCREENSHOT //
|
||||
|
@ -43,17 +41,25 @@ export class CrawlerService {
|
|||
await browser.close();
|
||||
}
|
||||
|
||||
/**
 * Downloads each absolute HTTP(S) URL in `urls` and stores the response body
 * as a file inside the directory `path`.
 *
 * - The directory is created (including missing parents) when it does not exist.
 * - Entries that do not start with `http` are skipped.
 * - The file name is the last path segment of the URL (after dropping one
 *   trailing slash), cut to at most 10 characters.
 * - The returned promise resolves only after every download has been
 *   attempted. A failed download is logged and does not abort the others.
 *
 * NOTE(review): the 10-character cut can drop the file extension and can map
 * different URLs to the same file name (the later write wins). It is kept
 * here because other code may rely on the resulting names — confirm.
 *
 * @param urls - Candidate resource URLs.
 * @param path - Target directory. The file name is appended directly, so the
 *   value must already end with a path separator.
 */
async downloadFiles(urls: string[], path: string): Promise<void> {
  if (!fs.existsSync(path)) {
    // recursive: create missing parent directories instead of throwing.
    mkdirSync(path, { recursive: true });
  }
  console.log(urls);

  // Array.forEach ignores the promises returned by an async callback, so the
  // caller's `await` would finish before any file was written. Mapping to
  // promises and awaiting them all makes completion observable.
  await Promise.all(
    urls.map(async (url) => {
      if (!url.startsWith('http')) return;

      try {
        // Request raw bytes so the payload is written unchanged, whatever its
        // content type (the default parsing may turn the body into an object).
        const response = await axios.get(url, { responseType: 'arraybuffer' });
        const content = response.data;

        // Trim one trailing slash, then keep the last path segment.
        let fileName = url.replace(/\/$/, '');
        fileName = fileName.substring(fileName.lastIndexOf('/') + 1);

        // Keep file names short.
        if (fileName.length > 10) {
          fileName = fileName.substring(0, 10);
        }

        console.log(`Saving file ${path}${fileName}`);
        fs.writeFileSync(`${path}${fileName}`, content);
      } catch (error) {
        // Best effort: one broken resource must not reject the whole batch.
        const reason = error instanceof Error ? error.message : String(error);
        console.error(`Failed to download ${url}: ${reason}`);
      }
    }),
  );
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue