Fix bugs: rename downloadFile to downloadFiles and shorten saved file names

This commit is contained in:
Kfir Dayan 2023-04-18 11:46:57 +03:00
parent 10af68eec7
commit b8965e2e3e

View file

@ -5,10 +5,8 @@ import puppeteer from 'puppeteer';
import { URL } from 'url'; import { URL } from 'url';
import axios from 'axios'; import axios from 'axios';
@Injectable() @Injectable()
export class CrawlerService { export class CrawlerService {
async crawl(url: string){ async crawl(url: string){
const browser = await puppeteer.launch(); const browser = await puppeteer.launch();
const page = await browser.newPage(); const page = await browser.newPage();
@ -21,13 +19,13 @@ export class CrawlerService {
// STYLESHEETS // // STYLESHEETS //
const stylesheetsUrls = await page.$$eval('link[rel="stylesheet"]', links => links.map(link => link.href)); const stylesheetsUrls = await page.$$eval('link[rel="stylesheet"]', links => links.map(link => link.href));
let cssDir = `${directory}/css/` let cssDir = `${directory}/css/`
await this.downloadFile(stylesheetsUrls, cssDir); await this.downloadFiles(stylesheetsUrls, cssDir);
// STYLESHEETS // // STYLESHEETS //
// SCRIPTS // // SCRIPTS //
const scriptsUrls = await page.$$eval('script', scripts => scripts.map(script => script.src)); const scriptsUrls = await page.$$eval('script', scripts => scripts.map(script => script.src));
let scriptsDir = `${directory}/scripts/` let scriptsDir = `${directory}/scripts/`
await this.downloadFile(scriptsUrls, scriptsDir); await this.downloadFiles(scriptsUrls, scriptsDir);
// SCRIPTS // // SCRIPTS //
// SCREENSHOT // // SCREENSHOT //
@ -43,17 +41,25 @@ export class CrawlerService {
await browser.close(); await browser.close();
} }
/**
 * Downloads every URL in `urls` that starts with `http` and writes each
 * response body to a file inside `path`.
 *
 * The file name is the last `/`-separated segment of the URL (after one
 * trailing slash is removed), capped at 10 characters. `path` is concatenated
 * directly with the file name, so it must end with a path separator.
 *
 * The returned promise resolves only after every download has been attempted,
 * so awaiting this method really waits for the files to be written. A URL
 * that fails is logged and skipped; it does not abort the remaining downloads.
 *
 * @param urls - Candidate URLs; entries that do not start with `http` are ignored.
 * @param path - Target directory including its trailing separator; created if missing.
 */
async downloadFiles(urls: string[], path: string): Promise<void> {
  // `recursive: true` also creates missing parents and is a no-op when the
  // directory already exists, so no separate existence check is needed.
  mkdirSync(path, { recursive: true });

  console.log(urls)

  // `forEach` with an async callback discards the promises it creates; mapping
  // and awaiting them keeps the caller from continuing before the writes land.
  await Promise.all(
    urls.map(async (url) => {
      if (!url.startsWith('http')) return;

      // trim / from end of url string
      const trimmed = url.replace(/\/$/, "");
      // get last part of url, capped at 10 characters
      // NOTE(review): the cap can drop the extension and make different URLs
      // share one file name (later writes overwrite earlier ones) — confirm
      // this is intended.
      const fileName = trimmed.substring(trimmed.lastIndexOf('/') + 1).substring(0, 10);
      // An empty name would make the write target the directory itself.
      if (fileName.length === 0) return;

      try {
        const response = await axios.get(url);
        const content = response.data;
        // save file
        console.log(`Saving file ${path}${fileName}`);
        fs.writeFileSync(`${path}${fileName}`, content);
      } catch (error) {
        // Best effort: report the failing asset and keep going with the others.
        const reason = error instanceof Error ? error.message : String(error);
        console.error(`Failed to download ${url}: ${reason}`);
      }
    }),
  );
}