Fixing async/await/Promise issue with finalUrls: downloadFiles now awaits all downloads via Promise.all before returning

This commit is contained in:
Kfir Dayan 2023-04-18 12:31:40 +03:00
parent fb02db9b2f
commit eab79001ab

View file

@ -7,7 +7,8 @@ import axios from 'axios';
@Injectable() @Injectable()
export class CrawlerService { export class CrawlerService {
async crawl(url: string){ async crawl(url: string) {
const browser = await puppeteer.launch(); const browser = await puppeteer.launch();
const page = await browser.newPage(); const page = await browser.newPage();
const domain = this.extractDomain(url); const domain = this.extractDomain(url);
@ -20,21 +21,21 @@ export class CrawlerService {
const stylesheetsUrls = await page.$$eval('link[rel="stylesheet"]', links => links.map(link => link.href)); const stylesheetsUrls = await page.$$eval('link[rel="stylesheet"]', links => links.map(link => link.href));
let cssDir = `${directory}/css/` let cssDir = `${directory}/css/`
const cssSheetsLocation = await this.downloadFiles(stylesheetsUrls, cssDir); const cssSheetsLocation = await this.downloadFiles(stylesheetsUrls, cssDir);
console.log(`cssSheetsLocation: `, cssSheetsLocation); // console.log(`cssSheetsLocation: `, cssSheetsLocation);
// STYLESHEETS // // STYLESHEETS //
// SCRIPTS // // SCRIPTS //
const scriptsUrls = await page.$$eval('script', scripts => scripts.map(script => script.src)); const scriptsUrls = await page.$$eval('script', scripts => scripts.map(script => script.src));
let scriptsDir = `${directory}/scripts/` let scriptsDir = `${directory}/scripts/`
const scriptsSheetsLocation = await this.downloadFiles(scriptsUrls, scriptsDir); const scriptsSheetsLocation = await this.downloadFiles(scriptsUrls, scriptsDir);
console.log(`scriptsSheetsLocation: `, scriptsSheetsLocation) // console.log(`scriptsSheetsLocation: `, scriptsSheetsLocation)
// SCRIPTS // // SCRIPTS //
// SCREENSHOT // // SCREENSHOT //
const screenshotBuffer: Buffer = await page.screenshot({ fullPage: true }); const screenshotBuffer: Buffer = await page.screenshot({ fullPage: true });
fs.writeFile(`${directory}screenshot.png`, screenshotBuffer, (err) => { fs.writeFile(`${directory}screenshot.png`, screenshotBuffer, (err) => {
if (err) throw err; if (err) throw err;
console.log(`Screenshot saved! ${directory}screenshot.png`); // console.log(`Screenshot saved! ${directory}screenshot.png`);
}); });
// SCREENSHOT // // SCREENSHOT //
@ -44,27 +45,31 @@ export class CrawlerService {
} }
async downloadFiles(urls: string[], path: string) { async downloadFiles(urls: string[], path: string) {
const finalUrls = []; const finalUrls: string[] = [];
if (!fs.existsSync(path)) { if (!fs.existsSync(path)) {
mkdirSync(path); mkdirSync(path);
} }
urls.forEach(async (url) => { await Promise.all(
if (!url.startsWith('http')) return; urls.map(async (url) => {
const response = await axios.get(url); if (!url.startsWith('http')) return;
const content = response.data; const response = await axios.get(url);
// trim / from end of url string const content = response.data;
url = url.replace(/\/$/, ""); // trim / from end of url string
// get last part of url let fileLocation = url.replace(/\/$/, "");
url = url.substring(url.lastIndexOf('/') + 1); // get last part of url
// save file fileLocation = fileLocation.substring(fileLocation.lastIndexOf('/') + 1);
if(url.length > 10) { // save file
url = url.substring(0, 10); if (fileLocation.length > 10) {
} fileLocation = fileLocation.substring(0, 10);
finalUrls.push(url); }
console.log(`Saving file ${path}${url}`); console.log("fileLocation: " + fileLocation)
fs.writeFileSync(`${path}${url}`, content); finalUrls.push(fileLocation);
}); // console.log(`Saving file ${path}${url}`);
fs.writeFileSync(`${path}${fileLocation}`, content);
})
);
console.log(`finalUrls: `, finalUrls); console.log(`finalUrls: `, finalUrls);
return finalUrls; return finalUrls;
} }