Fix async/await/Promise issue with finalUrls: await all downloads via Promise.all before returning
This commit is contained in:
parent
fb02db9b2f
commit
eab79001ab
1 changed file with 26 additions and 21 deletions
|
@@ -7,7 +7,8 @@ import axios from 'axios';
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class CrawlerService {
|
export class CrawlerService {
|
||||||
async crawl(url: string){
|
async crawl(url: string) {
|
||||||
|
|
||||||
const browser = await puppeteer.launch();
|
const browser = await puppeteer.launch();
|
||||||
const page = await browser.newPage();
|
const page = await browser.newPage();
|
||||||
const domain = this.extractDomain(url);
|
const domain = this.extractDomain(url);
|
||||||
|
@@ -20,21 +21,21 @@ export class CrawlerService {
|
||||||
const stylesheetsUrls = await page.$$eval('link[rel="stylesheet"]', links => links.map(link => link.href));
|
const stylesheetsUrls = await page.$$eval('link[rel="stylesheet"]', links => links.map(link => link.href));
|
||||||
let cssDir = `${directory}/css/`
|
let cssDir = `${directory}/css/`
|
||||||
const cssSheetsLocation = await this.downloadFiles(stylesheetsUrls, cssDir);
|
const cssSheetsLocation = await this.downloadFiles(stylesheetsUrls, cssDir);
|
||||||
console.log(`cssSheetsLocation: `, cssSheetsLocation);
|
// console.log(`cssSheetsLocation: `, cssSheetsLocation);
|
||||||
// STYLESHEETS //
|
// STYLESHEETS //
|
||||||
|
|
||||||
// SCRIPTS //
|
// SCRIPTS //
|
||||||
const scriptsUrls = await page.$$eval('script', scripts => scripts.map(script => script.src));
|
const scriptsUrls = await page.$$eval('script', scripts => scripts.map(script => script.src));
|
||||||
let scriptsDir = `${directory}/scripts/`
|
let scriptsDir = `${directory}/scripts/`
|
||||||
const scriptsSheetsLocation = await this.downloadFiles(scriptsUrls, scriptsDir);
|
const scriptsSheetsLocation = await this.downloadFiles(scriptsUrls, scriptsDir);
|
||||||
console.log(`scriptsSheetsLocation: `, scriptsSheetsLocation)
|
// console.log(`scriptsSheetsLocation: `, scriptsSheetsLocation)
|
||||||
// SCRIPTS //
|
// SCRIPTS //
|
||||||
|
|
||||||
// SCREENSHOT //
|
// SCREENSHOT //
|
||||||
const screenshotBuffer: Buffer = await page.screenshot({ fullPage: true });
|
const screenshotBuffer: Buffer = await page.screenshot({ fullPage: true });
|
||||||
fs.writeFile(`${directory}screenshot.png`, screenshotBuffer, (err) => {
|
fs.writeFile(`${directory}screenshot.png`, screenshotBuffer, (err) => {
|
||||||
if (err) throw err;
|
if (err) throw err;
|
||||||
console.log(`Screenshot saved! ${directory}screenshot.png`);
|
// console.log(`Screenshot saved! ${directory}screenshot.png`);
|
||||||
});
|
});
|
||||||
// SCREENSHOT //
|
// SCREENSHOT //
|
||||||
|
|
||||||
|
@@ -44,27 +45,31 @@ export class CrawlerService {
|
||||||
}
|
}
|
||||||
|
|
||||||
async downloadFiles(urls: string[], path: string) {
|
async downloadFiles(urls: string[], path: string) {
|
||||||
const finalUrls = [];
|
const finalUrls: string[] = [];
|
||||||
if (!fs.existsSync(path)) {
|
if (!fs.existsSync(path)) {
|
||||||
mkdirSync(path);
|
mkdirSync(path);
|
||||||
}
|
}
|
||||||
|
|
||||||
urls.forEach(async (url) => {
|
await Promise.all(
|
||||||
|
urls.map(async (url) => {
|
||||||
if (!url.startsWith('http')) return;
|
if (!url.startsWith('http')) return;
|
||||||
const response = await axios.get(url);
|
const response = await axios.get(url);
|
||||||
const content = response.data;
|
const content = response.data;
|
||||||
// trim / from end of url string
|
// trim / from end of url string
|
||||||
url = url.replace(/\/$/, "");
|
let fileLocation = url.replace(/\/$/, "");
|
||||||
// get last part of url
|
// get last part of url
|
||||||
url = url.substring(url.lastIndexOf('/') + 1);
|
fileLocation = fileLocation.substring(fileLocation.lastIndexOf('/') + 1);
|
||||||
// save file
|
// save file
|
||||||
if(url.length > 10) {
|
if (fileLocation.length > 10) {
|
||||||
url = url.substring(0, 10);
|
fileLocation = fileLocation.substring(0, 10);
|
||||||
}
|
}
|
||||||
finalUrls.push(url);
|
console.log("fileLocation: " + fileLocation)
|
||||||
console.log(`Saving file ${path}${url}`);
|
finalUrls.push(fileLocation);
|
||||||
fs.writeFileSync(`${path}${url}`, content);
|
// console.log(`Saving file ${path}${url}`);
|
||||||
});
|
fs.writeFileSync(`${path}${fileLocation}`, content);
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
console.log(`finalUrls: `, finalUrls);
|
console.log(`finalUrls: `, finalUrls);
|
||||||
return finalUrls;
|
return finalUrls;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue