axios added + downloaded stylesheets

This commit is contained in:
Kfir Dayan 2023-04-18 10:56:09 +03:00
parent d81f21ee64
commit 7385d5b6ab
3 changed files with 40 additions and 11 deletions

36
package-lock.json generated
View file

@ -12,6 +12,7 @@
"@nestjs/common": "^9.0.0", "@nestjs/common": "^9.0.0",
"@nestjs/core": "^9.0.5", "@nestjs/core": "^9.0.5",
"@nestjs/platform-express": "^9.0.0", "@nestjs/platform-express": "^9.0.0",
"axios": "^1.3.5",
"puppeteer": "^19.9.1", "puppeteer": "^19.9.1",
"reflect-metadata": "^0.1.13", "reflect-metadata": "^0.1.13",
"rxjs": "^7.5.5" "rxjs": "^7.5.5"
@ -2735,8 +2736,17 @@
"node_modules/asynckit": { "node_modules/asynckit": {
"version": "0.4.0", "version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
"dev": true },
"node_modules/axios": {
"version": "1.3.5",
"resolved": "https://registry.npmjs.org/axios/-/axios-1.3.5.tgz",
"integrity": "sha512-glL/PvG/E+xCWwV8S6nCHcrfg1exGx7vxyUIivIA1iL7BIh6bePylCfVHwp6k13ao7SATxB6imau2kqY+I67kw==",
"dependencies": {
"follow-redirects": "^1.15.0",
"form-data": "^4.0.0",
"proxy-from-env": "^1.1.0"
}
}, },
"node_modules/babel-jest": { "node_modules/babel-jest": {
"version": "28.1.2", "version": "28.1.2",
@ -3299,7 +3309,6 @@
"version": "1.0.8", "version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
"dev": true,
"dependencies": { "dependencies": {
"delayed-stream": "~1.0.0" "delayed-stream": "~1.0.0"
}, },
@ -3522,7 +3531,6 @@
"version": "1.0.0", "version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
"dev": true,
"engines": { "engines": {
"node": ">=0.4.0" "node": ">=0.4.0"
} }
@ -4322,6 +4330,25 @@
"integrity": "sha512-0sQoMh9s0BYsm+12Huy/rkKxVu4R1+r96YX5cG44rHV0pQ6iC3Q+mkoMFaGWObMFYQxCVT+ssG1ksneA2MI9KQ==", "integrity": "sha512-0sQoMh9s0BYsm+12Huy/rkKxVu4R1+r96YX5cG44rHV0pQ6iC3Q+mkoMFaGWObMFYQxCVT+ssG1ksneA2MI9KQ==",
"dev": true "dev": true
}, },
"node_modules/follow-redirects": {
"version": "1.15.2",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
"integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==",
"funding": [
{
"type": "individual",
"url": "https://github.com/sponsors/RubenVerborgh"
}
],
"engines": {
"node": ">=4.0"
},
"peerDependenciesMeta": {
"debug": {
"optional": true
}
}
},
"node_modules/fork-ts-checker-webpack-plugin": { "node_modules/fork-ts-checker-webpack-plugin": {
"version": "7.2.11", "version": "7.2.11",
"resolved": "https://registry.npmjs.org/fork-ts-checker-webpack-plugin/-/fork-ts-checker-webpack-plugin-7.2.11.tgz", "resolved": "https://registry.npmjs.org/fork-ts-checker-webpack-plugin/-/fork-ts-checker-webpack-plugin-7.2.11.tgz",
@ -4375,7 +4402,6 @@
"version": "4.0.0", "version": "4.0.0",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
"dev": true,
"dependencies": { "dependencies": {
"asynckit": "^0.4.0", "asynckit": "^0.4.0",
"combined-stream": "^1.0.8", "combined-stream": "^1.0.8",

View file

@ -22,6 +22,7 @@
"@nestjs/common": "^9.0.0", "@nestjs/common": "^9.0.0",
"@nestjs/core": "^9.0.5", "@nestjs/core": "^9.0.5",
"@nestjs/platform-express": "^9.0.0", "@nestjs/platform-express": "^9.0.0",
"axios": "^1.3.5",
"puppeteer": "^19.9.1", "puppeteer": "^19.9.1",
"reflect-metadata": "^0.1.13", "reflect-metadata": "^0.1.13",
"rxjs": "^7.5.5" "rxjs": "^7.5.5"

View file

@ -3,6 +3,7 @@ import { mkdirSync } from 'fs';
import * as fs from 'fs'; import * as fs from 'fs';
import puppeteer from 'puppeteer'; import puppeteer from 'puppeteer';
import { URL } from 'url'; import { URL } from 'url';
import axios from 'axios';
@Injectable() @Injectable()
export class CrawlerService { export class CrawlerService {
@ -21,14 +22,15 @@ export class CrawlerService {
// svae all stylesheets to disk // svae all stylesheets to disk
const stylesheetsUrls = await page.$$eval('link[rel="stylesheet"]', links => links.map(link => link.href)); const stylesheetsUrls = await page.$$eval('link[rel="stylesheet"]', links => links.map(link => link.href));
console.log(stylesheetsUrls) let cssDir = `${directory}/css/`
if (!fs.existsSync(cssDir)) {
mkdirSync(cssDir);
}
stylesheetsUrls.forEach(async (stylesheetUrl) => { stylesheetsUrls.forEach(async (stylesheetUrl) => {
if(!stylesheetUrl.startsWith('http')) return; if(!stylesheetUrl.startsWith('http')) return;
// console.log(stylesheetUrl) const response = await axios.get(stylesheetUrl);
const stylesheet = await page.goto(stylesheetUrl); const content = response.data;
// const content = await stylesheet.text(); fs.writeFileSync(`${cssDir}${stylesheetUrl.split('/').pop()}`, content);
// console.log(content)
}); });