From 7385d5b6ab94820a2bad63c9ca6cbd5114ee2a58 Mon Sep 17 00:00:00 2001 From: Kfir Dayan Date: Tue, 18 Apr 2023 10:56:09 +0300 Subject: [PATCH] axios added + downloaded stylesheets --- package-lock.json | 36 +++++++++++++++++++++++++++++----- package.json | 1 + src/crawler/crawler.service.ts | 14 +++++++------ 3 files changed, 40 insertions(+), 11 deletions(-) diff --git a/package-lock.json b/package-lock.json index 916e241..fdd7e52 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,6 +12,7 @@ "@nestjs/common": "^9.0.0", "@nestjs/core": "^9.0.5", "@nestjs/platform-express": "^9.0.0", + "axios": "^1.3.5", "puppeteer": "^19.9.1", "reflect-metadata": "^0.1.13", "rxjs": "^7.5.5" @@ -2735,8 +2736,17 @@ "node_modules/asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", - "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", - "dev": true + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + }, + "node_modules/axios": { + "version": "1.3.5", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.3.5.tgz", + "integrity": "sha512-glL/PvG/E+xCWwV8S6nCHcrfg1exGx7vxyUIivIA1iL7BIh6bePylCfVHwp6k13ao7SATxB6imau2kqY+I67kw==", + "dependencies": { + "follow-redirects": "^1.15.0", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" + } }, "node_modules/babel-jest": { "version": "28.1.2", @@ -3299,7 +3309,6 @@ "version": "1.0.8", "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", - "dev": true, "dependencies": { "delayed-stream": "~1.0.0" }, @@ -3522,7 +3531,6 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", - "dev": true, "engines": { "node": ">=0.4.0" } @@ -4322,6 +4330,25 @@ "integrity": "sha512-0sQoMh9s0BYsm+12Huy/rkKxVu4R1+r96YX5cG44rHV0pQ6iC3Q+mkoMFaGWObMFYQxCVT+ssG1ksneA2MI9KQ==", "dev": true }, + "node_modules/follow-redirects": { + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz", + "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, "node_modules/fork-ts-checker-webpack-plugin": { "version": "7.2.11", "resolved": "https://registry.npmjs.org/fork-ts-checker-webpack-plugin/-/fork-ts-checker-webpack-plugin-7.2.11.tgz", @@ -4375,7 +4402,6 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", - "dev": true, "dependencies": { "asynckit": "^0.4.0", "combined-stream": "^1.0.8", diff --git a/package.json b/package.json index a101a71..e144ccd 100644 --- a/package.json +++ b/package.json @@ -22,6 +22,7 @@ "@nestjs/common": "^9.0.0", "@nestjs/core": "^9.0.5", "@nestjs/platform-express": "^9.0.0", + "axios": "^1.3.5", "puppeteer": "^19.9.1", "reflect-metadata": "^0.1.13", "rxjs": "^7.5.5" diff --git a/src/crawler/crawler.service.ts b/src/crawler/crawler.service.ts index 12a58f2..4150e30 100644 --- a/src/crawler/crawler.service.ts +++ b/src/crawler/crawler.service.ts @@ -3,6 +3,7 @@ import { mkdirSync } from 'fs'; import * as fs from 'fs'; import puppeteer from 'puppeteer'; import { URL } from 'url'; +import axios from 'axios'; @Injectable() export class CrawlerService { @@ -21,14 +22,15 @@ export class CrawlerService { // svae all stylesheets to disk const stylesheetsUrls = await page.$$eval('link[rel="stylesheet"]', links => links.map(link => link.href)); - console.log(stylesheetsUrls) + let cssDir = `${directory}/css/` + if (!fs.existsSync(cssDir)) { + mkdirSync(cssDir); + } stylesheetsUrls.forEach(async (stylesheetUrl) => { if(!stylesheetUrl.startsWith('http')) return; - // console.log(stylesheetUrl) - const stylesheet = await page.goto(stylesheetUrl); - // const content = await stylesheet.text(); - // console.log(content) - + const response = await axios.get(stylesheetUrl); + const content = response.data; + fs.writeFileSync(`${cssDir}${stylesheetUrl.split('/').pop()}`, content); });