Compare commits

..

No commits in common. "e775a58400fcb53999500a9cf21599e333acc410" and "b3b6cb403f37242d76549c81d108f272b5126749" have entirely different histories.

5 changed files with 7 additions and 261 deletions

View file

@ -2,10 +2,10 @@ FROM ubuntu:20.04 AS base
# Set non-interactive mode
ENV DEBIAN_FRONTEND noninteractive
ENV NODE_ENV=production
# Install required packages
RUN apt-get update && \
apt-get install -y curl wget gnupg2 libgbm-dev ca-certificates fonts-liberation libasound2 libatk-bridge2.0-0 libatk1.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 libgbm1 libgcc1 libglib2.0-0 libgtk-3-0 libnspr4 libnss3 libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libxcb1 libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 lsb-release wget xdg-utils && \
curl -sL https://deb.nodesource.com/setup_14.x | bash - && \
apt-get install -y nodejs chromium-browser

254
package-lock.json generated
View file

@ -11,11 +11,8 @@
"dependencies": {
"@nestjs/common": "^9.0.0",
"@nestjs/core": "^9.0.5",
"@nestjs/mongoose": "^9.2.2",
"@nestjs/platform-express": "^9.0.0",
"axios": "^1.3.5",
"dotenv": "^16.0.3",
"mongoose": "^7.0.4",
"puppeteer": "^19.9.1",
"reflect-metadata": "^0.1.13",
"rxjs": "^7.5.5"
@ -1567,18 +1564,6 @@
}
}
},
"node_modules/@nestjs/mongoose": {
"version": "9.2.2",
"resolved": "https://registry.npmjs.org/@nestjs/mongoose/-/mongoose-9.2.2.tgz",
"integrity": "sha512-szNuSUCwwbQSSeiTh8+tZ9fHV4nuzHwBDROb0hX0s7crwY15TunCfwyKbB2XjqkEQWUAasDeCBuKOJSL9N6tTg==",
"peerDependencies": {
"@nestjs/common": "^8.0.0 || ^9.0.0",
"@nestjs/core": "^8.0.0 || ^9.0.0",
"mongoose": "^6.0.2 || ^7.0.0",
"reflect-metadata": "^0.1.12",
"rxjs": "^7.0.0"
}
},
"node_modules/@nestjs/platform-express": {
"version": "9.0.0",
"resolved": "https://registry.npmjs.org/@nestjs/platform-express/-/platform-express-9.0.0.tgz",
@ -1986,7 +1971,8 @@
"node_modules/@types/node": {
"version": "18.0.3",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.0.3.tgz",
"integrity": "sha512-HzNRZtp4eepNitP+BD6k2L6DROIDG4Q0fm4x+dwfsr6LGmROENnok75VGw40628xf+iR24WeMFcHuuBDUAzzsQ=="
"integrity": "sha512-HzNRZtp4eepNitP+BD6k2L6DROIDG4Q0fm4x+dwfsr6LGmROENnok75VGw40628xf+iR24WeMFcHuuBDUAzzsQ==",
"devOptional": true
},
"node_modules/@types/parse-json": {
"version": "4.0.0",
@ -2047,20 +2033,6 @@
"@types/superagent": "*"
}
},
"node_modules/@types/webidl-conversions": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/@types/webidl-conversions/-/webidl-conversions-7.0.0.tgz",
"integrity": "sha512-xTE1E+YF4aWPJJeUzaZI5DRntlkY3+BCVJi0axFptnjGmAoWxkyREIh/XMrfxVLejwQxMCfDXdICo0VLxThrog=="
},
"node_modules/@types/whatwg-url": {
"version": "8.2.2",
"resolved": "https://registry.npmjs.org/@types/whatwg-url/-/whatwg-url-8.2.2.tgz",
"integrity": "sha512-FtQu10RWgn3D9U4aazdwIE2yzphmTJREDqNdODHrbrZmmMqI0vMheC/6NE/J1Yveaj8H+ela+YwWTjq5PGmuhA==",
"dependencies": {
"@types/node": "*",
"@types/webidl-conversions": "*"
}
},
"node_modules/@types/yargs": {
"version": "17.0.10",
"resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.10.tgz",
@ -3039,14 +3011,6 @@
"node-int64": "^0.4.0"
}
},
"node_modules/bson": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/bson/-/bson-5.2.0.tgz",
"integrity": "sha512-HevkSpDbpUfsrHWmWiAsNavANKYIErV2ePXllp1bwq5CDreAaFVj6RVlZpJnxK4WWDCJ/5jMUpaY6G526q3Hjg==",
"engines": {
"node": ">=14.20.1"
}
},
"node_modules/buffer": {
"version": "5.7.1",
"resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz",
@ -3644,14 +3608,6 @@
"node": ">=6.0.0"
}
},
"node_modules/dotenv": {
"version": "16.0.3",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.0.3.tgz",
"integrity": "sha512-7GO6HghkA5fYG9TYnNxi14/7K9f5occMlp3zXAuSxn7CKCxt9xbNWG7yF8hTCSUchlfWSe3uLmlPfigevRItzQ==",
"engines": {
"node": ">=12"
}
},
"node_modules/ee-first": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
@ -4982,11 +4938,6 @@
"node": ">= 0.10"
}
},
"node_modules/ip": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ip/-/ip-2.0.0.tgz",
"integrity": "sha512-WKa+XuLG1A1R0UWhl2+1XQSi+fZWMsYKffMZTTYsiZaUD8k2yDAj5atimTUD2TZkyCkNEeYE5NhFZmupOGtjYQ=="
},
"node_modules/ipaddr.js": {
"version": "1.9.1",
"resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
@ -6258,14 +6209,6 @@
"graceful-fs": "^4.1.6"
}
},
"node_modules/kareem": {
"version": "2.5.1",
"resolved": "https://registry.npmjs.org/kareem/-/kareem-2.5.1.tgz",
"integrity": "sha512-7jFxRVm+jD+rkq3kY0iZDJfsO2/t4BBPeEb2qKn2lR/9KhuksYk5hxzfRYWMPV8P/x2d0kHD306YyWLzjjH+uA==",
"engines": {
"node": ">=12.0.0"
}
},
"node_modules/kleur": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/kleur/-/kleur-3.0.3.tgz",
@ -6468,12 +6411,6 @@
"node": ">= 4.0.0"
}
},
"node_modules/memory-pager": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/memory-pager/-/memory-pager-1.5.0.tgz",
"integrity": "sha512-ZS4Bp4r/Zoeq6+NLJpP+0Zzm0pR8whtGPf1XExKLJBAczGMnSi3It14OiNCStjQjM6NU1okjQGSxgEZN8eBYKg==",
"optional": true
},
"node_modules/merge-descriptors": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
@ -6592,144 +6529,6 @@
"resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz",
"integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A=="
},
"node_modules/mongodb": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/mongodb/-/mongodb-5.1.0.tgz",
"integrity": "sha512-qgKb7y+EI90y4weY3z5+lIgm8wmexbonz0GalHkSElQXVKtRuwqXuhXKccyvIjXCJVy9qPV82zsinY0W1FBnJw==",
"dependencies": {
"bson": "^5.0.1",
"mongodb-connection-string-url": "^2.6.0",
"socks": "^2.7.1"
},
"engines": {
"node": ">=14.20.1"
},
"optionalDependencies": {
"saslprep": "^1.0.3"
},
"peerDependencies": {
"@aws-sdk/credential-providers": "^3.201.0",
"mongodb-client-encryption": "^2.3.0",
"snappy": "^7.2.2"
},
"peerDependenciesMeta": {
"@aws-sdk/credential-providers": {
"optional": true
},
"mongodb-client-encryption": {
"optional": true
},
"snappy": {
"optional": true
}
}
},
"node_modules/mongodb-connection-string-url": {
"version": "2.6.0",
"resolved": "https://registry.npmjs.org/mongodb-connection-string-url/-/mongodb-connection-string-url-2.6.0.tgz",
"integrity": "sha512-WvTZlI9ab0QYtTYnuMLgobULWhokRjtC7db9LtcVfJ+Hsnyr5eo6ZtNAt3Ly24XZScGMelOcGtm7lSn0332tPQ==",
"dependencies": {
"@types/whatwg-url": "^8.2.1",
"whatwg-url": "^11.0.0"
}
},
"node_modules/mongodb-connection-string-url/node_modules/tr46": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/tr46/-/tr46-3.0.0.tgz",
"integrity": "sha512-l7FvfAHlcmulp8kr+flpQZmVwtu7nfRV7NZujtN0OqES8EL4O4e0qqzL0DC5gAvx/ZC/9lk6rhcUwYvkBnBnYA==",
"dependencies": {
"punycode": "^2.1.1"
},
"engines": {
"node": ">=12"
}
},
"node_modules/mongodb-connection-string-url/node_modules/webidl-conversions": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-7.0.0.tgz",
"integrity": "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==",
"engines": {
"node": ">=12"
}
},
"node_modules/mongodb-connection-string-url/node_modules/whatwg-url": {
"version": "11.0.0",
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-11.0.0.tgz",
"integrity": "sha512-RKT8HExMpoYx4igMiVMY83lN6UeITKJlBQ+vR/8ZJ8OCdSiN3RwCq+9gH0+Xzj0+5IrM6i4j/6LuvzbZIQgEcQ==",
"dependencies": {
"tr46": "^3.0.0",
"webidl-conversions": "^7.0.0"
},
"engines": {
"node": ">=12"
}
},
"node_modules/mongoose": {
"version": "7.0.4",
"resolved": "https://registry.npmjs.org/mongoose/-/mongoose-7.0.4.tgz",
"integrity": "sha512-MEmQOOqQUvW1PJcji64NtA2EFGHrEvk9o4g//isVYSJW2+8Y8u49C2qFBKzn1t6/l9onQn012o/PcFqR6ixQpQ==",
"dependencies": {
"bson": "^5.0.1",
"kareem": "2.5.1",
"mongodb": "5.1.0",
"mpath": "0.9.0",
"mquery": "5.0.0",
"ms": "2.1.3",
"sift": "16.0.1"
},
"engines": {
"node": ">=14.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/mongoose"
}
},
"node_modules/mongoose/node_modules/ms": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="
},
"node_modules/mpath": {
"version": "0.9.0",
"resolved": "https://registry.npmjs.org/mpath/-/mpath-0.9.0.tgz",
"integrity": "sha512-ikJRQTk8hw5DEoFVxHG1Gn9T/xcjtdnOKIU1JTmGjZZlg9LST2mBLmcX3/ICIbgJydT2GOc15RnNy5mHmzfSew==",
"engines": {
"node": ">=4.0.0"
}
},
"node_modules/mquery": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/mquery/-/mquery-5.0.0.tgz",
"integrity": "sha512-iQMncpmEK8R8ncT8HJGsGc9Dsp8xcgYMVSbs5jgnm1lFHTZqMJTUWTDx1LBO8+mK3tPNZWFLBghQEIOULSTHZg==",
"dependencies": {
"debug": "4.x"
},
"engines": {
"node": ">=14.0.0"
}
},
"node_modules/mquery/node_modules/debug": {
"version": "4.3.4",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz",
"integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==",
"dependencies": {
"ms": "2.1.2"
},
"engines": {
"node": ">=6.0"
},
"peerDependenciesMeta": {
"supports-color": {
"optional": true
}
}
},
"node_modules/mquery/node_modules/ms": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
},
"node_modules/ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
@ -7268,6 +7067,7 @@
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz",
"integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==",
"dev": true,
"engines": {
"node": ">=6"
}
@ -7658,18 +7458,6 @@
"resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
"integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
},
"node_modules/saslprep": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/saslprep/-/saslprep-1.0.3.tgz",
"integrity": "sha512-/MY/PEMbk2SuY5sScONwhUDsV2p77Znkb/q3nSVstq/yQzYJOH/Azh29p9oJLsl3LnQwSvZDKagDGBsBwSooag==",
"optional": true,
"dependencies": {
"sparse-bitfield": "^3.0.3"
},
"engines": {
"node": ">=6"
}
},
"node_modules/schema-utils": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-3.1.1.tgz",
@ -7810,11 +7598,6 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/sift": {
"version": "16.0.1",
"resolved": "https://registry.npmjs.org/sift/-/sift-16.0.1.tgz",
"integrity": "sha512-Wv6BjQ5zbhW7VFefWusVP33T/EM0vYikCaQ2qR8yULbsilAT8/wQaXvuQ3ptGLpoKx+lihJE3y2UTgKDyyNHZQ=="
},
"node_modules/signal-exit": {
"version": "3.0.7",
"resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz",
@ -7836,28 +7619,6 @@
"node": ">=8"
}
},
"node_modules/smart-buffer": {
"version": "4.2.0",
"resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz",
"integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==",
"engines": {
"node": ">= 6.0.0",
"npm": ">= 3.0.0"
}
},
"node_modules/socks": {
"version": "2.7.1",
"resolved": "https://registry.npmjs.org/socks/-/socks-2.7.1.tgz",
"integrity": "sha512-7maUZy1N7uo6+WVEX6psASxtNlKaNVMlGQKkG/63nEDdLOWNbiUMoLK7X4uYoLhQstau72mLgfEWcXcwsaHbYQ==",
"dependencies": {
"ip": "^2.0.0",
"smart-buffer": "^4.2.0"
},
"engines": {
"node": ">= 10.13.0",
"npm": ">= 3.0.0"
}
},
"node_modules/source-map": {
"version": "0.7.3",
"resolved": "https://registry.npmjs.org/source-map/-/source-map-0.7.3.tgz",
@ -7893,15 +7654,6 @@
"deprecated": "Please use @jridgewell/sourcemap-codec instead",
"dev": true
},
"node_modules/sparse-bitfield": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/sparse-bitfield/-/sparse-bitfield-3.0.3.tgz",
"integrity": "sha512-kvzhi7vqKTfkh0PZU+2D2PIllw2ymqJKujUcyPMd9Y75Nv4nPbGJZXNhxsgdQab2BmlDct1YnfQCguEvHr7VsQ==",
"optional": true,
"dependencies": {
"memory-pager": "^1.0.2"
}
},
"node_modules/sprintf-js": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz",

View file

@ -21,11 +21,8 @@
"dependencies": {
"@nestjs/common": "^9.0.0",
"@nestjs/core": "^9.0.5",
"@nestjs/mongoose": "^9.2.2",
"@nestjs/platform-express": "^9.0.0",
"axios": "^1.3.5",
"dotenv": "^16.0.3",
"mongoose": "^7.0.4",
"puppeteer": "^19.9.1",
"reflect-metadata": "^0.1.13",
"rxjs": "^7.5.5"

View file

@ -5,12 +5,11 @@ import puppeteer from 'puppeteer';
import { URL } from 'url';
import axios from 'axios';
const environment = process.env.NODE_ENV || 'development';
@Injectable()
export class CrawlerService {
async crawl(url: string): Promise<any> {
const browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox'] });
async crawl(url: string) {
const browser = await puppeteer.launch();
const page = await browser.newPage();
const domain = this.extractDomain(url);
await page.goto(url);

View file

@ -10,8 +10,6 @@ export class DbService {
cssSheetsLocation: string[];
scriptsSheetsLocation: string[];
}, collection: string) {
console.log({data, collection});
}
}