installing mongoose + continuing to rest api

This commit is contained in:
Kfir Dayan 2023-04-19 01:47:05 +03:00
parent 75dc340ec5
commit 275d22e045
12 changed files with 423 additions and 764 deletions

2
.gitignore vendored
View file

@ -4,7 +4,7 @@
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
#puppeteer's output #puppeteer's output
crawler_assets/**/* sites_assets/**/*
# User-specific stuff: # User-specific stuff:

1043
package-lock.json generated

File diff suppressed because it is too large Load diff

View file

@ -26,7 +26,6 @@
"axios": "^1.3.5", "axios": "^1.3.5",
"dotenv": "^16.0.3", "dotenv": "^16.0.3",
"mongoose": "^7.0.4", "mongoose": "^7.0.4",
"nest-mongodb": "^6.4.0",
"puppeteer": "^19.9.1", "puppeteer": "^19.9.1",
"reflect-metadata": "^0.1.13", "reflect-metadata": "^0.1.13",
"rxjs": "^7.5.5" "rxjs": "^7.5.5"

View file

@ -1,21 +1,68 @@
import { Body, Controller, Post } from '@nestjs/common'; import { Body, Controller, Delete, Get, Param, Post } from '@nestjs/common';
import { CrawlerService } from '../crawler/crawler.service'; import { CrawlerService } from '../crawler/crawler.service';
import { DbService } from '../db/db.service'; import { InjectModel } from '@nestjs/mongoose';
import { Model } from 'mongoose';
import { Site } from 'src/interfaces/site.interface';
@Controller('/') @Controller('/')
export class ApiController { export class ApiController {
constructor(private crawlerService: CrawlerService, private DbService: DbService) {} constructor(private crawlerService: CrawlerService, @InjectModel('Site') private readonly siteModel: Model<Site> ) {}
@Post('crawl') @Post('crawl')
async crawl(@Body() body: { url: string }) { async crawl(@Body() body: { url: string }) {
const results = this.crawlerService.crawl(body.url); const results = this.crawlerService.crawl(body.url);
results.then((data) => { results.then((data) => {
console.log(data) console.log("Done crawling !", data);
this.DbService.insert(data, 'crawler'); const newSite = new this.siteModel(data);
newSite.save().then((result) => {
console.log("Site saved !", result);
}).catch((err) => {
console.log("Error saving site !", err);
});
}).catch((err) => { }).catch((err) => {
console.log("** Error crawling ! **", err);
console.log(err); console.log(err);
}); });
return { return {
message: 'Crawling in progress' message: 'Got your request for ' + body.url
} }
} }
/**
 * Lists every document available through the injected `Site` model.
 *
 * An unfiltered `find()` resolves to an array (empty when nothing is stored),
 * and an array is always truthy, so no fallback value is needed here.
 *
 * @returns All stored site documents; an empty array when there are none.
 */
@Get('sites')
async getSites() {
  return this.siteModel.find().exec();
}
/**
 * Looks up a single site by its document id.
 *
 * @param id - Value of the `:id` route segment, handed to `findById`.
 * @returns The matching document, or an empty object when the lookup yields nothing.
 */
@Get('sites/:id')
async getSite(@Param('id') id: string) {
  const found = await this.siteModel.findById(id).exec();
  if (found) {
    return found;
  }
  return {};
}
/**
 * Looks up a single site whose `domain` field equals the route parameter.
 *
 * @param domain - Value of the `:domain` route segment.
 * @returns The first matching document, or an empty object when none is found.
 */
@Get('sites/domain/:domain')
async getSiteByDomain(@Param('domain') domain: string) {
  const filter = { domain };
  const match = await this.siteModel.findOne(filter).exec();
  return match ? match : {};
}
/**
 * Deletes the site whose `domain` field equals the route parameter.
 *
 * @param domain - Value of the `:domain` route segment.
 * @returns Whatever `findOneAndDelete` resolves to, or an empty object when that value is falsy.
 */
@Delete('sites/domain/:domain')
async deleteSiteByDomain(@Param('domain') domain: string) {
  const filter = { domain };
  const removed = await this.siteModel.findOneAndDelete(filter).exec();
  if (removed) {
    return removed;
  }
  return {};
}
/**
 * Deletes a single site by its document id.
 *
 * @param id - Value of the `:id` route segment, handed to `findByIdAndDelete`.
 * @returns Whatever `findByIdAndDelete` resolves to, or an empty object when that value is falsy.
 */
@Delete('sites/:id')
async deleteSite(@Param('id') id: string) {
  const removed = await this.siteModel.findByIdAndDelete(id).exec();
  return removed ? removed : {};
}
} }

View file

@ -1,11 +1,13 @@
import { Module } from '@nestjs/common'; import { Module } from '@nestjs/common';
import { ApiController } from './api.controller'; import { ApiController } from './api.controller';
import { CrawlerService } from '../crawler/crawler.service'; import { CrawlerService } from '../crawler/crawler.service';
import { DbService } from '../db/db.service'; import { SitesSchema } from '../schema/sites.schema';
import { MongooseModule } from '@nestjs/mongoose';
@Module({ @Module({
imports: [MongooseModule.forFeature([{ name: 'Site', schema: SitesSchema }])],
controllers: [ApiController], controllers: [ApiController],
providers: [CrawlerService, DbService] providers: [CrawlerService]
}) })
export class ApiModule {} export class ApiModule {}

View file

@ -1,18 +1,20 @@
import { Module } from '@nestjs/common'; import { Module } from '@nestjs/common';
import { MongoModule } from 'nest-mongodb' import { MongooseModule } from '@nestjs/mongoose';
import { ApiModule } from './api/api.module'; import { ApiModule } from './api/api.module';
import { CrawlerModule } from './crawler/crawler.module'; import { CrawlerModule } from './crawler/crawler.module';
const dotenv = require('dotenv'); const dotenv = require('dotenv');
// import { DbModule } from './db/db.module';
dotenv.config(); dotenv.config();
console.log(process)
@Module({ @Module({
imports: [ imports: [
MongoModule.forRoot(process.env.DB_URI, process.env.DB_NAME), MongooseModule.forRoot(process.env.DB_URI, {
useNewUrlParser: true,
useUnifiedTopology: true,
}),
ApiModule, ApiModule,
CrawlerModule CrawlerModule
] ]
}) })
export class AppModule { } export class AppModule { }

View file

@ -1,9 +1,7 @@
import { Module } from '@nestjs/common'; import { Module } from '@nestjs/common';
import { CrawlerService } from './crawler.service'; import { CrawlerService } from './crawler.service';
import { DbModule } from '../db/db.module';
@Module({ @Module({
imports: [DbModule],
providers: [CrawlerService] providers: [CrawlerService]
}) })
export class CrawlerModule {} export class CrawlerModule {}

View file

@ -4,18 +4,18 @@ import * as fs from 'fs';
import puppeteer from 'puppeteer'; import puppeteer from 'puppeteer';
import { URL } from 'url'; import { URL } from 'url';
import axios from 'axios'; import axios from 'axios';
import { Site } from '../interfaces/site.interface';
const environment = process.env.NODE_ENV || 'development';
@Injectable() @Injectable()
export class CrawlerService { export class CrawlerService {
async crawl(url: string): Promise<any> {
console.log("start crawl", url); async crawl(url: string): Promise<Site> {
console.log("start crawl website", url);
const browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox'] }); const browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox'] });
const page = await browser.newPage(); const page = await browser.newPage();
const domain = this.extractDomain(url); const domain = this.extractDomain(url);
await page.goto(url); await page.goto(url);
const directory = `crawler_assets/${domain}/`; const directory = `sites_assets/${domain}/`;
if (!fs.existsSync(directory)) { if (!fs.existsSync(directory)) {
mkdirSync(directory); mkdirSync(directory);
} }
@ -49,6 +49,7 @@ export class CrawlerService {
await browser.close(); await browser.close();
return { return {
domain,
cssSheetsLocation, cssSheetsLocation,
scriptsSheetsLocation, scriptsSheetsLocation,
urlsList urlsList

View file

@ -1,7 +0,0 @@
import { Module } from '@nestjs/common';
import { DbService } from './db.service';
/** Nest module whose only declared provider is `DbService`. */
@Module({ providers: [DbService] })
export class DbModule {}

View file

@ -1,17 +0,0 @@
import { Injectable } from '@nestjs/common';
/**
 * Injectable stub service: both the constructor and `insert` only write to
 * the console; nothing is persisted by this class.
 */
@Injectable()
export class DbService {
  constructor() {
    // Trace that the provider was instantiated.
    console.log('DbService constructor');
  }

  /**
   * Logs the given payload together with the collection label.
   *
   * @param data - Object carrying the CSS and script sheet location lists.
   * @param collection - Label logged next to the payload (presumably the target collection name).
   */
  insert(
    data: { cssSheetsLocation: string[]; scriptsSheetsLocation: string[] },
    collection: string,
  ) {
    const entry = { data, collection };
    console.log(entry);
  }
}

View file

@ -1,8 +1,6 @@
import { ObjectId } from 'mongodb'; export interface Site {
export interface Cat {
_id: ObjectId;
domain: string; domain: string;
cssSheetsLocation: string[]; cssSheetsLocation: string[];
scriptsSheetsLocation: string[]; scriptsSheetsLocation: string[];
urlsList: string[];
} }

View file

@ -0,0 +1,21 @@
import { Prop, Schema, SchemaFactory } from '@nestjs/mongoose';
import { Document } from 'mongoose';
/** A `Sites` instance combined with Mongoose's `Document` members. */
export type SitesDocument = Sites & Document;

/**
 * Schema class for one stored site: its domain plus the lists of CSS sheet
 * locations, script sheet locations and URLs.
 *
 * Array properties declare their element type explicitly: TypeScript
 * reflection only reports `Array` for `string[]`, so without `type: [String]`
 * the elements would be stored untyped.
 */
@Schema()
export class Sites {
  /** Site domain; required, and `unique: true` asks Mongoose for a unique index. */
  @Prop({ required: true, unique: true })
  domain: string;

  /** Locations of the site's CSS sheets. */
  @Prop({ type: [String] })
  cssSheetsLocation: string[];

  /** Locations of the site's script sheets. */
  @Prop({ type: [String] })
  scriptsSheetsLocation: string[];

  /** URLs recorded for the site; marked required. */
  @Prop({ type: [String], required: true })
  urlsList: string[];
}

/** Mongoose schema generated from the decorated `Sites` class. */
export const SitesSchema = SchemaFactory.createForClass(Sites);