Install mongoose and continue work on the REST API
This commit is contained in:
parent
75dc340ec5
commit
275d22e045
12 changed files with 423 additions and 764 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -4,7 +4,7 @@
|
||||||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||||
|
|
||||||
#puppeteer's output
|
#puppeteer's output
|
||||||
crawler_assets/**/*
|
sites_assets/**/*
|
||||||
|
|
||||||
|
|
||||||
# User-specific stuff:
|
# User-specific stuff:
|
||||||
|
|
1043
package-lock.json
generated
1043
package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
@ -26,7 +26,6 @@
|
||||||
"axios": "^1.3.5",
|
"axios": "^1.3.5",
|
||||||
"dotenv": "^16.0.3",
|
"dotenv": "^16.0.3",
|
||||||
"mongoose": "^7.0.4",
|
"mongoose": "^7.0.4",
|
||||||
"nest-mongodb": "^6.4.0",
|
|
||||||
"puppeteer": "^19.9.1",
|
"puppeteer": "^19.9.1",
|
||||||
"reflect-metadata": "^0.1.13",
|
"reflect-metadata": "^0.1.13",
|
||||||
"rxjs": "^7.5.5"
|
"rxjs": "^7.5.5"
|
||||||
|
|
|
@ -1,21 +1,68 @@
|
||||||
import { Body, Controller, Post } from '@nestjs/common';
|
import { Body, Controller, Delete, Get, Param, Post } from '@nestjs/common';
|
||||||
import { CrawlerService } from '../crawler/crawler.service';
|
import { CrawlerService } from '../crawler/crawler.service';
|
||||||
import { DbService } from '../db/db.service';
|
import { InjectModel } from '@nestjs/mongoose';
|
||||||
|
import { Model } from 'mongoose';
|
||||||
|
import { Site } from 'src/interfaces/site.interface';
|
||||||
|
|
||||||
@Controller('/')
|
@Controller('/')
|
||||||
export class ApiController {
|
export class ApiController {
|
||||||
constructor(private crawlerService: CrawlerService, private DbService: DbService) {}
|
constructor(private crawlerService: CrawlerService, @InjectModel('Site') private readonly siteModel: Model<Site> ) {}
|
||||||
@Post('crawl')
|
@Post('crawl')
|
||||||
async crawl(@Body() body: { url: string }) {
|
async crawl(@Body() body: { url: string }) {
|
||||||
|
|
||||||
const results = this.crawlerService.crawl(body.url);
|
const results = this.crawlerService.crawl(body.url);
|
||||||
results.then((data) => {
|
results.then((data) => {
|
||||||
console.log(data)
|
console.log("Done crawling !", data);
|
||||||
this.DbService.insert(data, 'crawler');
|
const newSite = new this.siteModel(data);
|
||||||
|
newSite.save().then((result) => {
|
||||||
|
console.log("Site saved !", result);
|
||||||
|
}).catch((err) => {
|
||||||
|
console.log("Error saving site !", err);
|
||||||
|
});
|
||||||
}).catch((err) => {
|
}).catch((err) => {
|
||||||
|
console.log("** Error crawling ! **", err);
|
||||||
console.log(err);
|
console.log(err);
|
||||||
});
|
});
|
||||||
return {
|
return {
|
||||||
message: 'Crawling in progress'
|
message: 'Got your request for ' + body.url
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get all
|
||||||
|
@Get('sites')
|
||||||
|
async getSites() {
|
||||||
|
const sites = await this.siteModel.find().exec();
|
||||||
|
return sites || {};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get by id
|
||||||
|
@Get('sites/:id')
|
||||||
|
async getSite(@Param('id') id: string) {
|
||||||
|
const site = await this.siteModel.findById(id).exec();
|
||||||
|
return site || {};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get by domain
|
||||||
|
@Get('sites/domain/:domain')
|
||||||
|
async getSiteByDomain(@Param('domain') domain: string) {
|
||||||
|
const site = await this.siteModel.findOne({ domain }).exec();
|
||||||
|
return site || {};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete by domain
|
||||||
|
@Delete('sites/domain/:domain')
|
||||||
|
async deleteSiteByDomain(@Param('domain') domain: string) {
|
||||||
|
const site = await this.siteModel.findOneAndDelete({ domain }).exec();
|
||||||
|
return site || {};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete by id
|
||||||
|
@Delete('sites/:id')
|
||||||
|
async deleteSite(@Param('id') id: string) {
|
||||||
|
const site = await this.siteModel.findByIdAndDelete(id).exec();
|
||||||
|
return site || {};
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,11 +1,13 @@
|
||||||
import { Module } from '@nestjs/common';
|
import { Module } from '@nestjs/common';
|
||||||
import { ApiController } from './api.controller';
|
import { ApiController } from './api.controller';
|
||||||
import { CrawlerService } from '../crawler/crawler.service';
|
import { CrawlerService } from '../crawler/crawler.service';
|
||||||
import { DbService } from '../db/db.service';
|
import { SitesSchema } from '../schema/sites.schema';
|
||||||
|
import { MongooseModule } from '@nestjs/mongoose';
|
||||||
|
|
||||||
@Module({
|
@Module({
|
||||||
|
imports: [MongooseModule.forFeature([{ name: 'Site', schema: SitesSchema }])],
|
||||||
controllers: [ApiController],
|
controllers: [ApiController],
|
||||||
providers: [CrawlerService, DbService]
|
providers: [CrawlerService]
|
||||||
|
|
||||||
})
|
})
|
||||||
export class ApiModule {}
|
export class ApiModule {}
|
||||||
|
|
|
@ -1,18 +1,20 @@
|
||||||
import { Module } from '@nestjs/common';
|
import { Module } from '@nestjs/common';
|
||||||
import { MongoModule } from 'nest-mongodb'
|
import { MongooseModule } from '@nestjs/mongoose';
|
||||||
import { ApiModule } from './api/api.module';
|
import { ApiModule } from './api/api.module';
|
||||||
import { CrawlerModule } from './crawler/crawler.module';
|
import { CrawlerModule } from './crawler/crawler.module';
|
||||||
const dotenv = require('dotenv');
|
const dotenv = require('dotenv');
|
||||||
// import { DbModule } from './db/db.module';
|
|
||||||
|
|
||||||
dotenv.config();
|
dotenv.config();
|
||||||
|
|
||||||
console.log(process)
|
|
||||||
@Module({
|
@Module({
|
||||||
imports: [
|
imports: [
|
||||||
MongoModule.forRoot(process.env.DB_URI, process.env.DB_NAME),
|
MongooseModule.forRoot(process.env.DB_URI, {
|
||||||
|
useNewUrlParser: true,
|
||||||
|
useUnifiedTopology: true,
|
||||||
|
}),
|
||||||
ApiModule,
|
ApiModule,
|
||||||
CrawlerModule
|
CrawlerModule
|
||||||
]
|
]
|
||||||
|
|
||||||
})
|
})
|
||||||
export class AppModule { }
|
export class AppModule { }
|
||||||
|
|
|
@ -1,9 +1,7 @@
|
||||||
import { Module } from '@nestjs/common';
|
import { Module } from '@nestjs/common';
|
||||||
import { CrawlerService } from './crawler.service';
|
import { CrawlerService } from './crawler.service';
|
||||||
import { DbModule } from '../db/db.module';
|
|
||||||
|
|
||||||
@Module({
|
@Module({
|
||||||
imports: [DbModule],
|
|
||||||
providers: [CrawlerService]
|
providers: [CrawlerService]
|
||||||
})
|
})
|
||||||
export class CrawlerModule {}
|
export class CrawlerModule {}
|
||||||
|
|
|
@ -4,18 +4,18 @@ import * as fs from 'fs';
|
||||||
import puppeteer from 'puppeteer';
|
import puppeteer from 'puppeteer';
|
||||||
import { URL } from 'url';
|
import { URL } from 'url';
|
||||||
import axios from 'axios';
|
import axios from 'axios';
|
||||||
|
import { Site } from '../interfaces/site.interface';
|
||||||
const environment = process.env.NODE_ENV || 'development';
|
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class CrawlerService {
|
export class CrawlerService {
|
||||||
async crawl(url: string): Promise<any> {
|
|
||||||
console.log("start crawl", url);
|
async crawl(url: string): Promise<Site> {
|
||||||
|
console.log("start crawl website", url);
|
||||||
const browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox'] });
|
const browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox'] });
|
||||||
const page = await browser.newPage();
|
const page = await browser.newPage();
|
||||||
const domain = this.extractDomain(url);
|
const domain = this.extractDomain(url);
|
||||||
await page.goto(url);
|
await page.goto(url);
|
||||||
const directory = `crawler_assets/${domain}/`;
|
const directory = `sites_assets/${domain}/`;
|
||||||
if (!fs.existsSync(directory)) {
|
if (!fs.existsSync(directory)) {
|
||||||
mkdirSync(directory);
|
mkdirSync(directory);
|
||||||
}
|
}
|
||||||
|
@ -49,6 +49,7 @@ export class CrawlerService {
|
||||||
|
|
||||||
await browser.close();
|
await browser.close();
|
||||||
return {
|
return {
|
||||||
|
domain,
|
||||||
cssSheetsLocation,
|
cssSheetsLocation,
|
||||||
scriptsSheetsLocation,
|
scriptsSheetsLocation,
|
||||||
urlsList
|
urlsList
|
||||||
|
|
|
@ -1,7 +0,0 @@
|
||||||
import { Module } from '@nestjs/common';
|
|
||||||
import { DbService } from './db.service';
|
|
||||||
|
|
||||||
@Module({
|
|
||||||
providers: [DbService]
|
|
||||||
})
|
|
||||||
export class DbModule {}
|
|
|
@ -1,17 +0,0 @@
|
||||||
import { Injectable } from '@nestjs/common';
|
|
||||||
|
|
||||||
@Injectable()
|
|
||||||
export class DbService {
|
|
||||||
constructor() {
|
|
||||||
console.log(`DbService constructor`);
|
|
||||||
}
|
|
||||||
|
|
||||||
insert(data: {
|
|
||||||
cssSheetsLocation: string[];
|
|
||||||
scriptsSheetsLocation: string[];
|
|
||||||
}, collection: string) {
|
|
||||||
|
|
||||||
console.log({data, collection});
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,8 +1,6 @@
|
||||||
import { ObjectId } from 'mongodb';
|
export interface Site {
|
||||||
|
|
||||||
export interface Cat {
|
|
||||||
_id: ObjectId;
|
|
||||||
domain: string;
|
domain: string;
|
||||||
cssSheetsLocation: string[];
|
cssSheetsLocation: string[];
|
||||||
scriptsSheetsLocation: string[];
|
scriptsSheetsLocation: string[];
|
||||||
}
|
urlsList: string[];
|
||||||
|
}
|
21
src/schema/sites.schema.ts
Normal file
21
src/schema/sites.schema.ts
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
import { Prop, Schema, SchemaFactory } from '@nestjs/mongoose';
|
||||||
|
import { Document } from 'mongoose';
|
||||||
|
|
||||||
|
export type SitesDocument = Sites & Document;
|
||||||
|
|
||||||
|
@Schema()
|
||||||
|
export class Sites {
|
||||||
|
@Prop({ required: true, unique: true})
|
||||||
|
domain: string;
|
||||||
|
|
||||||
|
@Prop()
|
||||||
|
cssSheetsLocation: string[];
|
||||||
|
|
||||||
|
@Prop()
|
||||||
|
scriptsSheetsLocation: string[];
|
||||||
|
|
||||||
|
@Prop({ required: true})
|
||||||
|
urlsList: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export const SitesSchema = SchemaFactory.createForClass(Sites);
|
Loading…
Reference in a new issue