save
This commit is contained in:
parent
dd55ad1490
commit
1d50519a96
|
@ -4,6 +4,7 @@
|
||||||
"main": "index.js",
|
"main": "index.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"dev": "npx ts-node src/index.ts",
|
"dev": "npx ts-node src/index.ts",
|
||||||
|
"migrate": "npx ts-node src/mannual/migrate.ts",
|
||||||
"test": "echo \"Error: no test specified\" && exit 1"
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
},
|
},
|
||||||
"keywords": [],
|
"keywords": [],
|
||||||
|
|
20
src/index.ts
20
src/index.ts
|
@ -7,13 +7,25 @@ export type FetchType = {
|
||||||
const fetchList = [
|
const fetchList = [
|
||||||
{
|
{
|
||||||
name: 'AI写作工具',
|
name: 'AI写作工具',
|
||||||
typeId: '67908fc33de33b392c0330af',
|
typeId: '6790aae23de33b392c0330b2',
|
||||||
url: 'https://ai-bot.cn/favorites/ai-writing-tools/'
|
url: 'https://ai-bot.cn/favorites/ai-writing-tools/'
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: '',
|
name: 'AI图像网站',
|
||||||
typeId: ''
|
typeId: '67908fc33de33b392c0330af',
|
||||||
}
|
url: 'https://ai-bot.cn/favorites/best-ai-image-tools/'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'AI视频网站',
|
||||||
|
typeId: '67b6f0b7b139d1d6aa14cd06',
|
||||||
|
url: 'https://ai-bot.cn/favorites/ai-video-tools/'
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'AI音频网站',
|
||||||
|
typeId: '6791a98fc058e55ed0a094ca',
|
||||||
|
url: 'https://ai-bot.cn/favorites/ai-audio-tools/'
|
||||||
|
},
|
||||||
|
|
||||||
]
|
]
|
||||||
function main() {
|
function main() {
|
||||||
console.log("Hello, this is the main function!");
|
console.log("Hello, this is the main function!");
|
||||||
|
|
|
@ -23,8 +23,8 @@ if (process.env.NODE_ENV === 'development') {
|
||||||
clientPromise = client.connect();
|
clientPromise = client.connect();
|
||||||
}
|
}
|
||||||
|
|
||||||
export const getDb = async () => {
|
export const getDb = async (dbName?: string) => {
|
||||||
return (await clientPromise).db('crawler');
|
return (await clientPromise).db(dbName || 'crawler');
|
||||||
};
|
};
|
||||||
export const getCollection = async (collection: string) => {
|
export const getCollection = async (collection: string) => {
|
||||||
const ins = await getDb();
|
const ins = await getDb();
|
||||||
|
|
|
@ -4,7 +4,6 @@ import { downloadImage } from "../share/tools"
|
||||||
import { getCollection } from '../lib/mongodb';
|
import { getCollection } from '../lib/mongodb';
|
||||||
import { FetchType } from '..';
|
import { FetchType } from '..';
|
||||||
// 要抓取的网页 URL
|
// 要抓取的网页 URL
|
||||||
const url = 'https://ai-bot.cn/favorites/ai-writing-tools/';
|
|
||||||
|
|
||||||
async function getPageData(url: string, name: string) {
|
async function getPageData(url: string, name: string) {
|
||||||
try {
|
try {
|
||||||
|
@ -20,10 +19,13 @@ async function getPageData(url: string, name: string) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
export async function queryListData(list: FetchType[]) {
|
export async function queryListData(list: FetchType[]) {
|
||||||
const promiseList = list.map(item => fetchData(item.typeId))
|
const col = await getCollection('link');
|
||||||
await Promise.all(promiseList)
|
await col.deleteMany({})
|
||||||
|
for (const item of list) {
|
||||||
|
await fetchData(item.typeId, item.url)
|
||||||
}
|
}
|
||||||
export async function fetchData(typeName: string) {
|
}
|
||||||
|
export async function fetchData(typeName: string, url: string) {
|
||||||
try {
|
try {
|
||||||
// 请求目标页面
|
// 请求目标页面
|
||||||
const { data } = await axios.get(url);
|
const { data } = await axios.get(url);
|
||||||
|
@ -69,7 +71,10 @@ export async function fetchData(typeName: string) {
|
||||||
|
|
||||||
toolsData.push(toolData);
|
toolsData.push(toolData);
|
||||||
i++
|
i++
|
||||||
console.log(`进度:${i}/${length}`);
|
console.clear()
|
||||||
|
console.log(`正在爬取${typeName}类别的数据,共${length}条数据`);
|
||||||
|
|
||||||
|
console.log(`${typeName}:进度:${i}/${length}`);
|
||||||
|
|
||||||
}
|
}
|
||||||
console.log(toolsData);
|
console.log(toolsData);
|
||||||
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
import { getCollection, getDb } from "../lib/mongodb";
|
||||||
|
|
||||||
|
/**
 * Replaces the contents of the `link` collection in the `ai-bot` database
 * with the documents read from the collection that `getCollection('link')`
 * returns.
 *
 * If the source collection is empty, the target is left untouched: the
 * MongoDB driver rejects an empty `insertMany` batch, and clearing the target
 * first would discard its data with nothing to put in its place.
 *
 * @returns A promise that resolves once the copy has finished or been skipped.
 * @throws Whatever the underlying database calls reject with.
 */
async function migrateLink(): Promise<void> {
  const botDb = await getDb('ai-bot');
  const botCol = botDb.collection('link');
  const col = await getCollection('link');
  const links = await col.find().toArray();

  // Check before deleting so an empty source never wipes the target.
  if (links.length === 0) {
    console.warn('Migrate link skipped: source collection is empty');
    return;
  }

  await botCol.deleteMany({});
  await botCol.insertMany(links);
  console.log('Migrate link success');
}

// Script entry point: report failures and signal them through the exit code
// instead of leaving an unhandled promise rejection.
migrateLink().catch((error: unknown) => {
  console.error('Migrate link failed:', error);
  process.exitCode = 1;
});
|
|
@ -38,7 +38,7 @@ export async function downloadImage(url: string) {
|
||||||
|
|
||||||
// 写入文件
|
// 写入文件
|
||||||
await fs.writeFile(filePath, response.data);
|
await fs.writeFile(filePath, response.data);
|
||||||
return id;
|
return filename;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('下载失败:', error);
|
console.error('下载失败:', error);
|
||||||
throw error;
|
throw error;
|
||||||
|
|
Loading…
Reference in New Issue