save
This commit is contained in:
parent
f8dc38e673
commit
dd55ad1490
|
@ -0,0 +1,5 @@
|
|||
MONGODB_URI=mongodb://expdsn:58662@expdsn.cloud:27017
|
||||
# MONGODB_URI=mongodb://expdsn:58662@localhost:27017
|
||||
SESSION_SECRET=lREDRcaFwZIzM7Rjw63XGj8trTyMqhVUsVwwhuTQnFs=
|
||||
ALIYUN_RAM_ACCESS_KEY_ID=LTAI5tNzopZHJFa2Q9vqr1u5
|
||||
ALIYUN_RAM_ACCESS_KEY_SECRET=qPu7fyft0KJ1l6SGqbS71IW0vDbRlr
|
|
@ -14,6 +14,7 @@
|
|||
"axios": "^1.7.9",
|
||||
"cheerio": "^1.0.0",
|
||||
"g": "^2.0.1",
|
||||
"mongodb": "^6.13.0",
|
||||
"puppeteer": "^24.2.1",
|
||||
"uuid": "^11.0.5"
|
||||
},
|
||||
|
|
112
pnpm-lock.yaml
112
pnpm-lock.yaml
|
@ -17,6 +17,9 @@ importers:
|
|||
g:
|
||||
specifier: ^2.0.1
|
||||
version: 2.0.1
|
||||
mongodb:
|
||||
specifier: ^6.13.0
|
||||
version: 6.13.0(socks@2.8.4)
|
||||
puppeteer:
|
||||
specifier: ^24.2.1
|
||||
version: 24.2.1(typescript@5.7.3)
|
||||
|
@ -61,6 +64,9 @@ packages:
|
|||
'@jridgewell/trace-mapping@0.3.9':
|
||||
resolution: {integrity: sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==}
|
||||
|
||||
'@mongodb-js/saslprep@1.2.0':
|
||||
resolution: {integrity: sha512-+ywrb0AqkfaYuhHs6LxKWgqbh3I72EpEgESCw37o+9qPx9WTCkgDm2B+eMrwehGtHBWHFU4GXvnSCNiFhhausg==}
|
||||
|
||||
'@puppeteer/browsers@2.7.1':
|
||||
resolution: {integrity: sha512-MK7rtm8JjaxPN7Mf1JdZIZKPD2Z+W7osvrC1vjpvfOX1K0awDIHYbNi89f7eotp7eMUn2shWnt03HwVbriXtKQ==}
|
||||
engines: {node: '>=18'}
|
||||
|
@ -87,6 +93,12 @@ packages:
|
|||
'@types/uuid@10.0.0':
|
||||
resolution: {integrity: sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==}
|
||||
|
||||
'@types/webidl-conversions@7.0.3':
|
||||
resolution: {integrity: sha512-CiJJvcRtIgzadHCYXw7dqEnMNRjhGZlYK05Mj9OyktqV8uVT8fD2BFOB7S1uwBE3Kj2Z+4UyPmFw/Ixgw/LAlA==}
|
||||
|
||||
'@types/whatwg-url@11.0.5':
|
||||
resolution: {integrity: sha512-coYR071JRaHa+xoEvvYqvnIHaVqaYrLPbsufM9BF63HkwI5Lgmy2QR8Q5K/lYDYo5AK82wOvSOS0UsLTpTG7uQ==}
|
||||
|
||||
'@types/yauzl@2.10.3':
|
||||
resolution: {integrity: sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==}
|
||||
|
||||
|
@ -162,6 +174,10 @@ packages:
|
|||
boolbase@1.0.0:
|
||||
resolution: {integrity: sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==}
|
||||
|
||||
bson@6.10.3:
|
||||
resolution: {integrity: sha512-MTxGsqgYTwfshYWTRdmZRC+M7FnG1b4y7RO7p2k3X24Wq0yv1m77Wsj0BzlPzd/IowgESfsruQCUToa7vbOpPQ==}
|
||||
engines: {node: '>=16.20.1'}
|
||||
|
||||
buffer-crc32@0.2.13:
|
||||
resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==}
|
||||
|
||||
|
@ -445,6 +461,9 @@ packages:
|
|||
resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==}
|
||||
engines: {node: '>= 0.4'}
|
||||
|
||||
memory-pager@1.5.0:
|
||||
resolution: {integrity: sha512-ZS4Bp4r/Zoeq6+NLJpP+0Zzm0pR8whtGPf1XExKLJBAczGMnSi3It14OiNCStjQjM6NU1okjQGSxgEZN8eBYKg==}
|
||||
|
||||
mime-db@1.52.0:
|
||||
resolution: {integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==}
|
||||
engines: {node: '>= 0.6'}
|
||||
|
@ -456,6 +475,36 @@ packages:
|
|||
mitt@3.0.1:
|
||||
resolution: {integrity: sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==}
|
||||
|
||||
mongodb-connection-string-url@3.0.2:
|
||||
resolution: {integrity: sha512-rMO7CGo/9BFwyZABcKAWL8UJwH/Kc2x0g72uhDWzG48URRax5TCIcJ7Rc3RZqffZzO/Gwff/jyKwCU9TN8gehA==}
|
||||
|
||||
mongodb@6.13.0:
|
||||
resolution: {integrity: sha512-KeESYR5TEaFxOuwRqkOm3XOsMqCSkdeDMjaW5u2nuKfX7rqaofp7JQGoi7sVqQcNJTKuveNbzZtWMstb8ABP6Q==}
|
||||
engines: {node: '>=16.20.1'}
|
||||
peerDependencies:
|
||||
'@aws-sdk/credential-providers': ^3.188.0
|
||||
'@mongodb-js/zstd': ^1.1.0 || ^2.0.0
|
||||
gcp-metadata: ^5.2.0
|
||||
kerberos: ^2.0.1
|
||||
mongodb-client-encryption: '>=6.0.0 <7'
|
||||
snappy: ^7.2.2
|
||||
socks: ^2.7.1
|
||||
peerDependenciesMeta:
|
||||
'@aws-sdk/credential-providers':
|
||||
optional: true
|
||||
'@mongodb-js/zstd':
|
||||
optional: true
|
||||
gcp-metadata:
|
||||
optional: true
|
||||
kerberos:
|
||||
optional: true
|
||||
mongodb-client-encryption:
|
||||
optional: true
|
||||
snappy:
|
||||
optional: true
|
||||
socks:
|
||||
optional: true
|
||||
|
||||
ms@2.1.3:
|
||||
resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==}
|
||||
|
||||
|
@ -514,6 +563,10 @@ packages:
|
|||
pump@3.0.2:
|
||||
resolution: {integrity: sha512-tUPXtzlGM8FE3P0ZL6DVs/3P58k9nk8/jZeQCurTJylQA8qFYzHFfhBJkuqyE0FifOsQ0uKWekiZ5g8wtr28cw==}
|
||||
|
||||
punycode@2.3.1:
|
||||
resolution: {integrity: sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==}
|
||||
engines: {node: '>=6'}
|
||||
|
||||
puppeteer-core@24.2.1:
|
||||
resolution: {integrity: sha512-bCypUh3WXzETafv1TCFAjIUnI8BiQ/d+XvEfEXDLcIMm9CAvROqnBmbt79yBjwasoDZsgfXnUmIJU7Y27AalVQ==}
|
||||
engines: {node: '>=18'}
|
||||
|
@ -555,6 +608,9 @@ packages:
|
|||
resolution: {integrity: sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
|
||||
sparse-bitfield@3.0.3:
|
||||
resolution: {integrity: sha512-kvzhi7vqKTfkh0PZU+2D2PIllw2ymqJKujUcyPMd9Y75Nv4nPbGJZXNhxsgdQab2BmlDct1YnfQCguEvHr7VsQ==}
|
||||
|
||||
sprintf-js@1.1.3:
|
||||
resolution: {integrity: sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==}
|
||||
|
||||
|
@ -578,6 +634,10 @@ packages:
|
|||
text-decoder@1.2.3:
|
||||
resolution: {integrity: sha512-3/o9z3X0X0fTupwsYvR03pJ/DjWuqqrfwBgTQzdWDiQSm9KitAyz/9WqsT2JQW7KV2m+bC2ol/zqpW37NHxLaA==}
|
||||
|
||||
tr46@5.0.0:
|
||||
resolution: {integrity: sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
ts-node@10.9.2:
|
||||
resolution: {integrity: sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==}
|
||||
hasBin: true
|
||||
|
@ -617,6 +677,10 @@ packages:
|
|||
v8-compile-cache-lib@3.0.1:
|
||||
resolution: {integrity: sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==}
|
||||
|
||||
webidl-conversions@7.0.0:
|
||||
resolution: {integrity: sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==}
|
||||
engines: {node: '>=12'}
|
||||
|
||||
whatwg-encoding@3.1.1:
|
||||
resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==}
|
||||
engines: {node: '>=18'}
|
||||
|
@ -625,6 +689,10 @@ packages:
|
|||
resolution: {integrity: sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
whatwg-url@14.1.1:
|
||||
resolution: {integrity: sha512-mDGf9diDad/giZ/Sm9Xi2YcyzaFpbdLpJPr+E9fSkyQ7KpQD4SdFcugkRQYzhmfI4KeV4Qpnn2sKPdo+kmsgRQ==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
wrap-ansi@7.0.0:
|
||||
resolution: {integrity: sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==}
|
||||
engines: {node: '>=10'}
|
||||
|
@ -689,6 +757,10 @@ snapshots:
|
|||
'@jridgewell/resolve-uri': 3.1.2
|
||||
'@jridgewell/sourcemap-codec': 1.5.0
|
||||
|
||||
'@mongodb-js/saslprep@1.2.0':
|
||||
dependencies:
|
||||
sparse-bitfield: 3.0.3
|
||||
|
||||
'@puppeteer/browsers@2.7.1':
|
||||
dependencies:
|
||||
debug: 4.4.0
|
||||
|
@ -718,6 +790,12 @@ snapshots:
|
|||
|
||||
'@types/uuid@10.0.0': {}
|
||||
|
||||
'@types/webidl-conversions@7.0.3': {}
|
||||
|
||||
'@types/whatwg-url@11.0.5':
|
||||
dependencies:
|
||||
'@types/webidl-conversions': 7.0.3
|
||||
|
||||
'@types/yauzl@2.10.3':
|
||||
dependencies:
|
||||
'@types/node': 22.13.4
|
||||
|
@ -788,6 +866,8 @@ snapshots:
|
|||
|
||||
boolbase@1.0.0: {}
|
||||
|
||||
bson@6.10.3: {}
|
||||
|
||||
buffer-crc32@0.2.13: {}
|
||||
|
||||
call-bind-apply-helpers@1.0.2:
|
||||
|
@ -1085,6 +1165,8 @@ snapshots:
|
|||
|
||||
math-intrinsics@1.1.0: {}
|
||||
|
||||
memory-pager@1.5.0: {}
|
||||
|
||||
mime-db@1.52.0: {}
|
||||
|
||||
mime-types@2.1.35:
|
||||
|
@ -1093,6 +1175,19 @@ snapshots:
|
|||
|
||||
mitt@3.0.1: {}
|
||||
|
||||
mongodb-connection-string-url@3.0.2:
|
||||
dependencies:
|
||||
'@types/whatwg-url': 11.0.5
|
||||
whatwg-url: 14.1.1
|
||||
|
||||
mongodb@6.13.0(socks@2.8.4):
|
||||
dependencies:
|
||||
'@mongodb-js/saslprep': 1.2.0
|
||||
bson: 6.10.3
|
||||
mongodb-connection-string-url: 3.0.2
|
||||
optionalDependencies:
|
||||
socks: 2.8.4
|
||||
|
||||
ms@2.1.3: {}
|
||||
|
||||
netmask@2.0.2: {}
|
||||
|
@ -1173,6 +1268,8 @@ snapshots:
|
|||
end-of-stream: 1.4.4
|
||||
once: 1.4.0
|
||||
|
||||
punycode@2.3.1: {}
|
||||
|
||||
puppeteer-core@24.2.1:
|
||||
dependencies:
|
||||
'@puppeteer/browsers': 2.7.1
|
||||
|
@ -1228,6 +1325,10 @@ snapshots:
|
|||
source-map@0.6.1:
|
||||
optional: true
|
||||
|
||||
sparse-bitfield@3.0.3:
|
||||
dependencies:
|
||||
memory-pager: 1.5.0
|
||||
|
||||
sprintf-js@1.1.3: {}
|
||||
|
||||
streamx@2.22.0:
|
||||
|
@ -1267,6 +1368,10 @@ snapshots:
|
|||
dependencies:
|
||||
b4a: 1.6.7
|
||||
|
||||
tr46@5.0.0:
|
||||
dependencies:
|
||||
punycode: 2.3.1
|
||||
|
||||
ts-node@10.9.2(@types/node@22.13.4)(typescript@5.7.3):
|
||||
dependencies:
|
||||
'@cspotcode/source-map-support': 0.8.1
|
||||
|
@ -1299,12 +1404,19 @@ snapshots:
|
|||
|
||||
v8-compile-cache-lib@3.0.1: {}
|
||||
|
||||
webidl-conversions@7.0.0: {}
|
||||
|
||||
whatwg-encoding@3.1.1:
|
||||
dependencies:
|
||||
iconv-lite: 0.6.3
|
||||
|
||||
whatwg-mimetype@4.0.0: {}
|
||||
|
||||
whatwg-url@14.1.1:
|
||||
dependencies:
|
||||
tr46: 5.0.0
|
||||
webidl-conversions: 7.0.0
|
||||
|
||||
wrap-ansi@7.0.0:
|
||||
dependencies:
|
||||
ansi-styles: 4.3.0
|
||||
|
|
30
src/index.ts
30
src/index.ts
|
@ -1,10 +1,24 @@
|
|||
import { fetchData } from "./link";
|
||||
|
||||
function main() {
|
||||
console.log("Hello, this is the main function!");
|
||||
fetchData("AI写作工具")
|
||||
// test()
|
||||
import { fetchData, queryListData } from "./link";
|
||||
/**
 * Describes one crawl target passed to `queryListData`.
 */
export type FetchType = {
    /** Optional human-readable label for the target. */
    name?: string;
    /** Identifier of the target's category — presumably a database record id; TODO confirm. */
    typeId: string;
    /** Address associated with the target — presumably the listing page to crawl; TODO confirm. */
    url: string;
};
|
||||
/**
 * Crawl targets that `main` hands to `queryListData`.
 *
 * Every entry must carry a non-empty `typeId` and a `url`: an entry without
 * `url` makes the array unassignable to `FetchType[]`, and an empty `typeId`
 * would start a crawl for a category that does not exist. Add real categories
 * here instead of blank placeholders.
 */
const fetchList = [
    {
        name: 'AI写作工具',
        typeId: '67908fc33de33b392c0330af',
        url: 'https://ai-bot.cn/favorites/ai-writing-tools/',
    },
];
|
||||
/**
 * Program entry point: prints a greeting and starts crawling every entry of
 * `fetchList`.
 *
 * The crawl runs asynchronously, so `main` returns before it finishes. A
 * rejection from `queryListData` is logged here instead of being left as an
 * unhandled promise rejection.
 */
function main() {
    console.log("Hello, this is the main function!");
    queryListData(fetchList).catch((error: unknown) => {
        console.error('Crawl failed:', error);
    });
}
|
||||
|
||||
main();
|
||||
|
||||
main();
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
// lib/mongodb.ts
|
||||
import { MongoClient, Db } from 'mongodb';
|
||||
|
||||
|
||||
/**
 * MongoDB connection string, read from the MONGODB_URI environment variable so
 * that credentials are not embedded in source code. The variable must be
 * present in the process environment before this module is first imported.
 */
const uri = process.env.MONGODB_URI;
/** Extra options passed to every `MongoClient`; currently none. */
const options = {};

// Fail fast with a clear message when the connection string is missing.
if (!uri) {
    throw new Error('Please set the MONGODB_URI environment variable');
}

/** The global object, viewed with the slot used to cache the connection promise. */
type MongoGlobal = typeof globalThis & {
    _mongoClientPromise?: Promise<MongoClient>;
};

/** Promise for the connected client; `getDb` awaits it. */
let clientPromise: Promise<MongoClient>;

if (process.env.NODE_ENV === 'development') {
    // In development the connection promise is stored on the global object and
    // reused when already present — presumably so that re-evaluating this
    // module (e.g. on hot reload) does not open another connection.
    const cache = globalThis as MongoGlobal;
    if (!cache._mongoClientPromise) {
        cache._mongoClientPromise = new MongoClient(uri, options).connect();
    }
    clientPromise = cache._mongoClientPromise;
} else {
    // Outside development a single client is created per module evaluation.
    clientPromise = new MongoClient(uri, options).connect();
}
|
||||
|
||||
/**
 * Waits for the shared client connection and returns its `crawler` database
 * handle.
 *
 * @returns The `crawler` database of the connected client.
 */
export const getDb = async () => {
    const connected = await clientPromise;
    return connected.db('crawler');
};
|
||||
/**
 * Opens a collection by name in the database returned by `getDb`.
 *
 * @param collection - Name of the collection to open.
 * @returns The handle for that collection.
 */
export const getCollection = async (collection: string) =>
    (await getDb()).collection(collection);
|
|
@ -1,6 +1,8 @@
|
|||
import axios from 'axios';
|
||||
const cheerio = require('cheerio')
|
||||
import { downloadImage } from "../share/tools"
|
||||
import { getCollection } from '../lib/mongodb';
|
||||
import { FetchType } from '..';
|
||||
// 要抓取的网页 URL
|
||||
const url = 'https://ai-bot.cn/favorites/ai-writing-tools/';
|
||||
|
||||
|
@ -17,6 +19,10 @@ async function getPageData(url: string, name: string) {
|
|||
|
||||
}
|
||||
}
|
||||
/**
 * Starts `fetchData` for every entry of `list` at once and waits for all of
 * them.
 *
 * Resolves when every call has resolved; rejects as soon as any call rejects.
 *
 * @param list - Crawl targets; only each entry's `typeId` is used here.
 */
export async function queryListData(list: FetchType[]) {
    // NOTE(review): `typeId` is passed into a parameter that `fetchData` names
    // `typeName`, and `url` / `name` are not forwarded — confirm this is intended.
    const pending = Array.from(list, (entry) => fetchData(entry.typeId));
    await Promise.all(pending);
}
|
||||
export async function fetchData(typeName: string) {
|
||||
try {
|
||||
// 请求目标页面
|
||||
|
@ -28,6 +34,9 @@ export async function fetchData(typeName: string) {
|
|||
|
||||
// 提取工具卡片数据
|
||||
const toolsData = [] as any;
|
||||
const length = $('.url-card').length
|
||||
console.log(`正在爬取${typeName}类别的数据,共${length}条数据`);
|
||||
let i = 0
|
||||
for (const element of $('.url-card')) {
|
||||
const name = $(element).find('.url-info strong').text().trim();
|
||||
const tempLink = $(element).find('a').attr('href') as string;
|
||||
|
@ -59,12 +68,15 @@ export async function fetchData(typeName: string) {
|
|||
};
|
||||
|
||||
toolsData.push(toolData);
|
||||
console.log('完成' );
|
||||
i++
|
||||
console.log(`进度:${i}/${length}`);
|
||||
|
||||
}
|
||||
console.log(toolsData);
|
||||
|
||||
|
||||
const col = await getCollection('link');
|
||||
col.insertMany(toolsData);
|
||||
console.log('数据插入成功');
|
||||
} catch (error) {
|
||||
console.error('Error fetching data:', error);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue