You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
import { Command } from 'commander';
import { type Db, type EventSourceType } from 'db';
import { type Client } from 'tdl';
import { TelegramScrapper } from './scrappers/telegram.scrapper.js';
import { type Mq, type RawEventJob } from 'mq';
import { type Scrapper } from './scrappers/scrapper.js';
import { CONTAINER, DB, LOGGER, MQ, TELEGRAM } from '../shared/container.js';
import { type Logger } from 'logger';
import { MeetupScrapper } from './scrappers/meetup.scrapper.js';

// TODO add options to scrap
// - for all event sources
// - for all event sources of a specific type
// - for a specific event source by its uri

/**
 * `scrap` CLI command.
 *
 * Loads every event source from the database, scraps each one with the
 * scrapper registered for its type, queues one raw event job per scrapped
 * content item and stores the latest scrapped message id for the source.
 * All event sources are processed concurrently; a failure of one source does
 * not abort the others and is reported in the log once everything has settled.
 */
export const SCRAP_COMMAND = new Command('scrap').action(async () => {
  const db = CONTAINER.get<Db>(DB);
  const telegram = CONTAINER.get<Client>(TELEGRAM);
  const mq = CONTAINER.get<Mq>(MQ);
  const logger = CONTAINER.get<Logger>(LOGGER).clone('ScrapCommand');

  //
  // collect event sources
  //
  const eventSources = await db.eventSources.findAll();
  logger.info(`collected [${eventSources.length}] event sources to scrap`);

  //
  // configure scrappers
  //
  const scrappers = new Map<EventSourceType, Scrapper>([
    // TODO fix proper generic types here to avoid manual type assertion
    ['telegram', new TelegramScrapper(telegram, logger) as Scrapper],
    ['meetup', new MeetupScrapper(logger) as Scrapper],
  ]);

  //
  // process all event sources in parallel
  //
  // allSettled (rather than all) so that one failing source does not prevent
  // the remaining sources from being scrapped and reported.
  const results = await Promise.allSettled(
    eventSources.map(async (eventSource) => {
      const scrapper = scrappers.get(eventSource.type);
      if (scrapper == null) {
        throw new Error(
          `No scrapper is configured for event source type [${eventSource.type}]`,
        );
      }

      //
      // scrap event source
      //
      const contents = await scrapper.scrapEventSource(eventSource);
      if (contents.length === 0) {
        // nothing new: no jobs to queue and no message id to update
        return;
      }

      //
      // queue raw event jobs
      //
      const jobs: RawEventJob[] = contents.map((content) =>
        scrapper.createRawEventJob(eventSource, content),
      );
      await mq.rawEvents.queueJobsBulk(jobs);
      logger.info(
        `successfully queued [${jobs.length}] raw event jobs for event source [${eventSource.uri}]`,
        { uri: eventSource.uri },
      );

      //
      // update latest scrapped message id
      //
      // NOTE(review): uses the first content item; presumably the scrapper
      // returns the newest message first - confirm against the scrappers.
      await db.eventSources.updateLatestScrappedMessageId(
        eventSource.id,
        scrapper.getScrappedMessageId(contents[0]),
      );
    }),
  );

  // Pair every event source with its settled result; `results` has the same
  // order as `eventSources`, so a missing entry indicates a programming error.
  const esToResult = new Map(
    eventSources.map((eventSource, index) => {
      const result = results.at(index);
      if (result == null) {
        throw new Error(
          `no scrap result is found for event source with index [${index}] and uri [${eventSource.uri}]`,
        );
      }

      return [eventSource, result];
    }),
  );

  //
  // log results
  //
  // Rejection reasons are arbitrary thrown values, hence `unknown`.
  const esUriToReason = new Map<string, unknown>();
  for (const [eventSource, result] of esToResult) {
    if (result.status === 'rejected') {
      esUriToReason.set(eventSource.uri, result.reason);
    }
  }

  const successCount = esToResult.size - esUriToReason.size;
  logger.info(`successfully scrapped [${successCount}] event sources`);

  if (esUriToReason.size === 0) {
    logger.info(`no event source failed to scrap`);
    return;
  }

  for (const [uri, reason] of esUriToReason) {
    // Error instances report their message; anything else is serialized as JSON.
    const message =
      reason instanceof Error
        ? reason.message
        : JSON.stringify(reason, null, 2);

    logger.error(`event source [${uri}] finished with errors [${message}]`, {
      uri,
    });
  }
});
The text was updated successfully, but these errors were encountered:
for all event sources
for all event sources of a specific type
for a specific event source by its uri
const eventSources = await db.eventSources.findAll();
const scrappers = new Map<EventSourceType, Scrapper>([
https://api.github.com/vorant94/sofash/blob/fcb3ff714b19baf963f745027eebdd11e63af32c/apps/cli/src/scrap/index.ts#L11
The text was updated successfully, but these errors were encountered: