Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Setup custom listing processors #86

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ db/
npm-debug.log
.DS_Store
.idea
dist/
35 changes: 21 additions & 14 deletions index.js → index.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,20 @@
import fs from 'fs';
import { config } from './lib/utils.js';
import * as similarityCache from './lib/services/similarity-check/similarityCache.js';
import { setLastJobExecution } from './lib/services/storage/listingsStorage.js';
import * as jobStorage from './lib/services/storage/jobStorage.js';
import FredyRuntime from './lib/FredyRuntime.js';
import { duringWorkingHoursOrNotSet } from './lib/utils.js';
import './lib/api/api.js';
import { ProviderJobInformation, providers } from './lib/provider/provider.js';
//if db folder does not exist, ensure to create it before loading anything else
if (!fs.existsSync('./db')) {
fs.mkdirSync('./db');
}
const path = './lib/provider';
const provider = fs.readdirSync(path).filter((file) => file.endsWith('.js'));
//assuming interval is always in minutes
const INTERVAL = config.interval * 60 * 1000;
/* eslint-disable no-console */
console.log(`Started Fredy successfully. Ui can be accessed via http://localhost:${config.port}`);
/* eslint-enable no-console */
const fetchedProvider = await Promise.all(
provider.filter((provider) => provider.endsWith('.js')).map(async (pro) => import(`${path}/${pro}`))
);

setInterval(
(function exec() {
Expand All @@ -30,14 +25,26 @@ setInterval(
.getJobs()
.filter((job) => job.enabled)
.forEach((job) => {
job.provider
.filter((p) => fetchedProvider.find((fp) => fp.metaInformation.id === p.id) != null)
.forEach(async (prov) => {
const pro = fetchedProvider.find((fp) => fp.metaInformation.id === prov.id);
pro.init(prov, job.blacklist);
await new FredyRuntime(pro.config, job.notificationAdapter, prov.id, job.id, similarityCache).execute();
setLastJobExecution(job.id);
});
const validJobProviders: ProviderJobInformation[] = job.provider.filter(
(provider: ProviderJobInformation) => {
const hasExistingProvider =
providers.find((loadedProvider) => loadedProvider.metaInformation.id === provider.id) != null;
return hasExistingProvider;
}
);
validJobProviders.forEach(async (jobProvider) => {
const provider = providers.find((provider) => provider.metaInformation.id === jobProvider.id)!;
provider.init(jobProvider, job.blacklist);

await new FredyRuntime(
provider.config,
job.notificationAdapter,
jobProvider.id,
job.id,
job.listingProcessors
).execute();
setLastJobExecution(job.id);
});
});
} else {
/* eslint-disable no-console */
Expand Down
122 changes: 67 additions & 55 deletions lib/FredyRuntime.js → lib/FredyRuntime.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
import { NoNewListingsWarning } from './errors.js';
import { setKnownListings, getKnownListings } from './services/storage/listingsStorage.js';
import * as notify from './notification/notify.js';
import * as process from './processors/process.js';
import xray from './services/scraper.js';
import * as scrapingAnt from './services/scrapingAnt.js';
import urlModifier from './services/queryStringMutator.js';
import { Listing, ProviderConfig } from './provider/provider.js';
import { ProcessorConfig } from './processors/Processor.js';

class FredyRuntime {
private providerConfig: ProviderConfig;
private notificationAdapterConfigs: notify.NotifierAdapterConfig[];
private providerId: string;
private jobKey: string;
private listingProcessors?: ProcessorConfig[];

/**
*
* @param providerConfig the config for the specific provider, we're going to query at the moment
Expand All @@ -13,17 +23,24 @@ class FredyRuntime {
* @param jobKey key of the job that is currently running (from within the config)
* @param listingProcessors optional processor configs of the job that are applied to new listings before notifying
*/
constructor(providerConfig, notificationConfig, providerId, jobKey, similarityCache) {
this._providerConfig = providerConfig;
this._notificationConfig = notificationConfig;
this._providerId = providerId;
this._jobKey = jobKey;
this._similarityCache = similarityCache;
constructor(
providerConfig: ProviderConfig,
notificationConfig: notify.NotifierAdapterConfig[],
providerId: string,
jobKey: string,
listingProcessors?: ProcessorConfig[]
) {
this.providerConfig = providerConfig;
this.notificationAdapterConfigs = notificationConfig;
this.providerId = providerId;
this.jobKey = jobKey;
this.listingProcessors = listingProcessors;
console.log('Setup freddy runtime');
}
execute() {
return (
//modify the url to make sure search order is correctly set
Promise.resolve(urlModifier(this._providerConfig.url, this._providerConfig.sortByDateParam))
Promise.resolve(urlModifier(this.providerConfig.url, this.providerConfig.sortByDateParam))
//scraping the site and try finding new listings
.then(this._getListings.bind(this))
//bring them in a proper form (dictated by the provider)
Expand All @@ -34,97 +51,92 @@ class FredyRuntime {
.then(this._findNew.bind(this))
//store everything in db
.then(this._save.bind(this))
//check for similar listings. if found, remove them before notifying
.then(this._filterBySimilarListings.bind(this))
//process all listings using global processors + job-configured processors
.then(this._processListings.bind(this))
//notify the user using the configured notification adapter
.then(this._notify.bind(this))
//if an error occurred on the way, handle it here.
.catch(this._handleError.bind(this))
);
}
_getListings(url) {
_getListings(url: string) {
return new Promise((resolve, reject) => {
const id = this._providerId;
const id = this.providerId;
if (scrapingAnt.needScrapingAnt(id) && !scrapingAnt.isScrapingAntApiKeySet()) {
const error = 'Immoscout or Immonet can only be used with if you have set an apikey for scrapingAnt.';
/* eslint-disable no-console */
console.log(error);

/* eslint-enable no-console */
reject(error);
return;
}
const u = scrapingAnt.needScrapingAnt(id) ? scrapingAnt.transformUrlForScrapingAnt(url, id) : url;
try {
if (this._providerConfig.paginate != null) {
xray(u, this._providerConfig.crawlContainer, [this._providerConfig.crawlFields])
//the first 2 pages should be enough here
.limit(2)
.paginate(this._providerConfig.paginate)
.then((listings) => {
resolve(listings == null ? [] : listings);
})
.catch((err) => {
reject(err);
console.error(err);
});
} else {
xray(u, this._providerConfig.crawlContainer, [this._providerConfig.crawlFields])
.then((listings) => {
resolve(listings == null ? [] : listings);
})
.catch((err) => {
reject(err);
console.error(err);
});
const xrayPromise = xray(u, this.providerConfig.crawlContainer, [this.providerConfig.crawlFields]);

if (this.providerConfig.paginate != null) {
//the first 2 pages should be enough here
xrayPromise.limit(2).paginate(this.providerConfig.paginate);
}

xrayPromise
.then((listings) => {
resolve(listings == null ? [] : listings);
})
.catch((err) => {
reject(err);
console.error(err);
});
} catch (error) {
reject(error);
console.error(error);
}
});
}
_normalize(listings) {
return listings.map(this._providerConfig.normalize);

_normalize(listings: Listing[]): Listing[] {
return listings.map(this.providerConfig.normalize);
}
_filter(listings) {
return listings.filter(this._providerConfig.filter);
_filter(listings: Listing[]): Listing[] {
return listings.filter(this.providerConfig.filter);
}
_findNew(listings) {
const newListings = listings.filter((o) => getKnownListings(this._jobKey, this._providerId)[o.id] == null);
_findNew(listings: Listing[]): Listing[] {
const newListings = listings.filter((o) => getKnownListings(this.jobKey, this.providerId)[o.id] == null);
if (newListings.length === 0) {
throw new NoNewListingsWarning();
}
return newListings;
}
_notify(newListings) {
_notify(newListings: Listing[]): Promise<Listing[]> {
if (newListings.length === 0) {
throw new NoNewListingsWarning();
}
const sendNotifications = notify.send(this._providerId, newListings, this._notificationConfig, this._jobKey);
const sendNotifications = notify.send({
serviceName: this.providerId,
newListings,
notificationConfig: this.notificationAdapterConfigs,
jobKey: this.jobKey,
});
return Promise.all(sendNotifications).then(() => newListings);
}
_save(newListings) {
const currentListings = getKnownListings(this._jobKey, this._providerId) || {};
_save(newListings: Listing[]): Listing[] {
const currentListings = getKnownListings(this.jobKey, this.providerId) || {};
newListings.forEach((listing) => {
currentListings[listing.id] = Date.now();
});
setKnownListings(this._jobKey, this._providerId, currentListings);
setKnownListings(this.jobKey, this.providerId, currentListings);
return newListings;
}
_filterBySimilarListings(listings) {
const filteredList = listings.filter((listing) => {
const similar = this._similarityCache.hasSimilarEntries(this._jobKey, listing.title);
if (similar) {
/* eslint-disable no-console */
console.debug(`Filtering similar entry for job with id ${this._jobKey} with title: `, listing.title);
/* eslint-enable no-console */
}
return !similar;

_processListings(listings: Listing[]): Promise<Listing[]> {
return process.processListings(listings, this.listingProcessors, {
jobId: this.jobKey,
providerId: this.providerId,
});
filteredList.forEach((filter) => this._similarityCache.addCacheEntry(this._jobKey, filter.title));
return filteredList;
}
_handleError(err) {

_handleError(err: Error) {
if (err.name !== 'NoNewListingsWarning') console.error(err);
}
}
Expand Down
3 changes: 2 additions & 1 deletion lib/api/routes/jobRouter.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobRouter.get('/processingTimes', async (req, res) => {
res.send();
});
jobRouter.post('/', async (req, res) => {
const { provider, notificationAdapter, name, blacklist = [], jobId, enabled } = req.body;
const { provider, notificationAdapter, name, blacklist = [], jobId, enabled, listingProcessors } = req.body;
if (
provider.find((p) => p.id === immoscoutProvider.metaInformation.id) != null &&
(config.scrapingAnt.apiKey == null || config.scrapingAnt.apiKey.length === 0)
Expand All @@ -61,6 +61,7 @@ jobRouter.post('/', async (req, res) => {
blacklist,
provider,
notificationAdapter,
listingProcessors,
});
} catch (error) {
res.send(new Error(error));
Expand Down
6 changes: 5 additions & 1 deletion lib/notification/adapter/console.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@ import { markdown2Html } from '../../services/markdown.js';

export const send = ({ serviceName, newListings, jobKey }) => {
/* eslint-disable no-console */
return [Promise.resolve(console.info(`Found entry from service ${serviceName}, Job: ${jobKey}:`, newListings))];
return [
Promise.resolve(
console.info(`Found entry from service ${serviceName}, Job: ${jobKey}:`, JSON.stringify(newListings, null, 4))
),
];
/* eslint-enable no-console */
};
export const config = {
Expand Down
Empty file added lib/notification/adapter/sqs.md
Empty file.
30 changes: 30 additions & 0 deletions lib/notification/adapter/sqs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import { markdown2Html } from '../../services/markdown.js';
import { NotifierAdapterConfig, SendRequest } from '../notify.js';

/**
 * Placeholder sender for the SQS notification adapter.
 *
 * Nothing is delivered yet. The function still returns an already-resolved
 * promise so that it honours the promise-returning `send` contract declared by
 * `NotifierAdapter` and callers can safely await or collect the result.
 *
 * @param sendRequest listings and job context that should eventually be pushed to the queue (currently unused)
 * @returns a promise that resolves immediately, because no delivery happens yet
 */
export const send = (sendRequest: SendRequest): Promise<void> => {
  // TODO - Add support for the AWS sdk
  return Promise.resolve();
};

/**
 * Metadata describing the SQS notification adapter: its id, display name,
 * rendered readme and the input fields a user has to fill in.
 *
 * NOTE(review): `secretAccessKey` is declared as a plain 'text' field; confirm
 * whether a masked field type is available for secrets.
 */
export const config: NotifierAdapterConfig = {
  id: 'sqs',
  name: 'sqs',
  // readme is rendered from the markdown file that sits next to this adapter
  readme: markdown2Html('lib/notification/adapter/sqs.md'),
  description: 'Fredy will send new listings to an sqs queue of your choice',
  fields: {
    accessKeyId: {
      type: 'text',
      label: 'Access Key Id',
      description: 'Access key id for an aws account/role',
    },
    secretAccessKey: {
      type: 'text',
      label: 'Secret Access Key',
      description: 'Secret access key of an aws account/role',
    },
    queueName: {
      type: 'text',
      label: 'Sqs Queue',
      description: 'The queue to which all new listings will be pushed',
    },
  },
};
24 changes: 0 additions & 24 deletions lib/notification/notify.js

This file was deleted.

41 changes: 41 additions & 0 deletions lib/notification/notify.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import { Listing } from '../provider/provider.js';
import * as Console from './adapter/console.js';
import * as Mailjet from './adapter/mailJet.js';
import * as MatterMost from './adapter/mattermost.js';
import * as Ntfy from './adapter/ntfy.js';
import * as SendGrid from './adapter/sendGrid.js';
import * as Slack from './adapter/slack.js';
import * as Telegram from './adapter/telegram.js';

// Registry of all notification adapter modules that can be looked up by their config id.
// NOTE(review): the sqs adapter added in this change is not part of this list - confirm whether that is intended.
const adapters = [Console, Mailjet, MatterMost, Ntfy, SendGrid, Slack, Telegram];

/**
 * Looks up the registered adapter module whose config id equals the id of the
 * given adapter configuration.
 *
 * @param notificationAdapter adapter configuration taken from a job
 * @returns the matching adapter module, or `undefined` when none is registered under that id
 */
const findAdapter = (notificationAdapter: NotifierAdapterConfig) =>
  adapters.find(({ config }) => config.id === notificationAdapter.id);

/**
 * Payload handed to `send` and forwarded unchanged to every matching adapter.
 */
export type SendRequest = {
// name of the service the listings come from (FredyRuntime passes its provider id here)
serviceName: string;
// the listings to notify about
newListings: Listing[];
// all notification adapter configurations of the job
notificationConfig: NotifierAdapterConfig[];
// key of the job the listings belong to
jobKey: string;
};

/**
 * Forwards the new listings to every configured notification adapter that has
 * a registered implementation.
 *
 * Configured adapters without a matching implementation are skipped. Each
 * adapter is resolved exactly once and only invoked when it was found, so no
 * possibly-undefined lookup result is dereferenced.
 *
 * @param request service name, new listings, adapter configurations and job key
 * @returns one entry per matched adapter, in configuration order, holding whatever that adapter's `send` returned
 */
export const send = ({ serviceName, newListings, notificationConfig, jobKey }: SendRequest) => {
  return notificationConfig.flatMap((notificationAdapter) => {
    const adapter = findAdapter(notificationAdapter);
    // wrap the result in a one-element array so flatMap keeps it as a single entry
    return adapter == null ? [] : [adapter.send({ serviceName, newListings, notificationConfig, jobKey })];
  });
};

/**
 * Contract for a notification adapter module: a `send` function plus its descriptive config.
 */
export interface NotifierAdapter {
// delivers the listings of the request; declared to resolve once delivery is done
// NOTE(review): the console adapter returns an array of promises rather than a single promise - confirm the intended return type
send: ({ serviceName, newListings, notificationConfig, jobKey }: SendRequest) => Promise<any>;
// metadata describing the adapter
config: NotifierAdapterConfig;
}
/**
 * Descriptive metadata of a notification adapter.
 */
export interface NotifierAdapterConfig {
// identifier used to match a configured adapter against the registered adapter modules
id: string;
// name of the adapter
name: string;
// short text describing what the adapter does
description: string;
// adapter-specific input fields; presumably keyed by field name with type/label/description entries - TODO confirm
fields: any;
// readme content of the adapter (the sqs adapter fills it via markdown2Html)
readme: string;
}
Loading