Skip to content

Commit

Permalink
Nick: improvements to search
Browse files Browse the repository at this point in the history
  • Loading branch information
nickscamara committed Apr 24, 2024
1 parent f189589 commit 307ea6f
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 29 deletions.
4 changes: 2 additions & 2 deletions apps/api/src/controllers/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ export async function searchHelper(
return { success: true, data: res, returnCode: 200 };
}

res = res.filter((r) => !isUrlBlocked(r));
res = res.filter((r) => !isUrlBlocked(r.url));

if (res.length === 0) {
return { success: true, error: "No search results found", returnCode: 200 };
Expand All @@ -48,7 +48,7 @@ export async function searchHelper(
const a = new WebScraperDataProvider();
await a.setOptions({
mode: "single_urls",
urls: res.map((r) => r),
urls: res.map((r) => r.url),
crawlerOptions: {
...crawlerOptions,
},
Expand Down
17 changes: 17 additions & 0 deletions apps/api/src/lib/entities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,20 @@ export class Document {
this.provider = data.provider || undefined;
}
}


/**
 * Holds the URL, title and description of one search result.
 *
 * All three fields are public and mutable; they are set from the
 * constructor arguments in the order url, title, description.
 */
export class SearchResult {
  constructor(
    public url: string,
    public title: string,
    public description: string,
  ) {}

  /**
   * Formats the result for logging/debugging.
   *
   * @returns A string of the form
   *   `SearchResult(url=<url>, title=<title>, description=<description>)`.
   */
  toString(): string {
    const fields = [
      `url=${this.url}`,
      `title=${this.title}`,
      `description=${this.description}`,
    ];
    return `SearchResult(${fields.join(", ")})`;
  }
}
25 changes: 4 additions & 21 deletions apps/api/src/search/googlesearch.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import axios from 'axios';
import * as cheerio from 'cheerio';
import * as querystring from 'querystring';
import { SearchResult } from '../../src/lib/entities';

const _useragent_list = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
Expand Down Expand Up @@ -47,23 +48,9 @@ async function _req(term: string, results: number, lang: string, start: number,
}
}

// Module-local record for one search result: a URL with its title and
// description. Fields are public and mutable.
class SearchResult {
  constructor(public url: string, public title: string, public description: string) {}

  // Produces `SearchResult(url=..., title=..., description=...)` from the current field values.
  toString(): string {
    const { url, title, description } = this;
    return `SearchResult(url=${url}, title=${title}, description=${description})`;
  }
}

export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise<string[]> {
export async function google_search(term: string, advanced = false, num_results = 7, tbs = null, filter = null, lang = "en", proxy = null, sleep_interval = 0, timeout = 5000, ) :Promise<SearchResult[]> {
const escaped_term = querystring.escape(term);

let proxies = null;
Expand All @@ -78,7 +65,7 @@ export async function google_search(term: string, advanced = false, num_results
// TODO: knowledge graph, answer box, etc.

let start = 0;
let results : string[] = [];
let results : SearchResult[] = [];
let attempts = 0;
const maxAttempts = 20; // Define a maximum number of attempts to prevent infinite loop
while (start < num_results && attempts < maxAttempts) {
Expand All @@ -103,11 +90,7 @@ export async function google_search(term: string, advanced = false, num_results
const description = description_box.text();
if (link && title && description) {
start += 1;
if (advanced) {
// results.push(new SearchResult(link, title.text(), description));
} else {
results.push(link);
}
results.push(new SearchResult(link, title.text(), description));
}
}
});
Expand Down
3 changes: 2 additions & 1 deletion apps/api/src/search/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { SearchResult } from "../../src/lib/entities";
import { google_search } from "./googlesearch";
import { serper_search } from "./serper";

Expand All @@ -21,7 +22,7 @@ export async function search({
proxy?: string;
sleep_interval?: number;
timeout?: number;
}) {
}) : Promise<SearchResult[]> {
try {
if (process.env.SERPER_API_KEY && !tbs) {
return await serper_search(query, num_results);
Expand Down
14 changes: 9 additions & 5 deletions apps/api/src/search/serper.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import axios from "axios";
import dotenv from "dotenv";
import { SearchResult } from "../../src/lib/entities";

dotenv.config();

export async function serper_search(q, num_results) : Promise<string[]> {
export async function serper_search(q, num_results): Promise<SearchResult[]> {
let data = JSON.stringify({
q: q,
"num": num_results,

num: num_results,
});

let config = {
Expand All @@ -21,8 +21,12 @@ export async function serper_search(q, num_results) : Promise<string[]> {
};
const response = await axios(config);
if (response && response.data && Array.isArray(response.data.organic)) {
return response.data.organic.map((a) => a.link);
} else {
return response.data.organic.map((a) => ({
url: a.link,
title: a.title,
description: a.snippet,
}));
}else{
return [];
}
}

0 comments on commit 307ea6f

Please sign in to comment.