From bc96493b725cafd2a564518e4dfb36433d1eaf63 Mon Sep 17 00:00:00 2001 From: "luan.lrt4@gmail.com" Date: Thu, 7 Apr 2022 21:30:37 -0300 Subject: [PATCH] fix: parsing problems and other major bugs --- README.md | 4 +- examples/index.js | 3 +- lib/constants.js | 6 +-- lib/googlethis.js | 34 +++++++------ lib/parser.js | 100 +++++++++++++++++++++--------------- lib/utils.js | 18 +++++-- package-lock.json | 126 +++++++++++++++++++++++++--------------------- package.json | 15 +++--- test/index.js | 66 ++++++++++++------------ 9 files changed, 210 insertions(+), 162 deletions(-) diff --git a/README.md b/README.md index 1778405..7500856 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ async function start() { } } - const response = await google.search("TWDG", options); + const response = await google.search('TWDG', options); console.log(response); } @@ -215,7 +215,7 @@ start(); ## What else can it do? -As you may have noticed, the library returns a lot of data. Currently it can parse everything from the knowledge graph, featured snippets and much more such as Google Dictionary, Google Translator and song lyrics. +As you can see, the library returns a lot of data. Currently it can parse everything from the knowledge graph, featured snippets and much more such as Google Dictionary, Google Translate and song lyrics. All you have to do is search something along the lines of; ```“define xyz”```, ```“translate x to y”``` or ```“xyz song lyrics”``` and the appropriated fields will appear in the response. #### Examples: diff --git a/examples/index.js b/examples/index.js index 8fbe9d5..9c87657 100644 --- a/examples/index.js +++ b/examples/index.js @@ -18,8 +18,7 @@ async function start() { // Reverse Image Search const reverse = await google.search("https://i.pinimg.com/236x/92/16/d9/9216d9a222ef65eb6eabfff1970180d1.jpg", { ris: true }); console.info('Reverse Image Search:', reverse.results); - - // Top news + const news = await google.getTopNews(); console.info('Google Top News:', news); } diff --git a/lib/constants.js b/lib/constants.js index e825aab..707e858 100644 --- a/lib/constants.js +++ b/lib/constants.js @@ -29,7 +29,7 @@ module.exports = { KNO_PANEL_TYPE: 'div.BkwXh > div', KNO_PANEL_SONG_LYRICS: 'div.ujudUb', KNO_PANEL_AVAILABLE_ON: 'div[class="ellip bclEt"]', - KNO_PANEL_IMAGES: 'div > g-scrolling-carousel > div > div > div > g-inner-card > g-img > img', + KNO_PANEL_IMAGES: 'div > g-inner-card.xIfh4d > div > img', KNO_PANEL_BOOKS: 'div[data-attrid="kc:/book/author:books only"] > a > div > div > div.Bo9xMe > div', KNO_PANEL_TV_SHOWS_AND_MOVIES: 'div[data-attrid="kc:/people/person:tv-shows-and-movies"] > a > div > div > div.Bo9xMe > div', KNO_PANEL_FILM_GOOGLEUSERS_RATING: 'div[data-attrid="kc:/ugc:thumbs_up"] > div > div > div', @@ -68,7 +68,7 @@ module.exports = { // Google Dictionary GD_WORD: 'span[data-dobid="hdw"]', - GD_PHONETIC: 'div[class="S23sjd"]', + GD_PHONETIC: 'div.qexShd', GD_AUDIO: 'audio > source', GD_DEFINITIONS: 'div[data-dobid="dfn"]', GD_EXAMPLES: 'div[class="ubHt5c"]', @@ -87,7 +87,7 @@ module.exports = { TOP_STORIES_WEBSITE: 'div[class="g5wfEd"] > div > g-img > img', // “People also ask” - PAA: [ 'div.s75CSd.u60jwe.gduDCb > span', 'div.wWOJcd > div > span', 'div.SC9Vz > div.zd9Fwc' ], + PAA: [ 'div.s75CSd.u60jwe.gduDCb > span', 'div.gbCQS.u60jwe.gduDCb > div > span', 'div.JlqpRe > span' ], // “People also search for” PASF: 'div[class="IHdOHf"] > img', diff --git a/lib/googlethis.js b/lib/googlethis.js index a7919e5..30528b5 100644 --- a/lib/googlethis.js +++ b/lib/googlethis.js @@ -7,10 +7,14 @@ const Cheerio = require('cheerio'); const Constants = require('./constants'); /** - * Searches a given query on Google. + * Search a given query on Google. * - * @param {string} query Query. - * @param {object} options Search options. + * @param {string} query - Search query + * @param {object} [options] Search options + * @param {boolean} [options.ris] - Use reverse image search + * @param {boolean} [options.safe] - Safe search + * @param {number} [options.page] - Pagination + * @param {object} [options.additional_params] - Parameters that will be passed to Google */ async function search(query, options = {}) { query = query.trim().split(/ +/).join('+'); @@ -27,10 +31,7 @@ async function search(query, options = {}) { '&start=' + options.page); const response = await Axios.get(url, { params: options.additional_params, headers: Utils.getHeaders(true) }).catch((err) => err); - if (response instanceof Error) throw new Error('Could not search on Google: ' + response.message); - - const $ = Cheerio.load(Utils.refineData(response.data)); - const parser = new Parser($, response.data); + if (response instanceof Error) throw new Utils.SearchError('Could not execute search', { status_code: response?.status || 0, message: response?.message }); const results = { results: [], @@ -41,12 +42,14 @@ async function search(query, options = {}) { people_also_ask: [], people_also_search_for: [] }; + + const parser = new Parser(response.data); results.results = parser.getOrganicResults(); results.knowledge_panel = parser.getKnowledgeGraph(); results.featured_snippet = parser.getFeaturedSnippet(); - const did_you_mean = $(Constants.SELECTORS.DID_YOU_MEAN).text(); + const did_you_mean = parser.getDidYouMean(); did_you_mean && (results.did_you_mean = did_you_mean) || (delete results.did_you_mean); const unit_converter = parser.getConverters(); @@ -80,10 +83,13 @@ async function search(query, options = {}) { } /** - * Searches images on Google. + * Google image search. * - * @param {string} query Search query. - * @param {object} options Search options. + * @param {string} query - Search query + * @param {object} [options] - Search options + * @param {boolean} [options.safe] - Safe search + * @param {object} [options.additional_params] - Parameters that will be passed to Google + * @param {Array.} [options.exclude_domains] - Domains that should be blocked */ async function image(query, options = {}) { query = query.trim().split(/ +/).join('+'); @@ -97,7 +103,7 @@ async function image(query, options = {}) { ' ' + options.exclude_domains.map((site) => '-site:' + site); const response = await Axios.get(url, { params: options.additional_params, headers: Utils.getHeaders(false) }).catch((err) => err); - if (response instanceof Error) throw new Error('Could not search on Google: ' + response.message); + if (response instanceof Error) throw new Utils.SearchError('Could not execute search', { status_code: response?.status || 0, message: response?.message }); const results = []; const origin = parseImageOriginData(response.data); @@ -123,7 +129,7 @@ async function image(query, options = {}) { /** * Gets image origin data * - * @param {string} data Raw html. + * @param {string} data - Raw html. */ function parseImageOriginData(data) { let results = []; @@ -132,7 +138,7 @@ function parseImageOriginData(data) { while (parsed_results != null) { results.push({ title: parsed_results[4], - website: parsed_results[3], + source: parsed_results[3], }); parsed_results = Constants.REGEX.IMAGE_ORIGIN.exec(data); } diff --git a/lib/parser.js b/lib/parser.js index d04734f..22addef 100644 --- a/lib/parser.js +++ b/lib/parser.js @@ -1,13 +1,15 @@ 'use strict'; const Utils = require('./utils'); +const Unraw = require('unraw').default; +const Cheerio = require('cheerio'); const Constants = require('./constants'); const NormalizeText = require('replace-special-characters'); - + class Parser { - constructor($, raw_data) { - this.$ = $; - this.raw_data = raw_data; + constructor(data) { + this.data = data; + this.$ = Cheerio.load(Utils.refineData(data)); } getOrganicResults() { @@ -23,7 +25,8 @@ class Parser { return this.$(el).text().trim(); }).get(); - const urls = this.$(Constants.SELECTORS.URL).map((i, el) => this.$(el).attr('href')).get(); + const urls = this.$(Constants.SELECTORS.URL) + .map((i, el) => this.$(el).attr('href')).get(); this.#correctFuzzyData(titles, descriptions, urls); @@ -49,7 +52,7 @@ class Parser { this.$(Constants.SELECTORS.KNO_PANEL_METADATA).each((i, el) => { const key = this.$(el).first().text().trim().slice(0, -1); const value = this.$(el).next().text().trim(); - value.length && (knowledge_panel[NormalizeText(key.toLowerCase().replace(/ /g, '_'))] = value.trim()); + value.length && (knowledge_panel[NormalizeText(key.toLowerCase().replace(/ /g, '_').replace(/\(|\)/g, ''))] = value.trim()); }); const knowledge_panel_type = this.$(Constants.SELECTORS.KNO_PANEL_TYPE).last().text(); @@ -79,7 +82,7 @@ class Parser { .replace(/<\/span><\/div>
/g, '\n\n') .replace(/
/g, '\n')).text()).get(); - song_lyrics.length > 0 && (knowledge_panel.lyrics = song_lyrics.join('\n\n')); + song_lyrics.length && (knowledge_panel.lyrics = song_lyrics.join('\n\n')); const google_users_rating = this.$(Constants.SELECTORS.KNO_PANEL_FILM_GOOGLEUSERS_RATING)[0]; if (google_users_rating) { @@ -106,20 +109,20 @@ class Parser { knowledge_panel.images = this.$(Constants.SELECTORS.KNO_PANEL_IMAGES).map((i, elem) => { return { url: this.$(elem).attr('data-src'), - source: this.$(elem).parent().parent().parent().attr('data-lpage'), + source: this.$(elem).parent().parent().parent().parent().attr('data-lpage'), }; - }).get().filter((img) => img.url !== undefined); + }).get().filter((img) => img.url); - const demo = Utils.getStringBetweenStrings(this.raw_data, 'source src\\x3d\\x22', '.mp4'); + const demo = Utils.getStringBetweenStrings(this.data, 'source src\\x3d\\x22', '.mp4'); demo && (knowledge_panel.demonstration = demo + '.mp4'); - knowledge_panel.books.length == 0 && + !knowledge_panel.books.length && delete knowledge_panel.books; - knowledge_panel.tv_shows_and_movies.length == 0 && + !knowledge_panel.tv_shows_and_movies.length && delete knowledge_panel.tv_shows_and_movies; - knowledge_panel.available_on.length == 0 && + !knowledge_panel.available_on.length && delete knowledge_panel.available_on; - knowledge_panel.images.length == 0 && + !knowledge_panel.images.length && delete knowledge_panel.images; return knowledge_panel; @@ -149,7 +152,7 @@ class Parser { } else { return undefined; } - }).filter(text => text != undefined && text.length != 0)[0]; + }).filter(text => text && text.length)[0]; return { title: featured_snippet_title || 'N/A', @@ -158,42 +161,28 @@ class Parser { }; } + getDidYouMean() { + return this.$(Constants.SELECTORS.DID_YOU_MEAN).text(); + } + getTopStories() { // Removes unnecessary text from the description - this.$(`${Constants.SELECTORS.TOP_STORIES_DESCRIPTION[0]} > div.CEMjEf`).each((i, el) => this.$(el).remove()); - this.$(`${Constants.SELECTORS.TOP_STORIES_DESCRIPTION[0]} > div > p`).each((i, el) => this.$(el).remove()); + this.$(`${Constants.SELECTORS.TOP_STORIES_DESCRIPTION[0]} > div.CEMjEf`).each((el) => this.$(el).remove()); + this.$(`${Constants.SELECTORS.TOP_STORIES_DESCRIPTION[0]} > div > p`).each((el) => this.$(el).remove()); const top_stories_descriptions = Constants.SELECTORS.TOP_STORIES_DESCRIPTION.map((selector) => - this.$(selector).map((i, el) => this.$(el).text().slice(1)).get()).filter((descs) => descs.length > 0)[0]; - const top_stories_urls = this.$(Constants.SELECTORS.TOP_STORIES_URL).map((i, el) => this.$(el).attr('href')).get(); + this.$(selector).map((el) => this.$(el).text()).get()).filter((descs) => descs.length > 0)[0]; + const top_stories_urls = this.$(Constants.SELECTORS.TOP_STORIES_URL).map((el) => this.$(el).attr('href')).get(); return top_stories_urls.map((item, i) => { if (!top_stories_descriptions) return; return { description: top_stories_descriptions[i], - url: item, + url: item }; }).filter((story) => story); } - getPaa() { - let people_also_ask = []; - Constants.SELECTORS.PAA.forEach((item) => - this.$(item).each((i, el) => people_also_ask.push(this.$(el).text()))); - people_also_ask.shift(); - return people_also_ask; - } - - getPas() { - return this.$(Constants.SELECTORS.PASF).map((i, el) => { - if (!this.$(el).attr('data-src')) return; - return { - title: this.$(el).attr('alt'), - thumbnail: `https:${this.$(el).attr('data-src')}` - }; - }).get(); - } - getTime() { const hours = this.$(Constants.SELECTORS.CURRENT_TIME_HOUR).text(); const date = this.$(Constants.SELECTORS.CURRENT_TIME_DATE).map((i, el) => this.$(el).text()).get()[1]; @@ -303,15 +292,44 @@ class Parser { } } + getPaa() { + const people_also_ask = []; + + Constants.SELECTORS.PAA.forEach((item) => + this.$(item).each((i, el) => people_also_ask.push(this.$(el).text()))); + + people_also_ask.shift(); + + const extra_data = JSON.parse(Unraw(Utils.getStringBetweenStrings(this.data, 'var c=\'', '\';google') || '{}')); + const rfs = extra_data?.sb_wiz?.rfs; + + rfs && rfs.forEach((el) => { + const item = el.replace(/|<\/b>/g, ''); + people_also_ask.push(item); + }); + + return people_also_ask; + } + + getPas() { + return this.$(Constants.SELECTORS.PASF).map((i, el) => { + if (!this.$(el).attr('data-src')) return; + return { + title: this.$(el).attr('alt'), + thumbnail: `https:${this.$(el).attr('data-src')}` + }; + }).get(); + } + #correctFuzzyData(titles, descriptions, urls) { titles.length < urls.length && titles.length < descriptions.length && urls.shift(); urls.length > titles.length && urls.shift(); - const innacurate_data = descriptions.length > urls.slice(1).length ? false : true; - + const is_innacurate_data = descriptions.length < urls.slice(1).length; + urls.forEach((item, index) => { // Why YouTube? Because video results usually don't have a description. - if (item.includes('m.youtube.com') && innacurate_data && Constants.URLS.length > 1) { + if (item.includes('m.youtube.com') && is_innacurate_data) { urls.splice(index, 1); titles.splice(index, 1); index--; diff --git a/lib/utils.js b/lib/utils.js index 501b5c5..c3664ce 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -2,6 +2,14 @@ const UserAgent = require('user-agents'); +function SearchError(message, info) { + this.info = info; + this.stack = Error(message).stack; +} + +SearchError.prototype = Object.create(Error.prototype); +SearchError.prototype.constructor = SearchError; + /** * Returns headers with a random user agent. * @@ -22,7 +30,7 @@ function getHeaders (is_mobile) { /** * Refines the html. * - * @param {string} data Raw html data. + * @param {string} data - Raw html data. * @returns {string} Refined data. */ function refineData (data) { @@ -54,9 +62,9 @@ function refineData (data) { /** * Gets a string between two delimiters. * - * @param {string} data The data. - * @param {string} start_string Start string. - * @param {string} end_string End string. + * @param {string} data - The data. + * @param {string} start_string - Start string. + * @param {string} end_string - End string. */ function getStringBetweenStrings (data, start_string, end_string) { const regex = new RegExp(`${escapeStringRegexp(start_string)}(.*?)${escapeStringRegexp(end_string)}`, "s"); @@ -68,4 +76,4 @@ function escapeStringRegexp (string) { return string.replace(/[|\\{}()[\]^$+*?.]/g, '\\$&').replace(/-/g, '\\x2d'); } -module.exports = { getHeaders, getStringBetweenStrings, refineData }; \ No newline at end of file +module.exports = { SearchError, getHeaders, getStringBetweenStrings, refineData }; \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index d2ec02f..4a4a09e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,18 +1,22 @@ { "name": "googlethis", - "version": "1.2.7", + "version": "1.2.8", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "googlethis", - "version": "1.2.7", + "version": "1.2.8", "license": "MIT", "dependencies": { "axios": "^0.21.1", "cheerio": "^1.0.0-rc.6", "replace-special-characters": "1.2.5", + "unraw": "^2.0.1", "user-agents": "^1.0.784" + }, + "funding": { + "url": "https://ko-fi.com/luanrt" } }, "node_modules/axios": { @@ -49,28 +53,28 @@ } }, "node_modules/cheerio-select": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-1.5.0.tgz", - "integrity": "sha512-qocaHPv5ypefh6YNxvnbABM07KMxExbtbfuJoIie3iZXX1ERwYmJcIiRrr9H05ucQP1k28dav8rpdDgjQd8drg==", + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-1.6.0.tgz", + "integrity": "sha512-eq0GdBvxVFbqWgmCm7M3XGs1I8oLy/nExUnh6oLqmBditPO9AqQJrkslDpMun/hZ0yyTs8L0m85OHp4ho6Qm9g==", "dependencies": { - "css-select": "^4.1.3", - "css-what": "^5.0.1", + "css-select": "^4.3.0", + "css-what": "^6.0.1", "domelementtype": "^2.2.0", - "domhandler": "^4.2.0", - "domutils": "^2.7.0" + "domhandler": "^4.3.1", + "domutils": "^2.8.0" }, "funding": { "url": "https://github.com/sponsors/fb55" } }, "node_modules/css-select": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/css-select/-/css-select-4.2.1.tgz", - "integrity": "sha512-/aUslKhzkTNCQUB2qTX84lVmfia9NyjP3WpDGtj/WxhwBzWBYUV3DgUpurHTme8UTPcPlAD1DJ+b0nN/t50zDQ==", + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-4.3.0.tgz", + "integrity": "sha512-wPpOYtnsVontu2mODhA19JrqWxNsfdatRKd64kmpRbQgh1KtItko5sTnEpPdpSaJszTOhEMlF/RPz28qj4HqhQ==", "dependencies": { "boolbase": "^1.0.0", - "css-what": "^5.1.0", - "domhandler": "^4.3.0", + "css-what": "^6.0.1", + "domhandler": "^4.3.1", "domutils": "^2.8.0", "nth-check": "^2.0.1" }, @@ -79,9 +83,9 @@ } }, "node_modules/css-what": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/css-what/-/css-what-5.1.0.tgz", - "integrity": "sha512-arSMRWIIFY0hV8pIxZMEfmMI47Wj3R/aWpZDDxWYCPEiOMv6tfOrnpDtgxBYPEQD4V0Y/958+1TdC3iWTFcUPw==", + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz", + "integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==", "engines": { "node": ">= 6" }, @@ -119,9 +123,9 @@ } }, "node_modules/domelementtype": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.2.0.tgz", - "integrity": "sha512-DtBMo82pv1dFtUmHyr48beiuq792Sxohr+8Hm9zoxklYPfa6n0Z3Byjj2IV7bmr2IyqClnqEQhfgHJJ5QF0R5A==", + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", "funding": [ { "type": "github", @@ -130,9 +134,9 @@ ] }, "node_modules/domhandler": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-4.3.0.tgz", - "integrity": "sha512-fC0aXNQXqKSFTr2wDNZDhsEYjCiYsDWl3D01kwt25hm1YIPyDGHvvi3rw+PLqHAl/m71MaiF7d5zvBr0p5UB2g==", + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-4.3.1.tgz", + "integrity": "sha512-GrwoxYN+uWlzO8uhUXRl0P+kHE4GtVPfYzVLcUxPL7KNdHKj66vvlhiweIHqYYXWlw+T8iLMp42Lm67ghw4WMQ==", "dependencies": { "domelementtype": "^2.2.0" }, @@ -178,9 +182,9 @@ } }, "node_modules/follow-redirects": { - "version": "1.14.8", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.8.tgz", - "integrity": "sha512-1x0S9UVJHsQprFcEC/qnNzBLcIxsjAV905f/UkQxbclCsoTWlacCNOpQa/anodLl2uaEKFhfWOvM2Qg77+15zA==", + "version": "1.14.9", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.9.tgz", + "integrity": "sha512-MQDfihBQYMcyy5dhRDJUHcw7lb2Pv/TuE6xP1vyraLukNDHKbDxDNaOE3NbCAdKQApno+GPRyo1YAp89yCjK4w==", "funding": [ { "type": "individual", @@ -266,10 +270,15 @@ "underscore": "*" } }, + "node_modules/unraw": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/unraw/-/unraw-2.0.1.tgz", + "integrity": "sha512-tdOvLfRzHolwYcHS6HIX860MkK9LQ4+oLuNwFYL7bpgTEO64PZrcQxkisgwJYCfF8sKiWLwwu1c83DvMkbefIQ==" + }, "node_modules/user-agents": { - "version": "1.0.905", - "resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.0.905.tgz", - "integrity": "sha512-Q9vn9b5T+TF4KC3olBzUXr2nkIE932kw8ZN1CQJikX8pyh2sNtwGdteDl/JH9BXh3UPltOUCWB5Hl04X6B4K0w==", + "version": "1.0.979", + "resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.0.979.tgz", + "integrity": "sha512-ERZ+T8TbKjIxzDKPI/JYymSu5o3qwrd704n2GQISso2gmRv6e6bFQ8/zEieqDKw57ZtVxLIZAiQ/gd8P95n/lw==", "dependencies": { "dot-json": "^1.2.2", "lodash.clonedeep": "^4.5.0" @@ -305,33 +314,33 @@ } }, "cheerio-select": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-1.5.0.tgz", - "integrity": "sha512-qocaHPv5ypefh6YNxvnbABM07KMxExbtbfuJoIie3iZXX1ERwYmJcIiRrr9H05ucQP1k28dav8rpdDgjQd8drg==", + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-1.6.0.tgz", + "integrity": "sha512-eq0GdBvxVFbqWgmCm7M3XGs1I8oLy/nExUnh6oLqmBditPO9AqQJrkslDpMun/hZ0yyTs8L0m85OHp4ho6Qm9g==", "requires": { - "css-select": "^4.1.3", - "css-what": "^5.0.1", + "css-select": "^4.3.0", + "css-what": "^6.0.1", "domelementtype": "^2.2.0", - "domhandler": "^4.2.0", - "domutils": "^2.7.0" + "domhandler": "^4.3.1", + "domutils": "^2.8.0" } }, "css-select": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/css-select/-/css-select-4.2.1.tgz", - "integrity": "sha512-/aUslKhzkTNCQUB2qTX84lVmfia9NyjP3WpDGtj/WxhwBzWBYUV3DgUpurHTme8UTPcPlAD1DJ+b0nN/t50zDQ==", + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-4.3.0.tgz", + "integrity": "sha512-wPpOYtnsVontu2mODhA19JrqWxNsfdatRKd64kmpRbQgh1KtItko5sTnEpPdpSaJszTOhEMlF/RPz28qj4HqhQ==", "requires": { "boolbase": "^1.0.0", - "css-what": "^5.1.0", - "domhandler": "^4.3.0", + "css-what": "^6.0.1", + "domhandler": "^4.3.1", "domutils": "^2.8.0", "nth-check": "^2.0.1" } }, "css-what": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/css-what/-/css-what-5.1.0.tgz", - "integrity": "sha512-arSMRWIIFY0hV8pIxZMEfmMI47Wj3R/aWpZDDxWYCPEiOMv6tfOrnpDtgxBYPEQD4V0Y/958+1TdC3iWTFcUPw==" + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz", + "integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==" }, "detect-indent": { "version": "6.0.0", @@ -354,14 +363,14 @@ } }, "domelementtype": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.2.0.tgz", - "integrity": "sha512-DtBMo82pv1dFtUmHyr48beiuq792Sxohr+8Hm9zoxklYPfa6n0Z3Byjj2IV7bmr2IyqClnqEQhfgHJJ5QF0R5A==" + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==" }, "domhandler": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-4.3.0.tgz", - "integrity": "sha512-fC0aXNQXqKSFTr2wDNZDhsEYjCiYsDWl3D01kwt25hm1YIPyDGHvvi3rw+PLqHAl/m71MaiF7d5zvBr0p5UB2g==", + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-4.3.1.tgz", + "integrity": "sha512-GrwoxYN+uWlzO8uhUXRl0P+kHE4GtVPfYzVLcUxPL7KNdHKj66vvlhiweIHqYYXWlw+T8iLMp42Lm67ghw4WMQ==", "requires": { "domelementtype": "^2.2.0" } @@ -392,9 +401,9 @@ "integrity": "sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==" }, "follow-redirects": { - "version": "1.14.8", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.8.tgz", - "integrity": "sha512-1x0S9UVJHsQprFcEC/qnNzBLcIxsjAV905f/UkQxbclCsoTWlacCNOpQa/anodLl2uaEKFhfWOvM2Qg77+15zA==" + "version": "1.14.9", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.14.9.tgz", + "integrity": "sha512-MQDfihBQYMcyy5dhRDJUHcw7lb2Pv/TuE6xP1vyraLukNDHKbDxDNaOE3NbCAdKQApno+GPRyo1YAp89yCjK4w==" }, "htmlparser2": { "version": "6.1.0", @@ -456,10 +465,15 @@ "underscore": "*" } }, + "unraw": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/unraw/-/unraw-2.0.1.tgz", + "integrity": "sha512-tdOvLfRzHolwYcHS6HIX860MkK9LQ4+oLuNwFYL7bpgTEO64PZrcQxkisgwJYCfF8sKiWLwwu1c83DvMkbefIQ==" + }, "user-agents": { - "version": "1.0.905", - "resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.0.905.tgz", - "integrity": "sha512-Q9vn9b5T+TF4KC3olBzUXr2nkIE932kw8ZN1CQJikX8pyh2sNtwGdteDl/JH9BXh3UPltOUCWB5Hl04X6B4K0w==", + "version": "1.0.979", + "resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.0.979.tgz", + "integrity": "sha512-ERZ+T8TbKjIxzDKPI/JYymSu5o3qwrd704n2GQISso2gmRv6e6bFQ8/zEieqDKw57ZtVxLIZAiQ/gd8P95n/lw==", "requires": { "dot-json": "^1.2.2", "lodash.clonedeep": "^4.5.0" diff --git a/package.json b/package.json index 1ae6b44..fa65a1e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "googlethis", - "version": "1.2.7", + "version": "1.2.8", "description": "A simple yet powerful module to retrieve organic search results and much more from Google.", "main": "index.js", "scripts": { @@ -10,7 +10,8 @@ "axios": "^0.21.1", "cheerio": "^1.0.0-rc.6", "replace-special-characters": "1.2.5", - "user-agents": "^1.0.784" + "user-agents": "^1.0.784", + "unraw": "^2.0.1" }, "repository": { "type": "git", @@ -25,13 +26,15 @@ "search-results", "reverse-image-search", "image-search", - "image", - "search", - "gis", + "web-scraping", "scraper", - "web-scraping" + "search", + "image", + "serp", + "gis" ], "author": "LuanRT", + "funding": "https://ko-fi.com/luanrt", "license": "MIT", "bugs": { "url": "https://github.com/LuanRT/google-this/issues" diff --git a/test/index.js b/test/index.js index 43be8e6..fd6afd5 100644 --- a/test/index.js +++ b/test/index.js @@ -1,34 +1,34 @@ -'use strict'; - -const google = require('..'); -let failed_tests = 0; - -async function start() { - const search = await google.search('Stephen Hawking').catch((err) => err); - assert(!(search instanceof Error) && !!search.results.length, 'should search a query on google.', search); - - const image = await google.image('Supermassive Blackhole').catch((err) => err); - assert(!(image instanceof Error) && !!image.length, 'should do image search.', image); - - const news = await google.getTopNews().catch((err) => err); - assert(!(news instanceof Error) && !!news.headline_stories.length, 'should get top news from google.', news); - - const reverse = await google.search('https://i.pinimg.com/236x/92/16/d9/9216d9a222ef65eb6eabfff1970180d1.jpg', { ris: true }); - assert(!(reverse instanceof Error) && !!reverse.results.length, 'should do reverse image search.', search); - - if (failed_tests > 0) - throw new Error('Some tests have failed'); -} - -function assert(outcome, description, data) { - const pass_fail = outcome ? 'pass' : 'fail'; - - !outcome && (failed_tests += 1); - !outcome && console.error(data); - - console.info(pass_fail, ':', description); - - return outcome; -} - +'use strict'; + +const google = require('..'); +let failed_tests = 0; + +async function start() { + const search = await google.search('Stephen Hawking').catch((err) => err); + assert(!(search instanceof Error) && search.results.length, 'should search a query on google.', search); + + const image = await google.image('Supermassive Blackhole').catch((err) => err); + assert(!(image instanceof Error) && image.length, 'should do image search.', image); + + const reverse = await google.search('https://i.pinimg.com/236x/92/16/d9/9216d9a222ef65eb6eabfff1970180d1.jpg', { ris: true }); + assert(!(reverse instanceof Error) && reverse.results.length, 'should do reverse image search.', search); + + const news = await google.getTopNews().catch((err) => err); + assert(!(news instanceof Error) && news.headline_stories.length, 'should get top news from google.', news); + + if (failed_tests > 0) + throw new Error('Some tests have failed'); +} + +function assert(outcome, description, data) { + const pass_fail = outcome ? 'pass' : 'fail'; + + !outcome && (failed_tests += 1); + !outcome && console.error(data); + + console.info(pass_fail, ':', description); + + return outcome; +} + start(); \ No newline at end of file