From 6abc3530045ae54014ff7ce6c32e1d0ffb666cdc Mon Sep 17 00:00:00 2001 From: omrilotan Date: Mon, 19 Aug 2024 15:25:36 +0100 Subject: [PATCH] Pattern updates for better recognition --- CHANGELOG.md | 4 ++++ README.md | 2 ++ fixtures/browsers.yml | 5 +++++ package.json | 2 +- src/index.ts | 2 +- src/patterns.json | 9 ++++++--- 6 files changed, 19 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 54ef7d5..aaa4363 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## [5.1.17](https://github.com/omrilotan/isbot/compare/v5.1.16...v5.1.17) + +- [Pattern] Pattern updates for better recognition + ## [5.1.16](https://github.com/omrilotan/isbot/compare/v5.1.15...v5.1.16) - [Pattern] Treat CCleaner broswer as an actual browser, not a bot diff --git a/README.md b/README.md index 46954c7..a11d492 100644 --- a/README.md +++ b/README.md @@ -138,6 +138,8 @@ Recognising good bots such as web crawlers is useful for multiple purposes. Alth `isbot` is an asset when it can most accurately identify bots by the user agent string. It uses expansive and regularly updated lists of user agent strings to create a regular expression that matches bots and only bots. +And above everything else, it is maintained by a community of contributors who help keep the list up to date. + ### Fallback The pattern uses lookbehind methods which are not supported in all environments. A fallback is provided for environments that do not support lookbehind. The fallback is less accurate. The test suite includes a percentage of false positives and false negatives which is deemed acceptable for the fallback: 1% false positive and 75% bot coverage. 
diff --git a/fixtures/browsers.yml b/fixtures/browsers.yml index 13941b7..4cb5212 100644 --- a/fixtures/browsers.yml +++ b/fixtures/browsers.yml @@ -91,6 +91,9 @@ Brave: - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Brave Chrome/76.0.3809.132 Safari/537.36 Camino: - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en; rv:1.9.2.28) Gecko/20120308 Camino/2.1.2 (like Firefox/3.6.28) +CamScanner: + - Mozilla/5.0 (Linux; Android 13; 2201116SI Build/TKQ1.221114.001; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/125.0.6422.165 Mobile Safari/537.36 CamScanner/4.5.0.2305051722 + - Mozilla/5.0 (Linux; Android 14; SM-S918B Build/UP1A.231005.007; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/124.0.6367.123 Mobile Safari/537.36 CamScanner/4.8.5.2310271827 CCleaner: - Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 CCleaner/122.0.0.0 - Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 CCleaner/125.0.0.0 @@ -579,6 +582,8 @@ Snapchat: - Mozilla/5.0 (iPhone; CPU iPhone OS 13_1_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Snapchat/10.69.5.72 (iPhone11,6; iOS 13.1.3; gzip) - Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.5 Mobile/15E148 Snapchat/10.77.5.59 (like Safari/604.1) - Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Snapchat/11.36.0.36 (like Safari/604.1) + - Mozilla/5.0 (Linux; Android 11; RMX2001 Build/RP1A.200720.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/124.0.6367.179 Mobile Safari/537.36 Snapchat/12.87.0.44 (RMX2001; Android 11#1647528410731#30; gzip; ) + - Mozilla/5.0 (Linux; Android 13; I2011 Build/TP1A.220624.014; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/125.0.6422.54 Mobile Safari/537.36 
Snapchat/12.89.0.40 (I2011; Android 13#eng.compil.20240430.095616#33; gzip; ) Snowshoe: - Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.21 (KHTML, like Gecko) Snowshoe/1.0.0 Safari/537.21 Sogou Explorer: diff --git a/package.json b/package.json index 099c54f..6c41ff3 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "isbot", - "version": "5.1.16", + "version": "5.1.17", "description": "🤖/👨‍🦰 Recognise bots/crawlers/spiders using the user agent string.", "keywords": [ "bot", diff --git a/src/index.ts b/src/index.ts index 2ce9930..0ea3c20 100644 --- a/src/index.ts +++ b/src/index.ts @@ -4,7 +4,7 @@ import { fullPattern } from "./pattern"; /** * Naive bot pattern. */ -const naivePattern = /bot|spider|crawl|http|lighthouse/i; +const naivePattern = /bot|crawl|http|lighthouse|scan|search|spider/i; let pattern: RegExp; export function getPattern(): RegExp { diff --git a/src/patterns.json b/src/patterns.json index 21629fc..8a34d33 100644 --- a/src/patterns.json +++ b/src/patterns.json @@ -9,8 +9,8 @@ "(?