From 2d316e2e0146ac862c0981db67cea74033aefda8 Mon Sep 17 00:00:00 2001 From: bob <45246438+bobmatyas@users.noreply.github.com> Date: Wed, 28 Aug 2024 22:29:21 -0400 Subject: [PATCH 1/8] Add plugins page settings + review nudge. Closes #49 --- block-ai-crawlers.php | 52 ++++++++++++++++++++++++++++++++++++++++++- readme.txt | 7 ++++-- 2 files changed, 56 insertions(+), 3 deletions(-) diff --git a/block-ai-crawlers.php b/block-ai-crawlers.php index d3fadfb..bfc5cdd 100644 --- a/block-ai-crawlers.php +++ b/block-ai-crawlers.php @@ -5,7 +5,7 @@ * Author: Bob Matyas * Author URI: https://www.bobmatyas.com * Text Domain: block-ai-crawlers - * Version: 1.3.8 + * Version: 1.3.9 * License: GPL-2.0-or-later * License URI: https://www.gnu.org/licenses/gpl-2.0.html * @@ -74,3 +74,53 @@ function block_ai_activate() { } register_activation_hook( __FILE__, 'block_ai_activate' ); + +add_filter( 'plugin_action_links', 'block_ai_prepend_plugin_settings_link', 10, 2 ); + +/** + * Adds settings link to plugins page + * + * @param array $links_array An array of the plugin's metadata. + * @param string $plugin_file_name Path to the plugin file. + * @return array $links_array + */ +function block_ai_prepend_plugin_settings_link( $links_array, $plugin_file_name ) { + if ( strpos( $plugin_file_name, basename( __FILE__ ) ) ) { + array_unshift( $links_array, 'Settings' ); + } + return $links_array; +} + + +/** + * Adds ratings nudge to plugins page + * + * @access public + * @param array $links_array An array of the plugin's metadata. + * @param string $plugin_file_name Path to the plugin file. + * @return array $links_array + */ +function block_ai_append_plugin_rating( $links_array, $plugin_file_name ) { + if ( strpos( $plugin_file_name, basename( __FILE__ ) ) ) { + + $links_array[] = " + " + . "" + . "" + . "" + . "" + . "" + . 
''; + + $stars_color = '#ffb900'; + + echo ''; + } + + return $links_array; +} + +add_filter( 'plugin_row_meta', 'block_ai_append_plugin_rating', 10, 4 ); \ No newline at end of file diff --git a/readme.txt b/readme.txt index cf117ab..87cf8ec 100644 --- a/readme.txt +++ b/readme.txt @@ -2,9 +2,9 @@ Contributors: lastsplash Tags: ai, robots.txt, chatgpt, crawlers Requires at least: 5.6 -Tested up to: 6.5.3 +Tested up to: 6.6 Requires PHP: 7.4 -Stable tag: 1.3.8 +Stable tag: 1.3.9 License: GPLv2 or later License URI: https://www.gnu.org/licenses/gpl-2.0.html @@ -72,6 +72,9 @@ No. Search engines follow different `robots.txt` rules. == Changelog == += 1.3.9 = +- Enhancement: Add quick link to settings and nudge for rating on plugins page + = 1.3.8 = - Maintenance: Auto-deply from Github fixed / bumped version number From 89f7a6f997cef76ab694013e7c4e44653c73a7be Mon Sep 17 00:00:00 2001 From: bob <45246438+bobmatyas@users.noreply.github.com> Date: Wed, 28 Aug 2024 22:31:31 -0400 Subject: [PATCH 2/8] Update readme.txt. Closes #48 --- readme.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.txt b/readme.txt index 87cf8ec..4cf1717 100644 --- a/readme.txt +++ b/readme.txt @@ -8,7 +8,7 @@ Stable tag: 1.3.9 License: GPLv2 or later License URI: https://www.gnu.org/licenses/gpl-2.0.html -Tell AI crawlers not to access your site to train their models. +Tells AI companies not to access and scrape your site for AI. == Description == From 87d511f0f8567068b667039e0979abf195501aab Mon Sep 17 00:00:00 2001 From: bob <45246438+bobmatyas@users.noreply.github.com> Date: Wed, 28 Aug 2024 22:39:38 -0400 Subject: [PATCH 3/8] Block SearchGPT. 
Closes #47 --- block-ai-crawlers.php | 3 ++- inc/settings-html.php | 7 ++++++- readme.txt | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/block-ai-crawlers.php b/block-ai-crawlers.php index bfc5cdd..5bb2cac 100644 --- a/block-ai-crawlers.php +++ b/block-ai-crawlers.php @@ -44,10 +44,11 @@ function block_ai_robots_txt( $robots ) { $robots .= "User-agent: ImagesiftBot\n"; $robots .= "User-agent: Meta-ExternalAgent\n"; $robots .= "User-agent: Meta-ExternalFetcher\n"; + $robots .= "User-agent: OAI-SearchBot\n"; $robots .= "User-agent: Omgili\n"; $robots .= "User-agent: Omgilibot\n"; $robots .= "User-agent: PerplexityBot\n"; - $robots .= "User-agent: Timpibot\n"; + $robots .= "User-agent: Timpibot\n"; $robots .= "User-agent: YouBot\n"; $robots .= "Disallow: /\n\n"; $robots .= "# End Block AI Crawlers\n"; diff --git a/inc/settings-html.php b/inc/settings-html.php index 592af47..9678530 100644 --- a/inc/settings-html.php +++ b/inc/settings-html.php @@ -74,7 +74,7 @@ ImagesiftBot -

Used by Hive's Imagesift tool that scrapes images. THis may be used for the company's generative AI product

+

Used by Hive's Imagesift tool that scrapes images. This may be used for the company's generative AI product

More Info @@ -107,6 +107,11 @@

Used by Timpi; likely for their Wilson AI Product.

More Info + + OAI-SearchBot +

Used by OpenAI for their SearchGPT product.

+ More Info + diff --git a/readme.txt b/readme.txt index 4cf1717..7dc0ba3 100644 --- a/readme.txt +++ b/readme.txt @@ -73,6 +73,8 @@ No. Search engines follow different `robots.txt` rules. == Changelog == = 1.3.9 = +- New: Block OpenAI Search Bot +- Enhancement: Indicate compatibility with WordPress v6.6 - Enhancement: Add quick link to settings and nudge for rating on plugins page = 1.3.8 = From a7e10829fa7da1b7db1d1fe07c4294aad225b528 Mon Sep 17 00:00:00 2001 From: bob <45246438+bobmatyas@users.noreply.github.com> Date: Wed, 28 Aug 2024 22:45:27 -0400 Subject: [PATCH 4/8] Block Webz.io. Closes #46 --- block-ai-crawlers.php | 2 ++ inc/settings-html.php | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/block-ai-crawlers.php b/block-ai-crawlers.php index 5bb2cac..795fb9d 100644 --- a/block-ai-crawlers.php +++ b/block-ai-crawlers.php @@ -50,6 +50,8 @@ function block_ai_robots_txt( $robots ) { $robots .= "User-agent: PerplexityBot\n"; $robots .= "User-agent: Timpibot\n"; $robots .= "User-agent: YouBot\n"; + $robots .= "User-agent: webzio\n"; + $robots .= "User-agent: webzio-extended\n"; $robots .= "Disallow: /\n\n"; $robots .= "# End Block AI Crawlers\n"; return ( $robots ); diff --git a/inc/settings-html.php b/inc/settings-html.php index 9678530..0bf5ce5 100644 --- a/inc/settings-html.php +++ b/inc/settings-html.php @@ -112,6 +112,15 @@

Used by OpenAI for their SearchGPT product.

More Info + + Webzio +

Used by Webz.io for their social listening and intelligence platforms.

+ More Info + + Webzio-Extended +

Used by Webz.io for AI training.

+ More Info + From db24cdc986df9ec4c03f3629f07d3168852cfd9d Mon Sep 17 00:00:00 2001 From: bob <45246438+bobmatyas@users.noreply.github.com> Date: Wed, 28 Aug 2024 22:57:27 -0400 Subject: [PATCH 5/8] Alphabetize settings table. Closes ##50 --- inc/css/admin-style.css | 10 +- inc/settings-html.php | 200 ++++++++++++++++++++-------------------- 2 files changed, 106 insertions(+), 104 deletions(-) diff --git a/inc/css/admin-style.css b/inc/css/admin-style.css index 7c0c34a..52c03bd 100644 --- a/inc/css/admin-style.css +++ b/inc/css/admin-style.css @@ -16,6 +16,10 @@ color: rgb(75, 75, 75); } +.block-ai-container div.block-ai-info table p { + font-size: 100%; +} + .block-ai-container .link { font-size: 100%; padding-top: 2px; @@ -30,10 +34,8 @@ .block-ai-container div.block-ai-info table th, .block-ai-container div.block-ai-info table p { color: rgb(75, 75, 75); - font-size: 120%; } .block-ai-container div.block-ai-info table p { - font-size: 115%; margin-top: 0; } @@ -73,3 +75,7 @@ border-bottom: 2px solid #eee; color: #fff; } + +.block-ai-container .form-table td { + vertical-align: top; +} \ No newline at end of file diff --git a/inc/settings-html.php b/inc/settings-html.php index 0bf5ce5..1e6c92f 100644 --- a/inc/settings-html.php +++ b/inc/settings-html.php @@ -20,109 +20,105 @@

Blocked Crawlers

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
ChatGPT

Used by OpenAI

More Info
GPTBot

Used by OpenAI to allow ChatGPT to access the web

More Info
Google Extended

Used by Google to power Gemini (formerly known as Bard)

More Info
FacebookBot

Used by Meta (Facebook) for their AI

More Info
CommonCrawl

Compiles datasets used to train AI models

More Info
ClaudeBot and Claude-Web

Used by Anthropic's Claude

More Info
Bytespider

Used by TikTok for AI training

More Info
Omgilibot

Used by Omigili to scrape data for AI training

More Info
Cohere

Used by Cohere to scrape data for AI training

More Info
Diffbot

Used by Diffbot to scrape data for AI training

More Info
ImagesiftBot

Used by Hive's Imagesift tool that scrapes images. This may be used for the company's generative AI product

More Info
PerplexityBot

Used by Perplexity for their AI products

More Info
AppleBot

Used by Apple for generative AI features across Apple products, including Apple Intelligence, Services, and Developer Tools.

More Info
Meta-ExternalAgent / Meta-ExternalFetcher

Used by Meta to train AI products

More Info
YouBot

Used by You.com to train AI products.

More Info
AmazonBot

Used by Amazon's Alexa AI to provide AI answers.

More Info
Timpibot

Used by Timpi; likely for their Wilson AI Product.

More Info
OAI-SearchBot

Used by OpenAI for their SearchGPT product.

More Info
Webzio

Used by Webz.io for their social listening and intelligence platforms.

More Info
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - -
AmazonBot

Used by Amazon's Alexa AI to provide AI answers.

More Info
AppleBot

Used by Apple for generative AI features across Apple products, including Apple Intelligence, Services, and Developer Tools.

More Info
Bytespider

Used by TikTok for AI training

More Info
Cohere

Used by Cohere to scrape data for AI training

More Info
ChatGPT

Used by OpenAI

More Info
ClaudeBot and Claude-Web

Used by Anthropic's Claude

More Info
CommonCrawl

Compiles datasets used to train AI models

More Info
Diffbot

Used by Diffbot to scrape data for AI training

More Info
FacebookBot

Used by Meta (Facebook) for their AI

More Info
Google Extended

Used by Google to power Gemini (formerly known as Bard)

More Info
ImagesiftBot

Used by Hive's Imagesift tool that scrapes images. This may be used for the company's generative AI product

More Info
Meta-ExternalAgent / Meta-ExternalFetcher

Used by Meta to train AI products

More Info
OAI-SearchBot

Used by OpenAI for their SearchGPT product.

More Info
Omgilibot

Used by Omgili to scrape data for AI training

More Info
PerplexityBot

Used by Perplexity for their AI products

More Info
Timpibot

Used by Timpi; likely for their Wilson AI Product.

More Info
Webzio

Used by Webz.io for their social listening and intelligence platforms.

More Info
Webzio-Extended

Used by Webz.io for AI training.

More Info
+

Used by Webz.io for AI training.

+ More Info + + + YouBot +

Used by You.com to train AI products.

+ More Info + + +
From 8029c6d55eccf14b67399318930ce31388564c4c Mon Sep 17 00:00:00 2001 From: bob <45246438+bobmatyas@users.noreply.github.com> Date: Wed, 28 Aug 2024 22:58:41 -0400 Subject: [PATCH 6/8] update changelog --- readme.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/readme.txt b/readme.txt index 7dc0ba3..e3ed806 100644 --- a/readme.txt +++ b/readme.txt @@ -73,7 +73,9 @@ No. Search engines follow different `robots.txt` rules. == Changelog == = 1.3.9 = -- New: Block OpenAI Search Bot +- New: Block Webz.io +- New: Block OpenAI Search Bot (SearchGPT) +- Enhancement: Alphabetize list of blocked crawlers - Enhancement: Indicate compatibility with WordPress v6.6 - Enhancement: Add quick link to settings and nudge for rating on plugins page From 04c2ef407274090b55ceea7d162c7c02da673afb Mon Sep 17 00:00:00 2001 From: bob <45246438+bobmatyas@users.noreply.github.com> Date: Thu, 29 Aug 2024 09:28:02 -0400 Subject: [PATCH 7/8] Add AI2Bot. Closes #51 --- block-ai-crawlers.php | 1 + inc/settings-html.php | 5 +++++ readme.txt | 1 + 3 files changed, 7 insertions(+) diff --git a/block-ai-crawlers.php b/block-ai-crawlers.php index 795fb9d..5eeed68 100644 --- a/block-ai-crawlers.php +++ b/block-ai-crawlers.php @@ -28,6 +28,7 @@ */ function block_ai_robots_txt( $robots ) { $robots .= "\n# Block AI Crawlers\n\n"; + $robots .= "User-agent: AI2Bot\n"; $robots .= "User-agent: AmazonBot\n"; $robots .= "User-agent: Applebot-Extended\n"; $robots .= "User-agent: anthropic-ai\n"; diff --git a/inc/settings-html.php b/inc/settings-html.php index 1e6c92f..6facce7 100644 --- a/inc/settings-html.php +++ b/inc/settings-html.php @@ -22,6 +22,11 @@

Blocked Crawlers

+ + + + + diff --git a/readme.txt b/readme.txt index e3ed806..b8a481f 100644 --- a/readme.txt +++ b/readme.txt @@ -73,6 +73,7 @@ No. Search engines follow different `robots.txt` rules. == Changelog == = 1.3.9 = +- New: Block AI2Bot - New: Block Webz.io - New: Block OpenAI Search Bot (SearchGPT) - Enhancement: Alphabetize list of blocked crawlers From 24f7bc9acac3f244e612e127e9114ecb93f6adbe Mon Sep 17 00:00:00 2001 From: bob <45246438+bobmatyas@users.noreply.github.com> Date: Thu, 29 Aug 2024 09:34:31 -0400 Subject: [PATCH 8/8] Add PetalBot. Closes #53 --- block-ai-crawlers.php | 1 + readme.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/block-ai-crawlers.php b/block-ai-crawlers.php index 5eeed68..815b175 100644 --- a/block-ai-crawlers.php +++ b/block-ai-crawlers.php @@ -48,6 +48,7 @@ function block_ai_robots_txt( $robots ) { $robots .= "User-agent: OAI-SearchBot\n"; $robots .= "User-agent: Omgili\n"; $robots .= "User-agent: Omgilibot\n"; + $robots .= "User-agent: PetalBot\n"; $robots .= "User-agent: PerplexityBot\n"; $robots .= "User-agent: Timpibot\n"; $robots .= "User-agent: YouBot\n"; diff --git a/readme.txt b/readme.txt index b8a481f..0a895e6 100644 --- a/readme.txt +++ b/readme.txt @@ -73,6 +73,7 @@ No. Search engines follow different `robots.txt` rules. == Changelog == = 1.3.9 = +- New: Block PetalBot - New: Block AI2Bot - New: Block Webz.io - New: Block OpenAI Search Bot (SearchGPT)
AI2Bot

Explores sites for web content that is used to train open language models

More Info
AmazonBot

Used by Amazon's Alexa AI to provide AI answers.