From 2d316e2e0146ac862c0981db67cea74033aefda8 Mon Sep 17 00:00:00 2001 From: bob <45246438+bobmatyas@users.noreply.github.com> Date: Wed, 28 Aug 2024 22:29:21 -0400 Subject: [PATCH 1/8] Add plugins page settings + review nudge. Closes #49 --- block-ai-crawlers.php | 52 ++++++++++++++++++++++++++++++++++++++++++- readme.txt | 7 ++++-- 2 files changed, 56 insertions(+), 3 deletions(-) diff --git a/block-ai-crawlers.php b/block-ai-crawlers.php index d3fadfb..bfc5cdd 100644 --- a/block-ai-crawlers.php +++ b/block-ai-crawlers.php @@ -5,7 +5,7 @@ * Author: Bob Matyas * Author URI: https://www.bobmatyas.com * Text Domain: block-ai-crawlers - * Version: 1.3.8 + * Version: 1.3.9 * License: GPL-2.0-or-later * License URI: https://www.gnu.org/licenses/gpl-2.0.html * @@ -74,3 +74,53 @@ function block_ai_activate() { } register_activation_hook( __FILE__, 'block_ai_activate' ); + +add_filter( 'plugin_action_links', 'block_ai_prepend_plugin_settings_link', 10, 2 ); + +/** + * Adds settings link to plugins page + * + * @param array $links_array An array of plugin action links. + * @param string $plugin_file_name Path to the plugin file. + * @return array $links_array + */ +function block_ai_prepend_plugin_settings_link( $links_array, $plugin_file_name ) { + if ( strpos( $plugin_file_name, basename( __FILE__ ) ) ) { + array_unshift( $links_array, 'Settings' ); + } + return $links_array; +} + + +/** + * Adds ratings nudge to plugins page + * + * @access public + * @param array $links_array An array of the plugin's metadata. + * @param string $plugin_file_name Path to the plugin file. + * @return array $links_array + */ +function block_ai_append_plugin_rating( $links_array, $plugin_file_name ) { + if ( strpos( $plugin_file_name, basename( __FILE__ ) ) ) { + + $links_array[] = " + " + . "" + . "" + . "" + . "" + . "" + . 
''; + + $stars_color = '#ffb900'; + + echo ''; + } + + return $links_array; +} + +add_filter( 'plugin_row_meta', 'block_ai_append_plugin_rating', 10, 4 ); \ No newline at end of file diff --git a/readme.txt b/readme.txt index cf117ab..87cf8ec 100644 --- a/readme.txt +++ b/readme.txt @@ -2,9 +2,9 @@ Contributors: lastsplash Tags: ai, robots.txt, chatgpt, crawlers Requires at least: 5.6 -Tested up to: 6.5.3 +Tested up to: 6.6 Requires PHP: 7.4 -Stable tag: 1.3.8 +Stable tag: 1.3.9 License: GPLv2 or later License URI: https://www.gnu.org/licenses/gpl-2.0.html @@ -72,6 +72,9 @@ No. Search engines follow different `robots.txt` rules. == Changelog == += 1.3.9 = +- Enhancement: Add quick link to settings and nudge for rating on plugins page + = 1.3.8 = - Maintenance: Auto-deply from Github fixed / bumped version number From 89f7a6f997cef76ab694013e7c4e44653c73a7be Mon Sep 17 00:00:00 2001 From: bob <45246438+bobmatyas@users.noreply.github.com> Date: Wed, 28 Aug 2024 22:31:31 -0400 Subject: [PATCH 2/8] Update readme.txt. Closes #48 --- readme.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.txt b/readme.txt index 87cf8ec..4cf1717 100644 --- a/readme.txt +++ b/readme.txt @@ -8,7 +8,7 @@ Stable tag: 1.3.9 License: GPLv2 or later License URI: https://www.gnu.org/licenses/gpl-2.0.html -Tell AI crawlers not to access your site to train their models. +Tells AI companies not to access and scrape your site for AI. == Description == From 87d511f0f8567068b667039e0979abf195501aab Mon Sep 17 00:00:00 2001 From: bob <45246438+bobmatyas@users.noreply.github.com> Date: Wed, 28 Aug 2024 22:39:38 -0400 Subject: [PATCH 3/8] Block SearchGPT. 
Closes #47 --- block-ai-crawlers.php | 3 ++- inc/settings-html.php | 7 ++++++- readme.txt | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/block-ai-crawlers.php b/block-ai-crawlers.php index bfc5cdd..5bb2cac 100644 --- a/block-ai-crawlers.php +++ b/block-ai-crawlers.php @@ -44,10 +44,11 @@ function block_ai_robots_txt( $robots ) { $robots .= "User-agent: ImagesiftBot\n"; $robots .= "User-agent: Meta-ExternalAgent\n"; $robots .= "User-agent: Meta-ExternalFetcher\n"; + $robots .= "User-agent: OAI-SearchBot\n"; $robots .= "User-agent: Omgili\n"; $robots .= "User-agent: Omgilibot\n"; $robots .= "User-agent: PerplexityBot\n"; - $robots .= "User-agent: Timpibot\n"; + $robots .= "User-agent: Timpibot\n"; $robots .= "User-agent: YouBot\n"; $robots .= "Disallow: /\n\n"; $robots .= "# End Block AI Crawlers\n"; diff --git a/inc/settings-html.php b/inc/settings-html.php index 592af47..9678530 100644 --- a/inc/settings-html.php +++ b/inc/settings-html.php @@ -74,7 +74,7 @@
Used by Hive's Imagesift tool that scrapes images. THis may be used for the company's generative AI product
Used by Hive's Imagesift tool that scrapes images. This may be used for the company's generative AI product
Used by Timpi; likely for their Wilson AI Product.
Used by OpenAI for their SearchGPT product.
Used by OpenAI for their SearchGPT product.
Used by Webz.io for their social listening and intelligence platforms.
Used by Webz.io for AI training.
ChatGPT | -Used by OpenAI |
- More Info | -
---|---|---|
GPTBot | -Used by OpenAI to allow ChatGPT to access the web |
- More Info | -
Google Extended | -Used by Google to power Gemini (formerly known as Bard) |
- More Info | -
FacebookBot | -Used by Meta (Facebook) for their AI |
- More Info | -
CommonCrawl | -Compiles datasets used to train AI models |
- More Info | -
ClaudeBot and Claude-Web | -Used by Anthropic's Claude |
- More Info | -
Bytespider | -Used by TikTok for AI training |
- More Info | -
Omgilibot | -Used by Omigili to scrape data for AI training |
- More Info | -
Cohere | -Used by Cohere to scrape data for AI training |
- More Info | -
Diffbot | -Used by Diffbot to scrape data for AI training |
- More Info | -
ImagesiftBot | -Used by Hive's Imagesift tool that scrapes images. This may be used for the company's generative AI product |
- More Info | -
PerplexityBot | -Used by Perplexity for their AI products |
- More Info | -
AppleBot | -Used by Apple for generative AI features across Apple products, including Apple Intelligence, Services, and Developer Tools. |
- More Info | -
Meta-ExternalAgent / Meta-ExternalFetcher | -Used by Meta to train AI products |
- More Info | -
YouBot | -Used by You.com to train AI products. |
- More Info | -
AmazonBot | -Used by Amazon's Alexa AI to provide AI answers. |
- More Info | -
Timpibot | -Used by Timpi; likely for their Wilson AI Product. |
- More Info | -
OAI-SearchBot | -Used by OpenAI for their SearchGPT product. |
- More Info | -
Webzio | -Used by Webz.io for their social listening and intelligence platforms. |
- More Info | -
AmazonBot | +Used by Amazon's Alexa AI to provide AI answers. |
+ More Info | +
---|---|---|
AppleBot | +Used by Apple for generative AI features across Apple products, including Apple Intelligence, Services, and Developer Tools. |
+ More Info | +
Bytespider | +Used by TikTok for AI training |
+ More Info | +
Cohere | +Used by Cohere to scrape data for AI training |
+ More Info | +
ChatGPT | +Used by OpenAI |
+ More Info | +
ClaudeBot and Claude-Web | +Used by Anthropic's Claude |
+ More Info | +
CommonCrawl | +Compiles datasets used to train AI models |
+ More Info | +
Diffbot | +Used by Diffbot to scrape data for AI training |
+ More Info | +
FacebookBot | +Used by Meta (Facebook) for their AI |
+ More Info | +
Google Extended | +Used by Google to power Gemini (formerly known as Bard) |
+ More Info | +
ImagesiftBot | +Used by Hive's Imagesift tool that scrapes images. This may be used for the company's generative AI product |
+ More Info | +
Meta-ExternalAgent / Meta-ExternalFetcher | +Used by Meta to train AI products |
+ More Info | +
OAI-SearchBot | +Used by OpenAI for their SearchGPT product. |
+ More Info | +
Omgilibot | +Used by Omgili to scrape data for AI training |
+ More Info | +
PerplexityBot | +Used by Perplexity for their AI products |
+ More Info | +
Timpibot | +Used by Timpi; likely for their Wilson AI Product. |
+ More Info | +
Webzio | +Used by Webz.io for their social listening and intelligence platforms. |
+ More Info | +
Webzio-Extended | -Used by Webz.io for AI training. |
- More Info | -
Used by Webz.io for AI training.
Used by You.com to train AI products.
AI2Bot | +Explores sites for web content that is used to train open language models |
+ More Info | +
---|---|---|
AmazonBot | Used by Amazon's Alexa AI to provide AI answers. |
diff --git a/readme.txt b/readme.txt
index e3ed806..b8a481f 100644
--- a/readme.txt
+++ b/readme.txt
@@ -73,6 +73,7 @@ No. Search engines follow different `robots.txt` rules.
== Changelog ==
= 1.3.9 =
+- New: Block AI2Bot
- New: Block Webz.io
- New: Block OpenAI Search Bot (SearchGPT)
- Enhancement: Alphabetize list of blocked crawlers
From 24f7bc9acac3f244e612e127e9114ecb93f6adbe Mon Sep 17 00:00:00 2001
From: bob <45246438+bobmatyas@users.noreply.github.com>
Date: Thu, 29 Aug 2024 09:34:31 -0400
Subject: [PATCH 8/8] Add PetalBot. Closes #53
---
block-ai-crawlers.php | 1 +
readme.txt | 1 +
2 files changed, 2 insertions(+)
diff --git a/block-ai-crawlers.php b/block-ai-crawlers.php
index 5eeed68..815b175 100644
--- a/block-ai-crawlers.php
+++ b/block-ai-crawlers.php
@@ -48,6 +48,7 @@ function block_ai_robots_txt( $robots ) {
$robots .= "User-agent: OAI-SearchBot\n";
$robots .= "User-agent: Omgili\n";
$robots .= "User-agent: Omgilibot\n";
+ $robots .= "User-agent: PetalBot\n";
$robots .= "User-agent: PerplexityBot\n";
$robots .= "User-agent: Timpibot\n";
$robots .= "User-agent: YouBot\n";
diff --git a/readme.txt b/readme.txt
index b8a481f..0a895e6 100644
--- a/readme.txt
+++ b/readme.txt
@@ -73,6 +73,7 @@ No. Search engines follow different `robots.txt` rules.
== Changelog ==
= 1.3.9 =
+- New: Block PetalBot
- New: Block AI2Bot
- New: Block Webz.io
- New: Block OpenAI Search Bot (SearchGPT)