From ddbd0ad893505e6095870a86c0f427e2169e1377 Mon Sep 17 00:00:00 2001 From: otsch Date: Fri, 23 Sep 2022 02:08:59 +0200 Subject: [PATCH] Add method isExplicitlyNotAllowedFor() It ignores rules for wildcard user-agent (`*`) and checks if some path is explicitly not allowed for a certain user-agent. --- CHANGELOG.md | 4 ++++ src/RobotsTxt.php | 23 +++++++++++++++++++++-- src/UserAgentGroup.php | 10 ++++++++-- tests/RobotsTxtTest.php | 22 ++++++++++++++++++++++ tests/UserAgentGroupTest.php | 14 ++++++++++++++ 5 files changed, 69 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f94305..c6ba5fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [1.1.0] - 2022-09-23 +### Added +- Method `isExplicitlyNotAllowedFor()` that ignores rules for wildcard user-agent (`*`) and checks if some path is explicitly not allowed for a certain user-agent. + ## [1.0.0] - 2022-09-22 ### Changed - Required PHP version is now 8.0. diff --git a/src/RobotsTxt.php b/src/RobotsTxt.php index fd04993..2e54845 100644 --- a/src/RobotsTxt.php +++ b/src/RobotsTxt.php @@ -64,17 +64,36 @@ public function isAllowed(string $uri, string $userAgent): bool return $group->isAllowed($uri); } + /** + * @throws Exception + */ + public function isExplicitlyNotAllowedFor(string $uri, string $userAgent): bool + { + $matchingGroups = $this->getGroupsMatchingUserAgent($userAgent, false); + + $groupCount = count($matchingGroups); + + if ($groupCount === 0) { + return false; + } + + $group = $groupCount === 1 ? $matchingGroups[0] : $this->combineGroups($matchingGroups); + + return !$group->isAllowed($uri); + } + /** * Find all groups that match a certain user agent string. * + * @param bool $includeWildcard Set to false if wildcard (*) should not count (user agent explicitly in group) * @return UserAgentGroup[] */ - private function getGroupsMatchingUserAgent(string $userAgent): array + private function getGroupsMatchingUserAgent(string $userAgent, bool $includeWildcard = true): array { $matchingGroups = []; foreach ($this->groups() as $group) { - if ($group->contains($userAgent)) { + if ($group->contains($userAgent, $includeWildcard)) { $matchingGroups[] = $group; } } diff --git a/src/UserAgentGroup.php b/src/UserAgentGroup.php index 768cf69..a2f375a 100644 --- a/src/UserAgentGroup.php +++ b/src/UserAgentGroup.php @@ -30,10 +30,16 @@ public function __construct(private array $userAgents) } } - public function contains(string $userAgent): bool + /** + * @param bool $includeWildcard Set to false if wildcard (*) should not count (user agent explicitly in group) + */ + public function contains(string $userAgent, bool $includeWildcard = true): bool { foreach ($this->userAgents as $groupUserAgent) { - if ($groupUserAgent === '*' || strtolower($groupUserAgent) === strtolower($userAgent)) { + if ( + ($includeWildcard && $groupUserAgent === '*') || + strtolower($groupUserAgent) === strtolower($userAgent) + ) { return true; } } diff --git a/tests/RobotsTxtTest.php b/tests/RobotsTxtTest.php index aa2c29c..a334c20 100644 --- a/tests/RobotsTxtTest.php +++ b/tests/RobotsTxtTest.php @@ -115,4 +115,26 @@ public function testMatchingDisallowedRuleAndMoreSpecificMatchingAllowedRuleButA $this->assertFalse($robotsTxt->isAllowed('/foo/bar', 'FooBot')); } + + public function test_is_explicitly_not_allowed_for_returns_true_when_a_disallow_rule_is_for_explicit_user_agent(): void + { + $group = new UserAgentGroup(['FooBot']); + + $group->addDisallowedPattern(new RulePattern('/foo')); + + $robotsTxt = new RobotsTxt([$group]); + + $this->assertTrue($robotsTxt->isExplicitlyNotAllowedFor('/foo/bar', 'FooBot')); + } + + public function test_is_explicitly_not_allowed_for_returns_false_when_the_group_of_a_disallow_rule_contains_wildcard_user_agent(): void + { + $group = new UserAgentGroup(['BarBot', '*']); + + $group->addDisallowedPattern(new RulePattern('/foo')); + + $robotsTxt = new RobotsTxt([$group]); + + $this->assertFalse($robotsTxt->isExplicitlyNotAllowedFor('/foo/bar', 'FooBot')); + } } diff --git a/tests/UserAgentGroupTest.php b/tests/UserAgentGroupTest.php index f76ea87..ea23c98 100644 --- a/tests/UserAgentGroupTest.php +++ b/tests/UserAgentGroupTest.php @@ -43,6 +43,20 @@ public function test_contains_returns_true_when_user_agent_is_contained_case_ins $this->assertTrue($userAgentGroup->contains('FOOBOT')); } + public function test_contains_returns_true_when_wildcard_is_in_group(): void + { + $userAgentGroup = new UserAgentGroup(['*', 'barbot']); + + $this->assertTrue($userAgentGroup->contains('foobot')); + } + + public function test_contains_return_false_when_wildcard_is_in_group_but_arg_include_wildcard_is_set_to_false(): void + { + $userAgentGroup = new UserAgentGroup(['*', 'barbot']); + + $this->assertFalse($userAgentGroup->contains('foobot', false)); + } + public function test_adding_a_disallow_rule_pattern(): void { $rulePattern = new RulePattern('/foo/bar');