Skip to content

Commit

Permalink
Add method isExplicitlyNotAllowedFor()
Browse files Browse the repository at this point in the history
It ignores rules for the wildcard user-agent (`*`) and checks whether a path
is explicitly disallowed for a specific user-agent.
  • Loading branch information
otsch committed Sep 23, 2022
1 parent 993e05e commit ddbd0ad
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 4 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [1.1.0] - 2022-09-23
### Added
- Method `isExplicitlyNotAllowedFor()`, which ignores rules for the wildcard user-agent (`*`) and checks whether a path is explicitly disallowed for a specific user-agent.

## [1.0.0] - 2022-09-22
### Changed
- Required PHP version is now 8.0.
Expand Down
23 changes: 21 additions & 2 deletions src/RobotsTxt.php
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,36 @@ public function isAllowed(string $uri, string $userAgent): bool
return $group->isAllowed($uri);
}

/**
 * Tell whether a URI is disallowed by groups that list the given user agent by name.
 *
 * Groups that would match only through the wildcard user agent (*) are left
 * out of the lookup, so the result is false when no group names the user
 * agent explicitly.
 *
 * @throws Exception
 */
public function isExplicitlyNotAllowedFor(string $uri, string $userAgent): bool
{
    $explicitGroups = $this->getGroupsMatchingUserAgent($userAgent, includeWildcard: false);

    // No group names this user agent, so nothing is explicitly disallowed for it.
    if ($explicitGroups === []) {
        return false;
    }

    // A single group is used as is; several groups are combined into one first.
    $effectiveGroup = match (count($explicitGroups)) {
        1 => $explicitGroups[0],
        default => $this->combineGroups($explicitGroups),
    };

    return !$effectiveGroup->isAllowed($uri);
}

/**
* Find all groups that match a certain user agent string.
*
* @param bool $includeWildcard Set to false to ignore wildcard (*) entries, so that only groups listing the user agent explicitly match
* @return UserAgentGroup[]
*/
private function getGroupsMatchingUserAgent(string $userAgent): array
private function getGroupsMatchingUserAgent(string $userAgent, bool $includeWildcard = true): array
{
$matchingGroups = [];

foreach ($this->groups() as $group) {
if ($group->contains($userAgent)) {
if ($group->contains($userAgent, $includeWildcard)) {
$matchingGroups[] = $group;
}
}
Expand Down
10 changes: 8 additions & 2 deletions src/UserAgentGroup.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,16 @@ public function __construct(private array $userAgents)
}
}

public function contains(string $userAgent): bool
/**
* @param bool $includeWildcard Set to false so that a wildcard (*) entry does not count as a match (the user agent must be listed explicitly in the group)
*/
public function contains(string $userAgent, bool $includeWildcard = true): bool
{
foreach ($this->userAgents as $groupUserAgent) {
if ($groupUserAgent === '*' || strtolower($groupUserAgent) === strtolower($userAgent)) {
if (
($includeWildcard && $groupUserAgent === '*') ||
strtolower($groupUserAgent) === strtolower($userAgent)
) {
return true;
}
}
Expand Down
22 changes: 22 additions & 0 deletions tests/RobotsTxtTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,26 @@ public function testMatchingDisallowedRuleAndMoreSpecificMatchingAllowedRuleButA

$this->assertFalse($robotsTxt->isAllowed('/foo/bar', 'FooBot'));
}

public function test_is_explicitly_not_allowed_for_returns_true_when_a_disallow_rule_is_for_explicit_user_agent(): void
{
    // The group names FooBot directly and carries a disallow rule for /foo.
    $fooBotGroup = new UserAgentGroup(['FooBot']);
    $fooBotGroup->addDisallowedPattern(new RulePattern('/foo'));

    $isBlocked = (new RobotsTxt([$fooBotGroup]))->isExplicitlyNotAllowedFor('/foo/bar', 'FooBot');

    $this->assertTrue($isBlocked);
}

public function test_is_explicitly_not_allowed_for_returns_false_when_the_group_of_a_disallow_rule_contains_wildcard_user_agent(): void
{
    // FooBot is not named in this group; only BarBot and the wildcard are.
    $wildcardGroup = new UserAgentGroup(['BarBot', '*']);
    $wildcardGroup->addDisallowedPattern(new RulePattern('/foo'));

    $isBlocked = (new RobotsTxt([$wildcardGroup]))->isExplicitlyNotAllowedFor('/foo/bar', 'FooBot');

    $this->assertFalse($isBlocked);
}
}
14 changes: 14 additions & 0 deletions tests/UserAgentGroupTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,20 @@ public function test_contains_returns_true_when_user_agent_is_contained_case_ins
$this->assertTrue($userAgentGroup->contains('FOOBOT'));
}

public function test_contains_returns_true_when_wildcard_is_in_group(): void
{
    // 'foobot' is not listed by name; the group also holds the wildcard entry.
    $group = new UserAgentGroup(['*', 'barbot']);

    $matchesViaWildcard = $group->contains('foobot');

    $this->assertTrue($matchesViaWildcard);
}

public function test_contains_return_false_when_wildcard_is_in_group_but_arg_include_wildcard_is_set_to_false(): void
{
    // Same group as the wildcard-match case, but wildcard matching is switched off.
    $group = new UserAgentGroup(['*', 'barbot']);

    $matchesExplicitly = $group->contains('foobot', includeWildcard: false);

    $this->assertFalse($matchesExplicitly);
}

public function test_adding_a_disallow_rule_pattern(): void
{
$rulePattern = new RulePattern('/foo/bar');
Expand Down

0 comments on commit ddbd0ad

Please sign in to comment.