diff --git a/src/Checks/Content/TooLongSentenceCheck.php b/src/Checks/Content/TooLongSentenceCheck.php index 82c1e3c0..68d168de 100644 --- a/src/Checks/Content/TooLongSentenceCheck.php +++ b/src/Checks/Content/TooLongSentenceCheck.php @@ -3,19 +3,21 @@ namespace Vormkracht10\Seo\Checks\Content; use Illuminate\Http\Client\Response; -use Symfony\Component\DomCrawler\Crawler; +use Vormkracht10\Seo\Traits\Actions; use Vormkracht10\Seo\Interfaces\Check; +use Symfony\Component\DomCrawler\Crawler; use Vormkracht10\Seo\Traits\PerformCheck; class TooLongSentenceCheck implements Check { - use PerformCheck; + use PerformCheck, + Actions; public string $title = 'Too long sentence check'; public string $priority = 'medium'; - public int $timeToFix = 45; + public int $timeToFix = 30; public int $scoreWeight = 5; @@ -29,30 +31,28 @@ class TooLongSentenceCheck implements Check public function check(Response $response, Crawler $crawler): bool { - if (! $this->validateContent($crawler)) { - return false; + if ($this->validateContent($response, $crawler)) { + return true; } - return true; + return false; } - public function validateContent(Crawler $crawler): bool + public function validateContent(Response $response, Crawler $crawler): bool { - $realSentences = []; - $sentences = $this->getSentencesFromCrawler($crawler); - - $sentences = $this->separateSentencesByDot($sentences); - - $sentencesWithTooManyWords = $this->calculateSentencesWithTooManyWords($sentences); - - $this->actualValue = $this->calculateSentencesWithTooManyWords($sentences); + $phrases = $this->extractPhrases( + $this->getTextContent($response, $crawler) + ); + $sentencesWithTooManyWords = $this->calculateSentencesWithTooManyWords($phrases); + $this->actualValue = $sentencesWithTooManyWords; + if (count($sentencesWithTooManyWords) === 0) { return true; } // If more than 20% of the total sentences are too long, fail - if (count($sentencesWithTooManyWords) / count($sentences) > 0.2) { + if 
(count($sentencesWithTooManyWords) / count($phrases) > 0.2) { $this->failureReason = __('failed.content.too_long_sentence', [ 'actualValue' => count($this->actualValue), ]); @@ -63,37 +63,6 @@ public function validateContent(Crawler $crawler): bool return true; } - private function separateSentencesByDot(array $sentences): array - { - $newSentences = []; - - foreach ($sentences as $sentence) { - $sentence = explode('.', $sentence); - $newSentences = array_merge($newSentences, $sentence); - } - - // Remove all sentences that are empty - $sentences = array_filter($newSentences, function ($sentence) { - return ! empty($sentence); - }); - - return $sentences; - } - - private function getSentencesFromCrawler(Crawler $crawler): array - { - $content = $crawler->filterXPath('//body')->children(); - - // Get all elements that contain text - $content = $content->filterXPath('//*/text()[normalize-space()]'); - - $content = $content->each(function (Crawler $node, $i) { - return $node->text(); - }); - - return $content; - } - private function calculateSentencesWithTooManyWords(array $sentences): array { $tooLongSentences = []; diff --git a/src/Checks/Content/TransitionWordRatioCheck.php b/src/Checks/Content/TransitionWordRatioCheck.php index a6d17989..a18e5c1f 100644 --- a/src/Checks/Content/TransitionWordRatioCheck.php +++ b/src/Checks/Content/TransitionWordRatioCheck.php @@ -3,16 +3,17 @@ namespace Vormkracht10\Seo\Checks\Content; use Illuminate\Http\Client\Response; -use Readability\Readability; -use Symfony\Component\DomCrawler\Crawler; -use Vormkracht10\Seo\Helpers\TransitionWords; +use Vormkracht10\Seo\Traits\Actions; use Vormkracht10\Seo\Interfaces\Check; +use Symfony\Component\DomCrawler\Crawler; use Vormkracht10\Seo\Traits\PerformCheck; +use Vormkracht10\Seo\Helpers\TransitionWords; class TransitionWordRatioCheck implements Check { - use PerformCheck; - + use PerformCheck, + Actions; + public string $title = 'Transition word ratio check'; public string $priority = 
'medium'; @@ -40,17 +41,7 @@ public function check(Response $response, Crawler $crawler): bool public function validateContent(Response $response, Crawler $crawler): bool { - $body = $response->body(); - - if ($this->useJavascript) { - $body = $crawler->filter('body')->html(); - } - - $readability = new Readability($body); - - $readability->init(); - - $content = $readability->getContent()->textContent; + $content = $this->getTextContent($response, $crawler); if ($content == 'Sorry, Readability was unable to parse this page for content.') { $this->failureReason = __('failed.content.length.parse'); @@ -75,15 +66,9 @@ public function validateContent(Response $response, Crawler $crawler): bool public function calculatePercentageOfTransitionWordsInContent($content, $transitionWords) { - // Get phrases seperate by new line, dot, exclamation mark or question mark - $phrases = preg_split('/\n|\.|\!|\?/', $content); - - // Count all phrases where it has more than 5 words - $totalPhrases = array_filter($phrases, function ($phrase) { - return str_word_count($phrase) > 5; - }); + $phrases = $this->extractPhrases($content); - if (count($totalPhrases) === 0) { + if (count($phrases) === 0) { $this->actualValue = 0; $this->failureReason = __('failed.content.transition_words_ratio_check.no_phrases_found'); @@ -96,7 +81,7 @@ public function calculatePercentageOfTransitionWordsInContent($content, $transit $phrasesWithTransitionWord += $this->calculateNumberOfPhrasesWithTransitionWord($content, $transitionWord); } - return round($phrasesWithTransitionWord / count($totalPhrases) * 100, 0, PHP_ROUND_HALF_UP); + return round($phrasesWithTransitionWord / count($phrases) * 100, 0, PHP_ROUND_HALF_UP); } public function calculateNumberOfPhrasesWithTransitionWord(string $content, string $transitionWord): int diff --git a/src/Traits/Actions.php b/src/Traits/Actions.php new file mode 100644 index 00000000..a6c21c1e --- /dev/null +++ b/src/Traits/Actions.php @@ -0,0 +1,32 @@ +body(); + + if 
($this->useJavascript) { + $body = $crawler->filter('body')->html(); + } + + $readability = new Readability($body); + + $readability->init(); + + return $readability->getContent()->textContent; + } + + private function extractPhrases(string $content): array + { + // Split content into phrases on new line, dot, exclamation mark or question mark; PREG_SPLIT_NO_EMPTY drops empty fragments so they do not inflate the phrase count + return preg_split('/\n|\.|\!|\?/', $content, -1, PREG_SPLIT_NO_EMPTY); + } +}