Skip to content

Commit

Permalink
Improve TooLongSentenceCheck and make Action trait
Browse files Browse the repository at this point in the history
  • Loading branch information
Baspa committed Aug 17, 2023
1 parent 1fd49dc commit 5afdfee
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 72 deletions.
63 changes: 16 additions & 47 deletions src/Checks/Content/TooLongSentenceCheck.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,21 @@
namespace Vormkracht10\Seo\Checks\Content;

use Illuminate\Http\Client\Response;
use Symfony\Component\DomCrawler\Crawler;
use Vormkracht10\Seo\Traits\Actions;
use Vormkracht10\Seo\Interfaces\Check;
use Symfony\Component\DomCrawler\Crawler;
use Vormkracht10\Seo\Traits\PerformCheck;

class TooLongSentenceCheck implements Check
{
use PerformCheck;
use PerformCheck,
Actions;

public string $title = 'Too long sentence check';

public string $priority = 'medium';

public int $timeToFix = 45;
public int $timeToFix = 30;

public int $scoreWeight = 5;

Expand All @@ -29,30 +31,28 @@ class TooLongSentenceCheck implements Check

public function check(Response $response, Crawler $crawler): bool
{
if (! $this->validateContent($crawler)) {
return false;
if ($this->validateContent($response, $crawler)) {
return true;
}

return true;
return false;
}

public function validateContent(Crawler $crawler): bool
public function validateContent(Response $response, Crawler $crawler): bool
{
$realSentences = [];
$sentences = $this->getSentencesFromCrawler($crawler);

$sentences = $this->separateSentencesByDot($sentences);

$sentencesWithTooManyWords = $this->calculateSentencesWithTooManyWords($sentences);

$this->actualValue = $this->calculateSentencesWithTooManyWords($sentences);
$phrases = $this->extractPhrases(
$this->getTextContent($response, $crawler)
);

$sentencesWithTooManyWords = $this->calculateSentencesWithTooManyWords($phrases);
$this->actualValue = $sentencesWithTooManyWords;

if (count($sentencesWithTooManyWords) === 0) {
return true;
}

// If more than 20% of the total sentences are too long, fail
if (count($sentencesWithTooManyWords) / count($sentences) > 0.2) {
if (count($sentencesWithTooManyWords) / count($phrases) > 0.2) {
$this->failureReason = __('failed.content.too_long_sentence', [
'actualValue' => count($this->actualValue),
]);
Expand All @@ -63,37 +63,6 @@ public function validateContent(Crawler $crawler): bool
return true;
}

private function separateSentencesByDot(array $sentences): array
{
$newSentences = [];

foreach ($sentences as $sentence) {
$sentence = explode('.', $sentence);
$newSentences = array_merge($newSentences, $sentence);
}

// Remove all sentences that are empty
$sentences = array_filter($newSentences, function ($sentence) {
return ! empty($sentence);
});

return $sentences;
}

private function getSentencesFromCrawler(Crawler $crawler): array
{
$content = $crawler->filterXPath('//body')->children();

// Get all elements that contain text
$content = $content->filterXPath('//*/text()[normalize-space()]');

$content = $content->each(function (Crawler $node, $i) {
return $node->text();
});

return $content;
}

private function calculateSentencesWithTooManyWords(array $sentences): array
{
$tooLongSentences = [];
Expand Down
35 changes: 10 additions & 25 deletions src/Checks/Content/TransitionWordRatioCheck.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,17 @@
namespace Vormkracht10\Seo\Checks\Content;

use Illuminate\Http\Client\Response;
use Readability\Readability;
use Symfony\Component\DomCrawler\Crawler;
use Vormkracht10\Seo\Helpers\TransitionWords;
use Vormkracht10\Seo\Traits\Actions;
use Vormkracht10\Seo\Interfaces\Check;
use Symfony\Component\DomCrawler\Crawler;
use Vormkracht10\Seo\Traits\PerformCheck;
use Vormkracht10\Seo\Helpers\TransitionWords;

class TransitionWordRatioCheck implements Check
{
use PerformCheck;

use PerformCheck,
Actions;

public string $title = 'Transition word ratio check';

public string $priority = 'medium';
Expand Down Expand Up @@ -40,17 +41,7 @@ public function check(Response $response, Crawler $crawler): bool

public function validateContent(Response $response, Crawler $crawler): bool
{
$body = $response->body();

if ($this->useJavascript) {
$body = $crawler->filter('body')->html();
}

$readability = new Readability($body);

$readability->init();

$content = $readability->getContent()->textContent;
$content = $this->getTextContent($response, $crawler);

if ($content == 'Sorry, Readability was unable to parse this page for content.') {
$this->failureReason = __('failed.content.length.parse');
Expand All @@ -75,15 +66,9 @@ public function validateContent(Response $response, Crawler $crawler): bool

public function calculatePercentageOfTransitionWordsInContent($content, $transitionWords)
{
// Get phrases seperate by new line, dot, exclamation mark or question mark
$phrases = preg_split('/\n|\.|\!|\?/', $content);

// Count all phrases where it has more than 5 words
$totalPhrases = array_filter($phrases, function ($phrase) {
return str_word_count($phrase) > 5;
});
$phrases = $this->extractPhrases($content);

if (count($totalPhrases) === 0) {
if (count($phrases) === 0) {
$this->actualValue = 0;
$this->failureReason = __('failed.content.transition_words_ratio_check.no_phrases_found');

Expand All @@ -96,7 +81,7 @@ public function calculatePercentageOfTransitionWordsInContent($content, $transit
$phrasesWithTransitionWord += $this->calculateNumberOfPhrasesWithTransitionWord($content, $transitionWord);
}

return round($phrasesWithTransitionWord / count($totalPhrases) * 100, 0, PHP_ROUND_HALF_UP);
return round($phrasesWithTransitionWord / count($phrases) * 100, 0, PHP_ROUND_HALF_UP);
}

public function calculateNumberOfPhrasesWithTransitionWord(string $content, string $transitionWord): int
Expand Down
32 changes: 32 additions & 0 deletions src/Traits/Actions.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<?php

namespace Vormkracht10\Seo\Traits;

use Closure;
use Readability\Readability;
use Illuminate\Http\Client\Response;
use Symfony\Component\DomCrawler\Crawler;

/**
 * Shared text-extraction helpers for content checks.
 *
 * The class using this trait must provide a `$useJavascript` property;
 * getTextContent() reads it to decide which HTML source to parse.
 */
trait Actions
{
    /**
     * Run Readability over the page HTML and return the text of the extracted content.
     *
     * @param Response $response HTTP response whose body is parsed by default.
     * @param Crawler  $crawler  Crawler whose <body> HTML is parsed instead when
     *                           `$this->useJavascript` is truthy.
     */
    private function getTextContent(Response $response, Crawler $crawler): string
    {
        $body = $response->body();

        if ($this->useJavascript) {
            $body = $crawler->filter('body')->html();
        }

        $readability = new Readability($body);

        $readability->init();

        return $readability->getContent()->textContent;
    }

    /**
     * Split text into phrases separated by a new line, dot, exclamation mark
     * or question mark.
     *
     * Fragments that are empty or contain only whitespace are dropped, so the
     * result counts real phrases only and is empty when the content has none.
     * Kept phrases are returned untrimmed.
     *
     * @return list<string>
     */
    private function extractPhrases(string $content): array
    {
        $fragments = preg_split('/\n|\.|\!|\?/', $content);

        // preg_split() returns false on a PCRE failure; returning that as-is
        // would violate the declared array return type.
        if ($fragments === false) {
            return [];
        }

        // Consecutive or trailing delimiters ("...", "?!", blank lines) yield
        // empty fragments that would otherwise be counted as phrases.
        return array_values(array_filter(
            $fragments,
            static fn (string $fragment): bool => trim($fragment) !== ''
        ));
    }
}

0 comments on commit 5afdfee

Please sign in to comment.