Skip to content

Commit

Permalink
Merge pull request #550 from leepeuker/fix-imdb-rating-scrapper
Browse files Browse the repository at this point in the history
Update IMDb rating scraping to match the latest HTML changes
  • Loading branch information
leepeuker authored Dec 3, 2023
2 parents 35de7b9 + a55c5d8 commit ddeb6a8
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 30 deletions.
14 changes: 7 additions & 7 deletions src/Api/Imdb/ImdbWebScrapper.php
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ private function extractProductionStatus(string $imdbRatingPage) : ?string

private function extractRatingAverage(string $imdbRatingPage, string $imdbId) : ?float
{
preg_match('/iZlgcd">(\d([.,])\d)/', $imdbRatingPage, $averageRatingMatches);
preg_match('/cMEQkK">(\d([.,])\d)/', $imdbRatingPage, $averageRatingMatches);
if (empty($averageRatingMatches[1]) === true) {
$this->logger->warning('IMDb: Could not extract rating average.', ['url' => $this->urlGenerator->buildMovieUrl($imdbId)]);

Expand All @@ -83,32 +83,32 @@ private function extractRatingAverage(string $imdbRatingPage, string $imdbId) :
private function extractRatingVoteCount(string $imdbRatingPage, string $imdbId) : ?int
{
// Handle numbers without suffix
preg_match('/bjjENQ">([0-9]+)</', $imdbRatingPage, $voteCountMatches);
preg_match('/gPVQxL">([0-9]+)</', $imdbRatingPage, $voteCountMatches);
if (empty($voteCountMatches[1]) === false) {
return (int)$voteCountMatches[1];
}
preg_match('/bjjENQ">([0-9]{1,3}([.,]?[0-9]{3})+)/', $imdbRatingPage, $voteCountMatches);
preg_match('/gPVQxL">([0-9]{1,3}([.,]?[0-9]{3})+)/', $imdbRatingPage, $voteCountMatches);
if (empty($voteCountMatches[1]) === false) {
return (int)str_replace([',', '.'], '', $voteCountMatches[1]);
}

// Handle numbers with K suffix
preg_match('/bjjENQ">([0-9]+)K</', $imdbRatingPage, $voteCountMatches);
preg_match('/gPVQxL">([0-9]+)K</', $imdbRatingPage, $voteCountMatches);
if (empty($voteCountMatches[1]) === false) {
return (int)$voteCountMatches[1] * 1000;
}
preg_match('/bjjENQ">([0-9]{1,3}[.,][0-9]{1,3})K</', $imdbRatingPage, $voteCountMatches);
preg_match('/gPVQxL">([0-9]{1,3}[.,][0-9]{1,3})K</', $imdbRatingPage, $voteCountMatches);
if (empty($voteCountMatches[1]) === false) {
return (int)((float)$voteCountMatches[1] * 1000);
}

// Handle simple numbers with M suffix
preg_match('/bjjENQ">([0-9]+)M</', $imdbRatingPage, $voteCountMatches);
preg_match('/gPVQxL">([0-9]+)M</', $imdbRatingPage, $voteCountMatches);
if (empty($voteCountMatches[1]) === false) {
return (int)$voteCountMatches[1] * 1000000;
}
// Handle decimal numbers with M suffix
preg_match('/bjjENQ">([0-9]{1,3}[.,][0-9]{1,3})M</', $imdbRatingPage, $voteCountMatches);
preg_match('/gPVQxL">([0-9]{1,3}[.,][0-9]{1,3})M</', $imdbRatingPage, $voteCountMatches);
if (empty($voteCountMatches[1]) === false) {
return (int)((float)$voteCountMatches[1] * 1000000);
}
Expand Down
46 changes: 23 additions & 23 deletions tests/unit/Api/Imdb/ImdbWebScrapperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,63 +27,63 @@ public function provideFindRatingData() : array
{
return [
[
'iZlgcd">7.9</span>
bjjENQ">229.240</div>',
'cMEQkK">7.9</span>
gPVQxL">229.240</div>',
ImdbRating::create(7.9, 229240)
],
'returns no rating if current production status is found' => [
'hjAonB">Post-production
iZlgcd">7.9</span>
bjjENQ">229.240</div>',
cMEQkK">7.9</span>
gPVQxL">229.240</div>',
null,
],
[
'iZlgcd">7,9</span>
bjjENQ">229,240</div>',
'cMEQkK">7,9</span>
gPVQxL">229,240</div>',
ImdbRating::create(7.9, 229240)
],
[
'iZlgcd">7,9</span>
bjjENQ">229240</div>',
'cMEQkK">7,9</span>
gPVQxL">229240</div>',
ImdbRating::create(7.9, 229240)
],
[
'iZlgcd">7,9</span>
bjjENQ">1.229,240</div>',
'cMEQkK">7,9</span>
gPVQxL">1.229,240</div>',
ImdbRating::create(7.9, 1229240)
],
[
'iZlgcd">7,9</span>
bjjENQ">40</div>',
'cMEQkK">7,9</span>
gPVQxL">40</div>',
ImdbRating::create(7.9, 40)
],
[
'iZlgcd">7,9</span>
bjjENQ">40K</div>',
'cMEQkK">7,9</span>
gPVQxL">40K</div>',
ImdbRating::create(7.9, 40000)
],
[
'iZlgcd">7,9</span>
bjjENQ">4.1K</div>',
'cMEQkK">7,9</span>
gPVQxL">4.1K</div>',
ImdbRating::create(7.9, 4100)
],
[
'iZlgcd">7,9</span>
bjjENQ">14.12K</div>',
'cMEQkK">7,9</span>
gPVQxL">14.12K</div>',
ImdbRating::create(7.9, 14120)
],
[
'iZlgcd">7,9</span>
bjjENQ">10M</div>',
'cMEQkK">7,9</span>
gPVQxL">10M</div>',
ImdbRating::create(7.9, 10000000)
],
[
'iZlgcd">7,9</span>
bjjENQ">10.1M</div>',
'cMEQkK">7,9</span>
gPVQxL">10.1M</div>',
ImdbRating::create(7.9, 10100000)
],
[
'iZlgcd">7,9</span>',
'cMEQkK">7,9</span>',
null
],
[
Expand Down

0 comments on commit ddeb6a8

Please sign in to comment.