Skip to content

Commit

Permalink
Merge pull request #4 from heathdutton/master
Browse files Browse the repository at this point in the history
Prevent exceptions when different cased idioms / Fix infinite loops when encountering numbers.
  • Loading branch information
davmixcool authored Jul 11, 2020
2 parents 7cfd4bf + e7e238c commit 13dc120
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 19 deletions.
9 changes: 5 additions & 4 deletions src/Analyzer.php
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,8 @@ public function getSentiment($text)

$text_no_emoji = '';
$prev_space = true;
foreach($this->str_split_unicode($text) as $unichr ) {

foreach($this->str_split_unicode($text) as $unichr ) {
if (array_key_exists($unichr, $this->emojis)) {
$description = $this->emojis[$unichr];
if (!($prev_space)) {
Expand Down Expand Up @@ -384,8 +384,9 @@ public function _idioms_check($wordInContext, $valence)
$sequences = [$onezero, $twoonezero, $twoone, $threetwoone, $threetwo];

foreach ($sequences as $seq) {
if (array_key_exists(strtolower($seq), Config::SPECIAL_CASE_IDIOMS)) {
$valence = Config::SPECIAL_CASE_IDIOMS[$seq];
$key = strtolower($seq);
if (array_key_exists($key, Config::SPECIAL_CASE_IDIOMS)) {
$valence = Config::SPECIAL_CASE_IDIOMS[$key];
break;
}

Expand Down
30 changes: 15 additions & 15 deletions src/Procedures/SentiText.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@

class SentiText
{

private $text = "";
public $words_and_emoticons = null;
public $is_cap_diff = null;

const PUNC_LIST = [".", "!", "?", ",", ";", ":", "-", "'", "\"",
"!!", "!!!", "??", "???", "?!?", "!?!", "?!?!", "!?!?"];


function __construct($text)
{
//check that the input is a string
Expand All @@ -29,7 +29,7 @@ function __construct($text)
// adjacent punctuation (keeps emoticons & contractions)
$this->is_cap_diff = $this->allcap_differential($this->words_and_emoticons);
}

/*
Remove all punctuation from a string
*/
Expand All @@ -38,16 +38,16 @@ function strip_punctuation($string)
//$string = strtolower($string);
return preg_replace("/[[:punct:]]+/", "", $string);
}

/*
Count how many times $needle occurs in $haystack.
Returns 0 when in_array() does not find $needle; otherwise returns the
entry for $needle from array_count_values($haystack).
*/
function array_count_values_of($haystack, $needle)
{
// NOTE(review): this page is a diff view, so the next two lines appear to be
// the pre-change and post-change forms of one statement - presumably only the
// strict form (third argument true) remains in the committed file; confirm.
if (!in_array($needle, $haystack)) {
if (!in_array($needle, $haystack, true)) {
return 0;
}
// Tally every value once, then read the tally for $needle.
$counts = array_count_values($haystack);
return $counts[$needle];
}

/*
Check whether just some words in the input are ALL CAPS
Expand All @@ -71,7 +71,7 @@ private function allcap_differential($words)
}
return $is_different;
}

function _words_only()
{
$text_mod = $this->strip_punctuation($this->text);
Expand All @@ -86,26 +86,26 @@ function _words_only()

function _words_and_emoticons()
{

$wes = preg_split('/\s+/', $this->text);

# get rid of residual empty items or single letter words
$wes = array_filter($wes, function ($word) {
return strlen($word) > 1;
});
//Need to remap the indexes of the array
$wes = array_values($wes);
$words_only = $this->_words_only();

foreach ($words_only as $word) {
foreach (self::PUNC_LIST as $punct) {
//replace all punct + word combinations with word
$pword = $punct .$word;


$x1 = $this->array_count_values_of($wes, $pword);
while ($x1 > 0) {
$i = array_search($pword, $wes);
$i = array_search($pword, $wes, true);
unset($wes[$i]);
array_splice($wes, $i, 0, $word);
$x1 = $this->array_count_values_of($wes, $pword);
Expand All @@ -114,7 +114,7 @@ function _words_and_emoticons()
$wordp = $word . $punct;
$x2 = $this->array_count_values_of($wes, $wordp);
while ($x2 > 0) {
$i = array_search($wordp, $wes);
$i = array_search($wordp, $wes, true);
unset($wes[$i]);
array_splice($wes, $i, 0, $word);
$x2 = $this->array_count_values_of($wes, $wordp);
Expand Down

0 comments on commit 13dc120

Please sign in to comment.