Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions classes/local/parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class parser {
protected array $tokenlist;

/** @var int number of (raw) tokens */
private int $count;
protected int $count;

/** @var int position w.r.t. list of (raw) tokens */
private int $position = -1;
Expand Down Expand Up @@ -98,7 +98,7 @@ private function parse_the_right_thing(token $token) {
*
* @return void
*/
private function check_unbalanced_parens(): void {
protected function check_unbalanced_parens(): void {
$parenstack = [];
foreach ($this->tokenlist as $token) {
$type = $token->type;
Expand Down Expand Up @@ -399,7 +399,7 @@ private function peek(int $skip = 0): ?token {
*
* @return token|null
*/
private function read_next(): ?token {
protected function read_next(): ?token {
$nexttoken = $this->peek();
if ($nexttoken !== self::EOF) {
$this->position++;
Expand Down
127 changes: 127 additions & 0 deletions classes/local/shunting_yard.php
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,133 @@ public static function infix_to_rpn(array $tokens): array {
return $output;
}

/**
 * Translate unit expression from infix into RPN notation via Dijkstra's shunting yard algorithm,
 * because this makes evaluation much easier.
 *
 * Particularities of unit expressions handled here:
 * - An implicit multiplication operator is inserted between two consecutive UNIT tokens, between
 *   a UNIT and an opening parenthesis, between a closing parenthesis and a UNIT, and between a
 *   closing and an opening parenthesis.
 * - Multiplication (implicit or explicit) is treated with a precedence one higher than the value
 *   returned by get_precedence('*'), in order for J / m K to be interpreted as J / (m K).
 * - The caret ^ is looked up with the precedence of the ** operator.
 * - A unary minus directly preceding a number is folded into that number; the minus operator
 *   itself does not appear in the output.
 *
 * Invalid input is reported via self::die() together with the offending token.
 *
 * The tokens passed in are not modified: when a number has to be negated, a copy of the token
 * is put into the output queue.
 *
 * @param array $tokens the tokens forming the expression that is to be translated
 * @return array the tokens of the expression in RPN order
 */
public static function unit_infix_to_rpn($tokens): array {
    $output = [];
    $opstack = [];

    $lasttoken = null;
    $lasttype = null;
    $lastvalue = null;
    foreach ($tokens as $token) {
        $type = $token->type;
        $value = $token->value;

        if (!is_null($lasttoken)) {
            $lasttype = $lasttoken->type;
            $lastvalue = $lasttoken->value;
        }

        // Insert implicit multiplication sign between two consecutive UNIT tokens, or between
        // a UNIT token and a parenthesis, or between a closing and an opening parenthesis.
        // For accurate error reporting, the row and column number of the implicit
        // multiplication token are copied over from the current token which triggered
        // the multiplication.
        $unitunit = ($lasttype === token::UNIT && $type === token::UNIT);
        $unitparen = ($lasttype === token::UNIT && $type === token::OPENING_PAREN);
        $parenunit = ($lasttype === token::CLOSING_PAREN && $type === token::UNIT);
        $parenparen = ($lasttype === token::CLOSING_PAREN && $type === token::OPENING_PAREN);
        if ($unitunit || $unitparen || $parenunit || $parenparen) {
            // For backwards compatibility, division will have a lower precedence than multiplication,
            // in order for J / m K to be interpreted as J / (m K). Instead of introducing a special
            // 'unit multiplication' pseudo-operator, we simply increase the multiplication's precedence
            // by one when flushing operators from the opstack.
            self::flush_higher_precedence($opstack, self::get_precedence('*') + 1, $output);
            $opstack[] = new token(token::OPERATOR, '*', $token->row, $token->column);
        }

        // Two consecutive operators are only possible if the unary minus follows exponentiation.
        // Note: We do not have to check whether the first of them is exponentiation, because we
        // only allow - in the exponent anyway.
        if ($type === token::OPERATOR && $lasttype === token::OPERATOR && $value !== '-') {
            self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);
        }

        switch ($type) {
            // UNIT tokens go straight to the output queue.
            case token::UNIT:
                $output[] = $token;
                break;

            // Numbers go to the output queue.
            case token::NUMBER:
                // If the last token was the unary minus, we multiply the number by -1 before
                // sending it to the output queue. Afterwards, we can remove the minus from the opstack.
                if ($lasttype === token::OPERATOR && $lastvalue === '-') {
                    // Negate a copy rather than the original token. The minus operator remains in
                    // the caller's token list, so changing the original in place would flip the
                    // sign back if the same list were translated a second time.
                    $token = clone $token;
                    $token->value = -$token->value;
                    array_pop($opstack);
                }
                $output[] = $token;
                break;

            // Opening parentheses go straight to the operator stack.
            case token::OPENING_PAREN:
                $opstack[] = $token;
                break;

            // A closing parenthesis means we flush all operators until we get to the
            // matching opening parenthesis.
            case token::CLOSING_PAREN:
                // A closing parenthesis must not occur immediately after an operator.
                if ($lasttype === token::OPERATOR) {
                    self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);
                }
                self::flush_until_paren($opstack, token::OPENING_PAREN, $output);
                break;

            // Deal with all the possible operators...
            case token::OPERATOR:
                // Expressions must not start with an operator.
                if (is_null($lasttoken)) {
                    self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);
                }
                // Operators must not follow an opening parenthesis, except for the unary minus.
                if ($lasttype === token::OPENING_PAREN && $value !== '-') {
                    self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);
                }
                // Before fetching the precedence, we must translate ^ (caret) into **, because
                // the ^ operator normally has a different meaning with lower precedence.
                // Note that only the local copy of the value is changed; the token itself is
                // pushed to the stack with its original value.
                if ($value === '^') {
                    $value = '**';
                }
                $thisprecedence = self::get_precedence($value);
                // We artificially increase the precedence of the multiplication operator, so it
                // ends up higher than the one of the division operator. The reason is that
                // legacy versions used implicit parens around the denominator, e. g.
                // the expression J / m K would be interpreted as J / (m * K). This is consistent
                // with what tools like Wolfram Alpha do, even though e. g. 1 / 2 3 would be read
                // as 3/2 both by Formulas Question and Wolfram Alpha. And even if it were not, it
                // is not possible to change that, because it could break existing questions.
                if ($value === '*') {
                    $thisprecedence++;
                }
                // Flush operators with higher precedence, unless we have a unary minus, because
                // it is not left-associative.
                if ($value !== '-') {
                    self::flush_higher_precedence($opstack, $thisprecedence, $output);
                }
                // Put the operator on the stack.
                $opstack[] = $token;
                break;

            // If we still haven't dealt with the token, there must be a problem with the input.
            default:
                self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);

        }

        $lasttoken = $token;
    }
    // After last token, flush opstack. Last token must be either a number (in exponent),
    // a closing parenthesis or a unit.
    self::flush_all($opstack, $output);
    return $output;
}

/**
* Stop processing and indicate the human readable position (row/column) where the error occurred.
*
Expand Down
3 changes: 3 additions & 0 deletions classes/local/token.php
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ class token {
/** @var int used to designate a token storing an end-of-group marker (closing brace) */
const END_GROUP = 4194304;

/** @var int used to designate a token storing a unit */
const UNIT = 8388608;

/** @var mixed the token's content, will be the name for identifiers */
public $value;

Expand Down
Loading
Loading