@@ -528,6 +528,133 @@ public static function infix_to_rpn(array $tokens): array {
528528 return $ output ;
529529 }
530530
/**
 * Translate a unit expression from infix into RPN notation via Dijkstra's shunting yard
 * algorithm, because this makes evaluation much easier.
 *
 * Compared to ordinary expressions, unit expressions have a few peculiarities that are
 * handled here:
 * - an implicit multiplication is inserted between UNIT/UNIT, UNIT/(, )/UNIT and )/(
 * - multiplication is treated as binding tighter than division, so J / m K means J / (m K)
 * - the caret ^ is treated like ** when looking up its precedence
 * - a minus sign is only accepted as a unary minus; if a number follows, the sign is
 *   folded into that number
 *
 * Invalid input is reported via self::die() together with the offending token.
 * Note that a NUMBER token following a unary minus is modified in place (its value is negated).
 *
 * @param array $tokens the tokens forming the expression that is to be translated
 * @return array the tokens in RPN order
 */
public static function unit_infix_to_rpn(array $tokens): array {
    $output = [];
    $opstack = [];

    $lasttoken = null;
    $lasttype = null;
    $lastvalue = null;
    foreach ($tokens as $token) {
        $type = $token->type;
        $value = $token->value;

        // Remember type and value of the preceding token; both stay null for the first token.
        if (!is_null($lasttoken)) {
            $lasttype = $lasttoken->type;
            $lastvalue = $lasttoken->value;
        }

        // Insert implicit multiplication sign between two consecutive UNIT tokens, between a
        // UNIT and a parenthesis or between a closing and an opening parenthesis.
        // For accurate error reporting, the row and column number of the implicit
        // multiplication token are copied over from the current token which triggered
        // the multiplication.
        $unitunit = ($lasttype === token::UNIT && $type === token::UNIT);
        $unitparen = ($lasttype === token::UNIT && $type === token::OPENING_PAREN);
        $parenunit = ($lasttype === token::CLOSING_PAREN && $type === token::UNIT);
        $parenparen = ($lasttype === token::CLOSING_PAREN && $type === token::OPENING_PAREN);
        if ($unitunit || $unitparen || $parenunit || $parenparen) {
            // For backwards compatibility, division will have a lower precedence than multiplication,
            // in order for J / m K to be interpreted as J / (m K). Instead of introducing a special
            // 'unit multiplication' pseudo-operator, we simply increase the multiplication's precedence
            // by one when flushing operators from the opstack.
            self::flush_higher_precedence($opstack, self::get_precedence('*') + 1, $output);
            $opstack[] = new token(token::OPERATOR, '*', $token->row, $token->column);
        }

        // Two consecutive operators are only possible if the unary minus follows exponentiation.
        // Note: We do not have to check whether the first of them is exponentiation, because we
        // only allow - in the exponent anyway.
        if ($type === token::OPERATOR && $lasttype === token::OPERATOR && $value !== '-') {
            self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);
        }

        switch ($type) {
            // UNIT tokens go straight to the output queue.
            case token::UNIT:
                $output[] = $token;
                break;

            // Numbers go to the output queue.
            case token::NUMBER:
                // If the last token was the unary minus, we multiply the number by -1 before
                // sending it to the output queue. Afterwards, we can remove the minus from the
                // opstack; it is on top, because it was pushed during the previous iteration.
                if ($lasttype === token::OPERATOR && $lastvalue === '-') {
                    $token->value = -$token->value;
                    array_pop($opstack);
                }
                $output[] = $token;
                break;

            // Opening parentheses go straight to the operator stack.
            case token::OPENING_PAREN:
                $opstack[] = $token;
                break;

            // A closing parenthesis means we flush all operators until we get to the
            // matching opening parenthesis.
            case token::CLOSING_PAREN:
                // A closing parenthesis must not occur immediately after an operator.
                if ($lasttype === token::OPERATOR) {
                    self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);
                }
                self::flush_until_paren($opstack, token::OPENING_PAREN, $output);
                break;

            // Deal with all the possible operators...
            case token::OPERATOR:
                // Expressions must not start with an operator.
                if (is_null($lasttoken)) {
                    self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);
                }
                // Operators must not follow an opening parenthesis, except for the unary minus.
                if ($lasttype === token::OPENING_PAREN && $value !== '-') {
                    self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);
                }
                // Before fetching the precedence, we must translate ^ (caret) into **, because
                // the ^ operator normally has a different meaning with lower precedence.
                // Only the local copy is changed; the token itself keeps its original value.
                if ($value === '^') {
                    $value = '**';
                }
                $thisprecedence = self::get_precedence($value);
                // We artificially increase the precedence of the multiplication operator, so that
                // it binds tighter than division. Legacy versions used implicit parens around the
                // denominator, e. g. the expression J / m K would be interpreted as J / (m * K).
                // This is consistent with what tools like Wolfram Alpha do, even though e. g.
                // 1 / 2 3 would be read as 3/2 both by Formulas Question and Wolfram Alpha. And even
                // if it were not, it is not possible to change that, because it could break
                // existing questions.
                if ($value === '*') {
                    $thisprecedence++;
                }
                // Flush operators with higher precedence, unless we have a unary minus, because
                // it is not left-associative.
                if ($value !== '-') {
                    self::flush_higher_precedence($opstack, $thisprecedence, $output);
                }
                // Put the operator on the stack.
                $opstack[] = $token;
                break;

            // If we still haven't dealt with the token, there must be a problem with the input.
            default:
                self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);
        }

        $lasttoken = $token;
    }

    // The last token must be either a number (in exponent), a closing parenthesis or a unit.
    // An operator at the very end has no right-hand operand, so we reject it here rather than
    // passing an incomplete RPN sequence on to the evaluation.
    if (!is_null($lasttoken) && $lasttoken->type === token::OPERATOR) {
        self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $lasttoken->value), $lasttoken);
    }

    // After the last token, flush whatever is left on the opstack.
    self::flush_all($opstack, $output);
    return $output;
}
657+
531658 /**
532659 * Stop processing and indicate the human readable position (row/column) where the error occurred.
533660 *
0 commit comments