diff --git a/lib/PPI/Token/Unknown.pm b/lib/PPI/Token/Unknown.pm index 9ef7f74e..c628a7a2 100644 --- a/lib/PPI/Token/Unknown.pm +++ b/lib/PPI/Token/Unknown.pm @@ -76,19 +76,14 @@ sub __TOKENIZER__on_char { } } - if ( $char eq '$' ) { - my $_class = $self->_cast_or_op( $t ); - # Set class and rerun - $t->{class} = $t->{token}->set_class( $_class ); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); - } - if ( $char eq '*' || $char eq '=' ) { # Power operator '**' or mult-assign '*=' $t->{class} = $t->{token}->set_class( 'Operator' ); return 1; } + return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char); + $t->{class} = $t->{token}->set_class( 'Operator' ); return $t->_finalize_token->__TOKENIZER__on_char( $t ); @@ -176,18 +171,13 @@ sub __TOKENIZER__on_char { # Get rest of line pos $t->{line} = $t->{line_cursor} + 1; if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) { - # control-character symbol (e.g. @{^_Foo}) + # control-character symbol (e.g. %{^_Foo}) $t->{class} = $t->{token}->set_class( 'Magic' ); return 1; } } - if ( $char =~ /[\$@%*{]/ ) { - # It's a cast - $t->{class} = $t->{token}->set_class( 'Cast' ); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); - - } + return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char); # Probably the mod operator $t->{class} = $t->{token}->set_class( 'Operator' ); @@ -209,11 +199,7 @@ sub __TOKENIZER__on_char { return 1; } - if ( $char =~ /[\$@%{]/ ) { - # The ampersand is a cast - $t->{class} = $t->{token}->set_class( 'Cast' ); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); - } + return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char); # Probably the binary and operator $t->{class} = $t->{token}->set_class( 'Operator' ); @@ -271,26 +257,95 @@ sub __TOKENIZER__on_char { PPI::Exception->throw('Unknown value in PPI::Token::Unknown token'); } +sub _is_cast_or_op { + my ( $self, $char ) = @_; + return 1 if $char eq '$'; + return 1 if $char eq '@'; + return 1 if $char eq '%'; + return 1 if $char eq '*'; + return 1 if $char eq '{'; + return; +} + +sub _as_cast_or_op { + my ( $self, $t ) = @_; + my $class = _cast_or_op( $t ); + $t->{class} = $t->{token}->set_class( $class ); + return $t->_finalize_token->__TOKENIZER__on_char( $t ); +} + +sub _prev_significant_w_cursor { + my ( $tokens, $cursor, $extra_check ) = @_; + while ( $cursor >= 0 ) { + my $token = $tokens->[ $cursor-- ]; + next if !$token->significant; + next if $extra_check and !$extra_check->($token); + return ( $token, $cursor ); + } + return ( undef, $cursor ); +} + # Operator/operand-sensitive, multiple or GLOB cast sub _cast_or_op { - my ( undef, $t ) = @_; - my ( $prev ) = @{ $t->_previous_significant_tokens(1) }; - return 'Cast' if !$prev; - - return 'Operator' if - $prev->isa('PPI::Token::Symbol') - or - $prev->isa('PPI::Token::Number') - or - ( - $prev->isa('PPI::Token::Structure') - and - $prev->content =~ /^(?:\)|\])$/ + my ( $t ) = @_; + + my $tokens = $t->{tokens}; + my $cursor = scalar( @$tokens ) - 1; + my $token; + + ( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor ); + return 'Cast' if !$token; # token was first in the document + + if ( $token->isa( 'PPI::Token::Structure' ) and $token->content eq '}' ) { + + # Scan the token stream backwards an arbitrarily long way, + # looking for the matching opening curly brace. + my $structure_depth = 1; + ( $token, $cursor ) = _prev_significant_w_cursor( + $tokens, $cursor, + sub { + my ( $token ) = @_; + return if !$token->isa( 'PPI::Token::Structure' ); + if ( $token eq '}' ) { + $structure_depth++; + return; + } + if ( $token eq '{' ) { + $structure_depth--; + return if $structure_depth; + } + return 1; + } ); + return 'Operator' if !$token; # no matching '{', probably an unbalanced '}' + + # Scan past any whitespace + ( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor ); + return 'Operator' if !$token; # Document began with what must be a hash constructor. + return 'Operator' if $token->isa( 'PPI::Token::Symbol' ); # subscript + + my %meth_or_subscript_end = map { $_ => 1 } qw@ -> } ] @; + return 'Operator' if $meth_or_subscript_end{ $token->content }; # subscript + + my $content = $token->content; + my $produces_or_wants_value = + ( $token->isa( 'PPI::Token::Word' ) and ( $content eq 'do' or $content eq 'eval' ) ); + return $produces_or_wants_value ? 'Operator' : 'Cast'; + } + + my %list_start_or_term_end = map { $_ => 1 } qw@ ; ( { [ @; + return 'Cast' + if $token->isa( 'PPI::Token::Structure' ) and $list_start_or_term_end{ $token->content } + or $token->isa( 'PPI::Token::Cast' ) + or $token->isa( 'PPI::Token::Operator' ) + or $token->isa( 'PPI::Token::Label' ); + + return 'Operator' if !$token->isa( 'PPI::Token::Word' ); + + ( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor ); + return 'Cast' if !$token || $token->content ne '->'; - # This is pretty weak, there's room for a dozen more tests before going with - # a default. Or even better, a proper operator/operand method :( - return 'Cast'; + return 'Operator'; } # Are we at a location where a ':' would indicate a subroutine attribute diff --git a/t/ppi_token_unknown.t b/t/ppi_token_unknown.t index 04c2e950..2367b9ee 100644 --- a/t/ppi_token_unknown.t +++ b/t/ppi_token_unknown.t @@ -100,8 +100,6 @@ OPERATOR_CAST: { test_varying_whitespace( @nothing, @asterisk_cast, @scalar ); } -{ - local %known_bad_seps = map { $_ => 1 } qw( null ); test_varying_whitespace( @number, @percent_op, @scalar ); test_varying_whitespace( @number, @percent_op, @list ); test_varying_whitespace( @number, @percent_op, @hash ); @@ -109,7 +107,6 @@ OPERATOR_CAST: { test_varying_whitespace( @number, @percent_op, @hashctor1 ); test_varying_whitespace( @number, @percent_op, @hashctor2 ); test_varying_whitespace( @number, @percent_op, @hashctor3 ); -} test_varying_whitespace( @number, @percenteq_op, @bareword ); test_varying_whitespace( @number, @percenteq_op, @hashctor3 ); # doesn't compile, but make sure it's an operator { @@ -117,19 +114,14 @@ OPERATOR_CAST: { test_varying_whitespace( @nothing, @percent_cast, @scalar ); } -{ - local %known_bad_seps = map { $_ => 1 } qw( null ); test_varying_whitespace( @number, @ampersand_op, @scalar ); test_varying_whitespace( @number, @ampersand_op, @list ); test_varying_whitespace( @number, @ampersand_op, @hash ); -} + test_varying_whitespace( @number, @ampersand_op, @glob ); -{ - local %known_bad_seps = map { $_ => 1 } qw( null ); test_varying_whitespace( @number, @ampersand_op, @hashctor1 ); test_varying_whitespace( @number, @ampersand_op, @hashctor2 ); test_varying_whitespace( @number, @ampersand_op, @hashctor3 ); -} test_varying_whitespace( @number, @ampersandeq_op, @bareword ); test_varying_whitespace( @number, @ampersandeq_op, @hashctor3 ); # doesn't compile, but make sure it's an operator { @@ -156,10 +148,11 @@ OPERATOR_CAST: { } my @single = ( "'3'", [ 'PPI::Token::Quote::Single' => "'3'", ] ); + test_varying_whitespace( @single, @asterisk_op, @scalar ); { local %known_bad_seps = map { $_ => 1 } qw( null ); - test_varying_whitespace( @single, @asterisk_op, @scalar ); test_varying_whitespace( @single, @asterisk_op, @hashctor3 ); +} test_varying_whitespace( @single, @percent_op, @scalar ); test_varying_whitespace( @single, @percent_op, @hashctor3 ); test_varying_whitespace( @single, @ampersand_op, @scalar ); @@ -167,16 +160,16 @@ OPERATOR_CAST: { my @double = ( '"3"', [ 'PPI::Token::Quote::Double' => '"3"', ] ); test_varying_whitespace( @double, @asterisk_op, @scalar ); +{ + local %known_bad_seps = map { $_ => 1 } qw( null ); test_varying_whitespace( @double, @asterisk_op, @hashctor3 ); +} test_varying_whitespace( @double, @percent_op, @scalar ); test_varying_whitespace( @double, @percent_op, @hashctor3 ); test_varying_whitespace( @double, @ampersand_op, @scalar ); test_varying_whitespace( @double, @ampersand_op, @hashctor3 ); -} test_varying_whitespace( @scalar, @asterisk_op, @scalar ); -{ - local %known_bad_seps = map { $_ => 1 } qw( null ); test_varying_whitespace( @scalar, @percent_op, @scalar ); test_varying_whitespace( @scalar, @ampersand_op, @scalar ); @@ -192,7 +185,7 @@ OPERATOR_CAST: { ] ); { - local %known_bad_seps = ( %known_bad_seps, map { $_ => 1 } qw( space ) ); + local %known_bad_seps = map { $_ => 1 } qw( null space ); test_varying_whitespace( @package, @asterisk_cast, @scalar, 1 ); test_varying_whitespace( @package, @asterisk_cast, @hashctor3, 1 ); test_varying_whitespace( @package, @percent_cast, @scalar, 1 ); @@ -201,7 +194,6 @@ OPERATOR_CAST: { test_varying_whitespace( @package, @ampersand_cast, @hashctor3, 1 ); test_varying_whitespace( @package, @at_cast, @scalar, 1 ); test_varying_whitespace( @package, @at_cast, @listctor, 1 ); -} } my @sub = ( @@ -300,10 +292,11 @@ OPERATOR_CAST: { 'PPI::Token::Structure' => '}', ] ); + test_varying_whitespace( @evalblock, @asterisk_op, @scalar ); { local %known_bad_seps = map { $_ => 1 } qw( null ); - test_varying_whitespace( @evalblock, @asterisk_op, @scalar ); test_varying_whitespace( @evalblock, @asterisk_op, @hashctor3 ); +} test_varying_whitespace( @evalblock, @percent_op, @scalar ); test_varying_whitespace( @evalblock, @percent_op, @hashctor3 ); test_varying_whitespace( @evalblock, @ampersand_op, @scalar ); @@ -317,12 +310,14 @@ OPERATOR_CAST: { ] ); test_varying_whitespace( @evalstring, @asterisk_op, @scalar ); +{ + local %known_bad_seps = map { $_ => 1 } qw( null ); test_varying_whitespace( @evalstring, @asterisk_op, @hashctor3 ); +} test_varying_whitespace( @evalstring, @percent_op, @scalar ); test_varying_whitespace( @evalstring, @percent_op, @hashctor3 ); test_varying_whitespace( @evalstring, @ampersand_op, @scalar ); test_varying_whitespace( @evalstring, @ampersand_op, @hashctor3 ); -} my @curly_subscript1 = ( '$y->{x}', @@ -383,8 +378,6 @@ OPERATOR_CAST: { ] ); -{ - local %known_bad_seps = map { $_ => 1 } qw( null ); test_varying_whitespace( @curly_subscript1, @asterisk_op, @scalar ); test_varying_whitespace( @curly_subscript1, @percent_op, @scalar ); test_varying_whitespace( @curly_subscript1, @ampersand_op, @scalar ); @@ -394,13 +387,9 @@ OPERATOR_CAST: { test_varying_whitespace( @curly_subscript3, @asterisk_op, @scalar ); test_varying_whitespace( @curly_subscript3, @percent_op, @scalar ); test_varying_whitespace( @curly_subscript3, @ampersand_op, @scalar ); -} test_varying_whitespace( @square_subscript1, @asterisk_op, @scalar ); -{ - local %known_bad_seps = map { $_ => 1 } qw( null ); test_varying_whitespace( @square_subscript1, @percent_op, @scalar ); test_varying_whitespace( @square_subscript1, @ampersand_op, @scalar ); -} { local %known_bad_seps = map { $_ => 1 } qw( space ); @@ -411,8 +400,6 @@ OPERATOR_CAST: { test_varying_whitespace( 'values', [ 'PPI::Token::Word' => 'values' ], @percent_cast, @hashctor3 ); } -TODO: { - local $TODO = "known bug"; test_statement( '} *$a', # unbalanced '}' before '*', arbitrary decision [ @@ -423,7 +410,6 @@ TODO: { 'PPI::Token::Symbol' => '$a', ] ); -} test_statement( '$bar = \%*$foo', # multiple consecutive casts @@ -437,8 +423,6 @@ TODO: { ] ); -TODO: { - local $TODO = "known bug"; test_statement( '$#tmp*$#tmp2', [ @@ -447,7 +431,6 @@ TODO: { 'PPI::Token::ArrayIndex' => '$#tmp2', ] ); -} test_statement( '[ %{$req->parameters} ]', # preceded by '[' @@ -484,8 +467,6 @@ TODO: { ] ); -TODO: { - local $TODO = "known bug"; test_statement( '++$i%$f', # '%' wrongly a cast through 1.220. [ @@ -496,7 +477,6 @@ TODO: { 'PPI::Token::Symbol' => '$f', ] ); -} { # these need to be fixed in PPI::Lexer->_statement, fixing these will break other tests that need to be changed local $TODO = "clarify type of statement in constructor";