Skip to content

Commit

Permalink
Fix duplicate range regexp warnings (rouge-ruby#2030)
Browse files Browse the repository at this point in the history
* Remove duplicate range regexp warnings

* Fixed a LOT of warnings from scala.

Turns out it was one local variable used in a lot of regexps. This
updates the link to the reference and fixes it per reference.

---------

Co-authored-by: Ryan Davis <[email protected]>
  • Loading branch information
tancnle and zenspider authored Mar 18, 2024
1 parent 0dbee7d commit 4a52324
Show file tree
Hide file tree
Showing 15 changed files with 32 additions and 28 deletions.
2 changes: 1 addition & 1 deletion lib/rouge/lexers/ada.rb
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def self.idents
end

# Flag word-like things that don't match the ID pattern.
rule %r{\b(\p{Pc}|[[alpha]])\p{Word}*}, Error
rule %r{\b(\p{Pc}|[[:alpha:]])\p{Word}*}, Error
end
end
end
Expand Down
8 changes: 4 additions & 4 deletions lib/rouge/lexers/clean.rb
Original file line number Diff line number Diff line change
Expand Up @@ -79,23 +79,23 @@ class instance

rule %r/code(\s+inline)?\s*{/, Comment::Preproc, :abc

rule %r/_*[a-z][\w_`]*/ do |m|
rule %r/_*[a-z][\w`]*/ do |m|
if self.class.keywords.include?(m[0])
token Keyword
else
token Name
end
end

rule %r/_*[A-Z][\w_`]*/ do |m|
rule %r/_*[A-Z][\w`]*/ do |m|
if m[0]=='True' || m[0]=='False'
token Keyword::Constant
else
token Keyword::Type
end
end

rule %r/[^\w_\s`]/, Punctuation
rule %r/[^\w\s`]/, Punctuation
rule %r/_\b/, Punctuation
end

Expand Down Expand Up @@ -136,7 +136,7 @@ class instance

rule %r/}/, Comment::Preproc, :pop!
rule %r/\.\w*/, Keyword, :abc_rest_of_line
rule %r/[\w_]+/, Name::Builtin, :abc_rest_of_line
rule %r/[\w]+/, Name::Builtin, :abc_rest_of_line
end

state :abc_rest_of_line do
Expand Down
4 changes: 2 additions & 2 deletions lib/rouge/lexers/ecl.rb
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,8 @@ def self.typed
mixin :single_quote

rule %r(\b(?i:(and|not|or|in))\b), Operator::Word
rule %r([:=|>|<|<>|/|\\|\+|-|=]), Operator
rule %r([\[\]{}();,\&,\.,\%]), Punctuation
rule %r(:=|>|<|<>|/|\\|\+|-|=), Operator
rule %r([\[\]{}();,\&\.\%]), Punctuation

rule %r(\b(?i:(beginc\+\+.*?endc\+\+)))m, Str::Single
rule %r(\b(?i:(embed.*?endembed)))m, Str::Single
Expand Down
2 changes: 1 addition & 1 deletion lib/rouge/lexers/elixir.rb
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def self.detect?(text)
rule %r/[\\#]/, toktype
end

uniq_chars = "#{open}#{close}".squeeze
uniq_chars = [open, close].uniq.join
rule %r/[^##{uniq_chars}\\]+/m, toktype
end
end
Expand Down
6 changes: 3 additions & 3 deletions lib/rouge/lexers/ghc_cmm.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ class GHCCmm < RegexLexer
ws = %r(\s|//.*?\n|/[*](?:[^*]|(?:[*][^/]))*[*]+/)mx

# Make sure that this is not a preprocessor macro, e.g. `#if` or `#define`.
id = %r((?!\#[a-zA-Z])[\w#\$%_']+)
id = %r((?!\#[a-zA-Z])[\w#\$%']+)

complex_id = %r(
(?:[\w#$%_']|\(\)|\(,\)|\[\]|[0-9])*
(?:[\w#$%_']+)
(?:[\w#$%']|\(\)|\(,\)|\[\]|[0-9])*
(?:[\w#$%']+)
)mx

state :root do
Expand Down
5 changes: 3 additions & 2 deletions lib/rouge/lexers/julia.rb
Original file line number Diff line number Diff line change
Expand Up @@ -252,15 +252,16 @@ def self.detect?(text)
rule %r/\d+/, Literal::Number::Integer
end

NAME_RE = %r/[\p{L}\p{Nl}\p{S}_][\p{Word}\p{S}\p{Po}!]*/

state :funcname do
rule %r/[\p{L}\p{Nl}\p{S}_][\p{Word}\p{S}\p{Po}!]*/, Name::Function, :pop!
rule NAME_RE, Name::Function, :pop!
rule %r/\([^\s\w{]{1,2}\)/, Operator, :pop!
rule %r/[^\s\w{]{1,2}/, Operator, :pop!
end

state :typename do
rule %r/[\p{L}\p{Nl}\p{S}_][\p{Word}\p{S}\p{Po}!]*/, Name::Class, :pop!
rule NAME_RE, Name::Class, :pop!
end

state :stringescape do
Expand Down
2 changes: 1 addition & 1 deletion lib/rouge/lexers/kotlin.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class Kotlin < RegexLexer
while yield
)

name_chars = %r'[-_\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Nl}\p{Nd}\p{Pc}\p{Cf}\p{Mn}\p{Mc}]*'
name_chars = %r'[-\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Nl}\p{Nd}\p{Pc}\p{Cf}\p{Mn}\p{Mc}]*'

class_name = %r'`?[\p{Lu}]#{name_chars}`?'
name = %r'`?[_\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Nl}]#{name_chars}`?'
Expand Down
2 changes: 1 addition & 1 deletion lib/rouge/lexers/make.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def initialize(opts={})
end

state :export do
rule %r/[\w[\$]{1,2}{}()-]/, Name::Variable
rule %r/[\w\${}()-]/, Name::Variable
rule %r/\n/, Text, :pop!
rule %r/[\t ]+/, Text
end
Expand Down
4 changes: 2 additions & 2 deletions lib/rouge/lexers/ocl.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ def self.functions
rule %r/--.*/, Comment::Single
rule %r/\d+/, Num::Integer
rule %r/'/, Str::Single, :single_string
rule %r([->|+*/<>=~!@#%&|?^-]), Operator
rule %r([-|+*/<>=~!@#%&?^]), Operator
rule %r/[;:()\[\],.]/, Punctuation
rule %r/\w[\w\d]*/ do |m|
rule %r/[a-zA-Z]\w*/ do |m|
if self.class.operators.include? m[0]
token Operator
elsif self.class.keywords_type.include? m[0]
Expand Down
10 changes: 5 additions & 5 deletions lib/rouge/lexers/plsql.rb
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ def self.keywords_type
# A double-quoted string refers to a database object in our default SQL
rule %r/"/, Operator, :double_string
# preprocessor directive treated as special comment
rule %r/(\$(?:IF|THEN|ELSE|ELSIF|ERROR|END|(?:\$\$?\w[\w\d]*)))(\s+)/im do
rule %r/(\$(?:IF|THEN|ELSE|ELSIF|ERROR|END|(?:\$\$?[a-z]\w*)))(\s+)/im do
groups Comment::Preproc, Text
end

Expand Down Expand Up @@ -503,7 +503,7 @@ def self.keywords_type
# Special processing for keywords with multiple contexts
#
# this madness is to keep the word "replace" from being treated as a builtin function in this context
rule %r/(create)(\s+)(?:(or)(\s+)(replace)(\s+))?(package|function|procedure|type)(?:(\s+)(body))?(\s+)(\w[\w\d\$]*)/im do
rule %r/(create)(\s+)(?:(or)(\s+)(replace)(\s+))?(package|function|procedure|type)(?:(\s+)(body))?(\s+)([a-z][\w$]*)/im do
groups Keyword::Reserved, Text, Keyword::Reserved, Text, Keyword::Reserved, Text, Keyword::Reserved, Text, Keyword::Reserved, Text, Name
end
# similar for MERGE keywords
Expand All @@ -515,7 +515,7 @@ def self.keywords_type
# General keyword classification with special attention to names
# in a chained "dot" notation.
#
rule %r/(\w[\w\d\$]*)(\.(?=\w))?/ do |m|
rule %r/([a-zA-Z][\w$]*)(\.(?=\w))?/ do |m|
if self.class.keywords_type.include? m[1].upcase
tok = Keyword::Type
elsif self.class.keywords_func.include? m[1].upcase
Expand Down Expand Up @@ -556,11 +556,11 @@ def self.keywords_type

state :dotnames do
# if we are followed by a dot and another name, we are an ordinary name
rule %r/(\w[\w\d\$]*)(\.(?=\w))/ do
rule %r/([a-zA-Z][\w\$]*)(\.(?=\w))/ do
groups Name, Punctuation
end
# this rule WILL be true if something pushed into our state. That is our state contract
rule %r/\w[\w\d\$]*/ do |m|
rule %r/[a-zA-Z][\w\$]*/ do |m|
if self.class.keywords_func.include? m[0].upcase
# The Function lookup allows collection methods like COUNT, FIRST, LAST, etc. to be
# classified correctly. Occasionally misidentifies ordinary names as builtin functions,
Expand Down
2 changes: 1 addition & 1 deletion lib/rouge/lexers/ruby.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def self.detect?(text)
token toktype

push do
uniq_chars = "#{open}#{close}".squeeze
uniq_chars = [open, close].uniq.join
uniq_chars = '' if open == close && open == "\\#"
rule %r/\\[##{uniq_chars}\\]/, Str::Escape
# nesting rules only with asymmetric delimiters
Expand Down
7 changes: 5 additions & 2 deletions lib/rouge/lexers/scala.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ class Scala < RegexLexer
mimetypes 'text/x-scala', 'application/x-scala'

# As documented in the EBNF section of the Scala specification
# http://www.scala-lang.org/docu/files/ScalaReference.pdf
# https://scala-lang.org/files/archive/spec/2.13/13-syntax-summary.html
# https://en.wikipedia.org/wiki/Unicode_character_property#General_Category
whitespace = /\p{Space}/
letter = /[\p{L}$_]/
upper = /[\p{Lu}$_]/
Expand All @@ -24,8 +25,10 @@ class Scala < RegexLexer
# negative lookahead to filter out other classes
op = %r(
(?!#{whitespace}|#{letter}|#{digits}|#{parens}|#{delims})
[\u0020-\u007F\p{Sm}\p{So}]
[-!#%&*/:?@\\^\p{Sm}\p{So}]
)x
# manually removed +<=>|~ from regexp because they're in property Sm
# pp CHRS:(0x00..0x7f).map(&:chr).grep(/\p{Sm}/)

idrest = %r(#{letter}(?:#{letter}|#{digits})*(?:(?<=_)#{op}+)?)x

Expand Down
2 changes: 1 addition & 1 deletion lib/rouge/lexers/sql.rb
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def self.keywords_type
rule %r/"/, Name::Variable, :double_string
rule %r/`/, Name::Variable, :backtick

rule %r/\w[\w\d]*/ do |m|
rule %r/\w+/ do |m|
if self.class.keywords_type.include? m[0].upcase
token Name::Builtin
elsif self.class.keywords.include? m[0].upcase
Expand Down
2 changes: 1 addition & 1 deletion lib/rouge/lexers/xojo.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class Xojo < RegexLexer
rule %r/\s+/, Text::Whitespace

rule %r/rem\b.*?$/i, Comment::Single
rule %r([//'].*$), Comment::Single
rule %r((?://|').*$), Comment::Single
rule %r/\#tag Note.*?\#tag EndNote/mi, Comment::Preproc
rule %r/\s*[#].*$/x, Comment::Preproc

Expand Down
2 changes: 1 addition & 1 deletion lib/rouge/lexers/yang.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class YANG < RegexLexer
filenames '*.yang'
mimetypes 'application/yang'

id = /[\w_-]+(?=[^\w\-\:])\b/
id = /[\w-]+(?=[^\w\-\:])\b/

#Keywords from RFC7950 ; oriented at BNF style
def self.top_stmts_keywords
Expand Down

0 comments on commit 4a52324

Please sign in to comment.