Fix duplicate range regexp warnings (rouge-ruby#2030)

* Remove duplicate range regexp warnings
* Fixed a LOT of warnings from scala. Turns out it was one local variable used in a lot of regexps. This updates the link to the reference and fixes it per reference.

---------

Co-authored-by: Ryan Davis <[email protected]>
tancnle · Mar 18, 2024 · 4a52324 · 4a52324
1 parent 0dbee7d
commit 4a52324
Show file tree

Hide file tree

Showing 15 changed files with 32 additions and 28 deletions.
diff --git a/lib/rouge/lexers/ada.rb b/lib/rouge/lexers/ada.rb
@@ -155,7 +155,7 @@ def self.idents
         end
 
         # Flag word-like things that don't match the ID pattern.
-        rule %r{\b(\p{Pc}|[[alpha]])\p{Word}*}, Error
+        rule %r{\b(\p{Pc}|[[:alpha:]])\p{Word}*}, Error
       end
     end
   end

diff --git a/lib/rouge/lexers/clean.rb b/lib/rouge/lexers/clean.rb
@@ -79,23 +79,23 @@ class instance
 
         rule %r/code(\s+inline)?\s*{/, Comment::Preproc, :abc
 
-        rule %r/_*[a-z][\w_`]*/ do |m|
+        rule %r/_*[a-z][\w`]*/ do |m|
           if self.class.keywords.include?(m[0])
             token Keyword
           else
             token Name
           end
         end
 
-        rule %r/_*[A-Z][\w_`]*/ do |m|
+        rule %r/_*[A-Z][\w`]*/ do |m|
           if m[0]=='True' || m[0]=='False'
             token Keyword::Constant
           else
             token Keyword::Type
           end
         end
 
-        rule %r/[^\w_\s`]/, Punctuation
+        rule %r/[^\w\s`]/, Punctuation
         rule %r/_\b/, Punctuation
       end
 
@@ -136,7 +136,7 @@ class instance
 
         rule %r/}/, Comment::Preproc, :pop!
         rule %r/\.\w*/, Keyword, :abc_rest_of_line
-        rule %r/[\w_]+/, Name::Builtin, :abc_rest_of_line
+        rule %r/[\w]+/, Name::Builtin, :abc_rest_of_line
       end
 
       state :abc_rest_of_line do

diff --git a/lib/rouge/lexers/ecl.rb b/lib/rouge/lexers/ecl.rb
@@ -114,8 +114,8 @@ def self.typed
         mixin :single_quote
 
         rule %r(\b(?i:(and|not|or|in))\b), Operator::Word
-        rule %r([:=|>|<|<>|/|\\|\+|-|=]), Operator
-        rule %r([\[\]{}();,\&,\.,\%]), Punctuation
+        rule %r(:=|>|<|<>|/|\\|\+|-|=), Operator
+        rule %r([\[\]{}();,\&\.\%]), Punctuation
 
         rule %r(\b(?i:(beginc\+\+.*?endc\+\+)))m, Str::Single
         rule %r(\b(?i:(embed.*?endembed)))m, Str::Single

diff --git a/lib/rouge/lexers/elixir.rb b/lib/rouge/lexers/elixir.rb
@@ -135,7 +135,7 @@ def self.detect?(text)
               rule %r/[\\#]/, toktype
             end
 
-            uniq_chars = "#{open}#{close}".squeeze
+            uniq_chars = [open, close].uniq.join
             rule %r/[^##{uniq_chars}\\]+/m, toktype
           end
         end

diff --git a/lib/rouge/lexers/ghc_cmm.rb b/lib/rouge/lexers/ghc_cmm.rb
@@ -22,11 +22,11 @@ class GHCCmm < RegexLexer
       ws = %r(\s|//.*?\n|/[*](?:[^*]|(?:[*][^/]))*[*]+/)mx
 
       # Make sure that this is not a preprocessor macro, e.g. `#if` or `#define`.
-      id = %r((?!\#[a-zA-Z])[\w#\$%_']+)
+      id = %r((?!\#[a-zA-Z])[\w#\$%']+)
 
       complex_id = %r(
-        (?:[\w#$%_']|\(\)|\(,\)|\[\]|[0-9])*
-        (?:[\w#$%_']+)
+        (?:[\w#$%']|\(\)|\(,\)|\[\]|[0-9])*
+        (?:[\w#$%']+)
       )mx
 
       state :root do

diff --git a/lib/rouge/lexers/julia.rb b/lib/rouge/lexers/julia.rb
@@ -252,15 +252,16 @@ def self.detect?(text)
         rule %r/\d+/, Literal::Number::Integer
       end
 
+      NAME_RE = %r/[\p{L}\p{Nl}\p{S}_][\p{Word}\p{S}\p{Po}!]*/
 
       state :funcname do
-        rule %r/[\p{L}\p{Nl}\p{S}_][\p{Word}\p{S}\p{Po}!]*/, Name::Function, :pop!
+        rule NAME_RE, Name::Function, :pop!
         rule %r/\([^\s\w{]{1,2}\)/, Operator, :pop!
         rule %r/[^\s\w{]{1,2}/, Operator, :pop!
       end
 
       state :typename do
-        rule %r/[\p{L}\p{Nl}\p{S}_][\p{Word}\p{S}\p{Po}!]*/, Name::Class, :pop!
+        rule NAME_RE, Name::Class, :pop!
       end
 
       state :stringescape do

diff --git a/lib/rouge/lexers/kotlin.rb b/lib/rouge/lexers/kotlin.rb
@@ -24,7 +24,7 @@ class Kotlin < RegexLexer
         while yield
       )
 
-      name_chars = %r'[-_\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Nl}\p{Nd}\p{Pc}\p{Cf}\p{Mn}\p{Mc}]*'
+      name_chars = %r'[-\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Nl}\p{Nd}\p{Pc}\p{Cf}\p{Mn}\p{Mc}]*'
 
       class_name = %r'`?[\p{Lu}]#{name_chars}`?'
       name = %r'`?[_\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Nl}]#{name_chars}`?'

diff --git a/lib/rouge/lexers/make.rb b/lib/rouge/lexers/make.rb
@@ -73,7 +73,7 @@ def initialize(opts={})
       end
 
       state :export do
-        rule %r/[\w[\$]{1,2}{}()-]/, Name::Variable
+        rule %r/[\w\${}()-]/, Name::Variable
         rule %r/\n/, Text, :pop!
         rule %r/[\t ]+/, Text
       end

diff --git a/lib/rouge/lexers/ocl.rb b/lib/rouge/lexers/ocl.rb
@@ -61,9 +61,9 @@ def self.functions
         rule %r/--.*/, Comment::Single
         rule %r/\d+/, Num::Integer
         rule %r/'/, Str::Single, :single_string
-        rule %r([->|+*/<>=~!@#%&|?^-]), Operator
+        rule %r([-|+*/<>=~!@#%&?^]), Operator
         rule %r/[;:()\[\],.]/, Punctuation
-        rule %r/\w[\w\d]*/ do |m|
+        rule %r/[a-zA-Z]\w*/ do |m|
           if self.class.operators.include? m[0]
             token Operator
           elsif self.class.keywords_type.include? m[0]

diff --git a/lib/rouge/lexers/plsql.rb b/lib/rouge/lexers/plsql.rb
@@ -472,7 +472,7 @@ def self.keywords_type
         # A double-quoted string refers to a database object in our default SQL
         rule %r/"/, Operator, :double_string
         # preprocessor directive treated as special comment
-        rule %r/(\$(?:IF|THEN|ELSE|ELSIF|ERROR|END|(?:\$\$?\w[\w\d]*)))(\s+)/im do
+        rule %r/(\$(?:IF|THEN|ELSE|ELSIF|ERROR|END|(?:\$\$?[a-z]\w*)))(\s+)/im do
           groups Comment::Preproc, Text
         end
 
@@ -503,7 +503,7 @@ def self.keywords_type
         # Special processing for keywords with multiple contexts
         #
         # this madness is to keep the word "replace" from being treated as a builtin function in this context
-        rule %r/(create)(\s+)(?:(or)(\s+)(replace)(\s+))?(package|function|procedure|type)(?:(\s+)(body))?(\s+)(\w[\w\d\$]*)/im do
+        rule %r/(create)(\s+)(?:(or)(\s+)(replace)(\s+))?(package|function|procedure|type)(?:(\s+)(body))?(\s+)([a-z][\w$]*)/im do
           groups Keyword::Reserved, Text, Keyword::Reserved, Text, Keyword::Reserved, Text, Keyword::Reserved, Text, Keyword::Reserved, Text, Name
         end
         # similar for MERGE keywords
@@ -515,7 +515,7 @@ def self.keywords_type
         # General keyword classification with special attention to names
         # in a chained "dot" notation.
         #
-        rule %r/(\w[\w\d\$]*)(\.(?=\w))?/ do |m|
+        rule %r/([a-zA-Z][\w$]*)(\.(?=\w))?/ do |m|
           if self.class.keywords_type.include? m[1].upcase
             tok = Keyword::Type 
           elsif self.class.keywords_func.include? m[1].upcase
@@ -556,11 +556,11 @@ def self.keywords_type
 
       state :dotnames do
         # if we are followed by a dot and another name, we are an ordinary name
-        rule %r/(\w[\w\d\$]*)(\.(?=\w))/ do
+        rule %r/([a-zA-Z][\w\$]*)(\.(?=\w))/ do
           groups Name, Punctuation
         end
         # this rule WILL be true if something pushed into our state. That is our state contract
-        rule %r/\w[\w\d\$]*/ do |m|
+        rule %r/[a-zA-Z][\w\$]*/ do |m|
           if self.class.keywords_func.include? m[0].upcase
             # The Function lookup allows collection methods like COUNT, FIRST, LAST, etc.. to be 
             # classified correctly. Occasionally misidentifies ordinary names as builtin functions,

diff --git a/lib/rouge/lexers/ruby.rb b/lib/rouge/lexers/ruby.rb
@@ -57,7 +57,7 @@ def self.detect?(text)
           token toktype
 
           push do
-            uniq_chars = "#{open}#{close}".squeeze
+            uniq_chars = [open, close].uniq.join
             uniq_chars = '' if open == close && open == "\\#"
             rule %r/\\[##{uniq_chars}\\]/, Str::Escape
             # nesting rules only with asymmetric delimiters

diff --git a/lib/rouge/lexers/scala.rb b/lib/rouge/lexers/scala.rb
@@ -13,7 +13,8 @@ class Scala < RegexLexer
       mimetypes 'text/x-scala', 'application/x-scala'
 
       # As documented in the EBNF section of the scala specification
-      # http://www.scala-lang.org/docu/files/ScalaReference.pdf
+      # https://scala-lang.org/files/archive/spec/2.13/13-syntax-summary.html
+      # https://en.wikipedia.org/wiki/Unicode_character_property#General_Category
       whitespace = /\p{Space}/
       letter = /[\p{L}$_]/
       upper = /[\p{Lu}$_]/
@@ -24,8 +25,10 @@ class Scala < RegexLexer
       # negative lookahead to filter out other classes
       op = %r(
         (?!#{whitespace}|#{letter}|#{digits}|#{parens}|#{delims})
-        [\u0020-\u007F\p{Sm}\p{So}]
+        [-!#%&*/:?@\\^\p{Sm}\p{So}]
       )x
+      # manually removed +<=>|~ from regexp because they're in property Sm
+      # pp CHRS:(0x00..0x7f).map(&:chr).grep(/\p{Sm}/)
 
       idrest = %r(#{letter}(?:#{letter}|#{digits})*(?:(?<=_)#{op}+)?)x
 

diff --git a/lib/rouge/lexers/sql.rb b/lib/rouge/lexers/sql.rb
@@ -115,7 +115,7 @@ def self.keywords_type
         rule %r/"/, Name::Variable, :double_string
         rule %r/`/, Name::Variable, :backtick
 
-        rule %r/\w[\w\d]*/ do |m|
+        rule %r/\w+/ do |m|
           if self.class.keywords_type.include? m[0].upcase
             token Name::Builtin
           elsif self.class.keywords.include? m[0].upcase

diff --git a/lib/rouge/lexers/xojo.rb b/lib/rouge/lexers/xojo.rb
@@ -37,7 +37,7 @@ class Xojo < RegexLexer
         rule %r/\s+/, Text::Whitespace
 
         rule %r/rem\b.*?$/i, Comment::Single
-        rule %r([//'].*$), Comment::Single
+        rule %r((?://|').*$), Comment::Single
         rule %r/\#tag Note.*?\#tag EndNote/mi, Comment::Preproc
         rule %r/\s*[#].*$/x, Comment::Preproc
 

diff --git a/lib/rouge/lexers/yang.rb b/lib/rouge/lexers/yang.rb
@@ -10,7 +10,7 @@ class YANG < RegexLexer
       filenames '*.yang'
       mimetypes 'application/yang'
 
-      id = /[\w_-]+(?=[^\w\-\:])\b/
+      id = /[\w-]+(?=[^\w\-\:])\b/
 
       #Keywords from RFC7950 ; oriented at BNF style
       def self.top_stmts_keywords