From 63e2718123ce36147a602d5c8749555a5dcfe430 Mon Sep 17 00:00:00 2001
From: Peter Arato <it.arato@gmail.com>
Date: Tue, 9 May 2023 12:00:12 -0400
Subject: [PATCH] Add String#byteindex and String#byterindex, with specs

---
 CHANGELOG.md                                  |   1 +
 spec/ruby/core/string/byteindex_spec.rb       | 304 +++++++++++++++
 spec/ruby/core/string/byterindex_spec.rb      | 359 ++++++++++++++++++
 .../core/string/shared/byte_index_common.rb   |  63 +++
 spec/truffle/methods/String.txt               |   2 +
 spec/truffleruby.next-specs                   |   3 +
 .../truffleruby/core/string/StringNodes.java  |  33 ++
 src/main/ruby/truffleruby/core/string.rb      |  76 ++++
 .../core/truffle/polyglot_methods.rb          |   8 +
 .../core/truffle/string_operations.rb         |  11 +
 10 files changed, 860 insertions(+)
 create mode 100644 spec/ruby/core/string/byteindex_spec.rb
 create mode 100644 spec/ruby/core/string/byterindex_spec.rb
 create mode 100644 spec/ruby/core/string/shared/byte_index_common.rb

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cd71879c8559..27497c18b24e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@ Compatibility:
 * Fix `Array#[]` with `ArithmeticSequence` argument when step is negative (#3039, @itarato).
 * Fix `Range#size` and return `nil` for beginningless Range when end isn't Numeric (#3039, @rwstauner).
 * Alias `String#-@` to `String#dedup` (#3039, @itarato).
+* Add `String#byteindex` and `String#byterindex` (#3039, @itarato).
 
 Performance:
 
diff --git a/spec/ruby/core/string/byteindex_spec.rb b/spec/ruby/core/string/byteindex_spec.rb
new file mode 100644
index 000000000000..7be0c7ec1ef9
--- /dev/null
+++ b/spec/ruby/core/string/byteindex_spec.rb
@@ -0,0 +1,304 @@
+# -*- encoding: utf-8 -*-
+require_relative '../../spec_helper'
+require_relative 'fixtures/classes'
+require_relative 'shared/byte_index_common.rb'
+
+describe "String#byteindex" do
+  ruby_version_is "3.2" do
+    it "calls #to_str to convert the first argument" do
+      char = mock("string index char")
+      char.should_receive(:to_str).and_return("b")
+      "abc".byteindex(char).should == 1
+    end
+
+    it "calls #to_int to convert the second argument" do
+      offset = mock("string index offset")
+      offset.should_receive(:to_int).and_return(1)
+      "abc".byteindex("c", offset).should == 2
+    end
+
+    it "does not raise IndexError when byte offset is correct or on string boundary" do
+      "わ".byteindex("").should == 0
+      "わ".byteindex("", 0).should == 0
+      "わ".byteindex("", 3).should == 3
+    end
+
+    it_behaves_like :byte_index_common, :byteindex
+  end
+end
+
+describe "String#byteindex with String" do
+  ruby_version_is "3.2" do
+    it "behaves the same as String#byteindex(char) for one-character strings" do
+      "blablabla hello cruel world...!".split("").uniq.each do |str|
+        chr = str[0]
+        str.byteindex(str).should == str.byteindex(chr)
+
+        0.upto(str.size + 1) do |start|
+          str.byteindex(str, start).should == str.byteindex(chr, start)
+        end
+
+        (-str.size - 1).upto(-1) do |start|
+          str.byteindex(str, start).should == str.byteindex(chr, start)
+        end
+      end
+    end
+
+    it "returns the byteindex of the first occurrence of the given substring" do
+      "blablabla".byteindex("").should == 0
+      "blablabla".byteindex("b").should == 0
+      "blablabla".byteindex("bla").should == 0
+      "blablabla".byteindex("blabla").should == 0
+      "blablabla".byteindex("blablabla").should == 0
+
+      "blablabla".byteindex("l").should == 1
+      "blablabla".byteindex("la").should == 1
+      "blablabla".byteindex("labla").should == 1
+      "blablabla".byteindex("lablabla").should == 1
+
+      "blablabla".byteindex("a").should == 2
+      "blablabla".byteindex("abla").should == 2
+      "blablabla".byteindex("ablabla").should == 2
+    end
+
+    it "treats the offset as a byteindex" do
+      "aaaaa".byteindex("a", 0).should == 0
+      "aaaaa".byteindex("a", 2).should == 2
+      "aaaaa".byteindex("a", 4).should == 4
+    end
+
+    it "ignores string subclasses" do
+      "blablabla".byteindex(StringSpecs::MyString.new("bla")).should == 0
+      StringSpecs::MyString.new("blablabla").byteindex("bla").should == 0
+      StringSpecs::MyString.new("blablabla").byteindex(StringSpecs::MyString.new("bla")).should == 0
+    end
+
+    it "starts the search at the given offset" do
+      "blablabla".byteindex("bl", 0).should == 0
+      "blablabla".byteindex("bl", 1).should == 3
+      "blablabla".byteindex("bl", 2).should == 3
+      "blablabla".byteindex("bl", 3).should == 3
+
+      "blablabla".byteindex("bla", 0).should == 0
+      "blablabla".byteindex("bla", 1).should == 3
+      "blablabla".byteindex("bla", 2).should == 3
+      "blablabla".byteindex("bla", 3).should == 3
+
+      "blablabla".byteindex("blab", 0).should == 0
+      "blablabla".byteindex("blab", 1).should == 3
+      "blablabla".byteindex("blab", 2).should == 3
+      "blablabla".byteindex("blab", 3).should == 3
+
+      "blablabla".byteindex("la", 1).should == 1
+      "blablabla".byteindex("la", 2).should == 4
+      "blablabla".byteindex("la", 3).should == 4
+      "blablabla".byteindex("la", 4).should == 4
+
+      "blablabla".byteindex("lab", 1).should == 1
+      "blablabla".byteindex("lab", 2).should == 4
+      "blablabla".byteindex("lab", 3).should == 4
+      "blablabla".byteindex("lab", 4).should == 4
+
+      "blablabla".byteindex("ab", 2).should == 2
+      "blablabla".byteindex("ab", 3).should == 5
+      "blablabla".byteindex("ab", 4).should == 5
+      "blablabla".byteindex("ab", 5).should == 5
+
+      "blablabla".byteindex("", 0).should == 0
+      "blablabla".byteindex("", 1).should == 1
+      "blablabla".byteindex("", 2).should == 2
+      "blablabla".byteindex("", 7).should == 7
+      "blablabla".byteindex("", 8).should == 8
+      "blablabla".byteindex("", 9).should == 9
+    end
+
+    it "starts the search at offset + self.length if offset is negative" do
+      str = "blablabla"
+
+      ["bl", "bla", "blab", "la", "lab", "ab", ""].each do |needle|
+        (-str.length .. -1).each do |offset|
+          str.byteindex(needle, offset).should ==
+          str.byteindex(needle, offset + str.length)
+        end
+      end
+    end
+
+    it "returns nil if the substring isn't found" do
+      "blablabla".byteindex("B").should == nil
+      "blablabla".byteindex("z").should == nil
+      "blablabla".byteindex("BLA").should == nil
+      "blablabla".byteindex("blablablabla").should == nil
+      "blablabla".byteindex("", 10).should == nil
+
+      "hello".byteindex("he", 1).should == nil
+      "hello".byteindex("he", 2).should == nil
+      "I’ve got a multibyte character.\n".byteindex("\n\n").should == nil
+    end
+
+    it "returns the character byteindex of a multibyte character" do
+      "ありがとう".byteindex("が").should == 6
+    end
+
+    it "returns the character byteindex after offset" do
+      "われわれ".byteindex("わ", 3).should == 6
+      "ありがとうありがとう".byteindex("が", 9).should == 21
+    end
+
+    it "returns the character byteindex after a partial first match" do
+      "</</h".byteindex("</h").should == 2
+    end
+
+    it "raises an Encoding::CompatibilityError if the encodings are incompatible" do
+      char = "れ".encode Encoding::EUC_JP
+      -> do
+        "あれ".byteindex(char)
+      end.should raise_error(Encoding::CompatibilityError)
+    end
+
+    it "handles a substring in a superset encoding" do
+      'abc'.force_encoding(Encoding::US_ASCII).byteindex('é').should == nil
+    end
+
+    it "handles a substring in a subset encoding" do
+      'été'.byteindex('t'.force_encoding(Encoding::US_ASCII)).should == 2
+    end
+  end
+end
+
+describe "String#byteindex with Regexp" do
+  ruby_version_is "3.2" do
+    it "behaves the same as String#byteindex(string) for escaped string regexps" do
+      ["blablabla", "hello cruel world...!"].each do |str|
+        ["", "b", "bla", "lab", "o c", "d."].each do |needle|
+          regexp = Regexp.new(Regexp.escape(needle))
+          str.byteindex(regexp).should == str.byteindex(needle)
+
+          0.upto(str.size + 1) do |start|
+            str.byteindex(regexp, start).should == str.byteindex(needle, start)
+          end
+
+          (-str.size - 1).upto(-1) do |start|
+            str.byteindex(regexp, start).should == str.byteindex(needle, start)
+          end
+        end
+      end
+    end
+
+    it "returns the byteindex of the first match of regexp" do
+      "blablabla".byteindex(/bla/).should == 0
+      "blablabla".byteindex(/BLA/i).should == 0
+
+      "blablabla".byteindex(/.{0}/).should == 0
+      "blablabla".byteindex(/.{6}/).should == 0
+      "blablabla".byteindex(/.{9}/).should == 0
+
+      "blablabla".byteindex(/.*/).should == 0
+      "blablabla".byteindex(/.+/).should == 0
+
+      "blablabla".byteindex(/lab|b/).should == 0
+
+      not_supported_on :opal do
+        "blablabla".byteindex(/\A/).should == 0
+        "blablabla".byteindex(/\Z/).should == 9
+        "blablabla".byteindex(/\z/).should == 9
+        "blablabla\n".byteindex(/\Z/).should == 9
+        "blablabla\n".byteindex(/\z/).should == 10
+      end
+
+      "blablabla".byteindex(/^/).should == 0
+      "\nblablabla".byteindex(/^/).should == 0
+      "b\nablabla".byteindex(/$/).should == 1
+      "bl\nablabla".byteindex(/$/).should == 2
+
+      "blablabla".byteindex(/.l./).should == 0
+    end
+
+    it "starts the search at the given offset" do
+      "blablabla".byteindex(/.{0}/, 5).should == 5
+      "blablabla".byteindex(/.{1}/, 5).should == 5
+      "blablabla".byteindex(/.{2}/, 5).should == 5
+      "blablabla".byteindex(/.{3}/, 5).should == 5
+      "blablabla".byteindex(/.{4}/, 5).should == 5
+
+      "blablabla".byteindex(/.{0}/, 3).should == 3
+      "blablabla".byteindex(/.{1}/, 3).should == 3
+      "blablabla".byteindex(/.{2}/, 3).should == 3
+      "blablabla".byteindex(/.{5}/, 3).should == 3
+      "blablabla".byteindex(/.{6}/, 3).should == 3
+
+      "blablabla".byteindex(/.l./, 0).should == 0
+      "blablabla".byteindex(/.l./, 1).should == 3
+      "blablabla".byteindex(/.l./, 2).should == 3
+      "blablabla".byteindex(/.l./, 3).should == 3
+
+      "xblaxbla".byteindex(/x./, 0).should == 0
+      "xblaxbla".byteindex(/x./, 1).should == 4
+      "xblaxbla".byteindex(/x./, 2).should == 4
+
+      not_supported_on :opal do
+        "blablabla\n".byteindex(/\Z/, 9).should == 9
+      end
+    end
+
+    it "starts the search at offset + self.length if offset is negative" do
+      str = "blablabla"
+
+      ["bl", "bla", "blab", "la", "lab", "ab", ""].each do |needle|
+        (-str.length .. -1).each do |offset|
+          str.byteindex(needle, offset).should ==
+          str.byteindex(needle, offset + str.length)
+        end
+      end
+    end
+
+    it "returns nil if the substring isn't found" do
+      "blablabla".byteindex(/BLA/).should == nil
+
+      "blablabla".byteindex(/.{10}/).should == nil
+      "blaxbla".byteindex(/.x/, 3).should == nil
+      "blaxbla".byteindex(/..x/, 2).should == nil
+    end
+
+    it "returns nil if the Regexp matches the empty string and the offset is out of range" do
+      "ruby".byteindex(//, 12).should be_nil
+    end
+
+    it "supports \\G which matches at the given start offset" do
+      "helloYOU.".byteindex(/\GYOU/, 5).should == 5
+      "helloYOU.".byteindex(/\GYOU/).should == nil
+
+      re = /\G.+YOU/
+      # The # marks where \G will match.
+      [
+        ["#hi!YOUall.", 0],
+        ["h#i!YOUall.", 1],
+        ["hi#!YOUall.", 2],
+        ["hi!#YOUall.", nil]
+      ].each do |spec|
+
+        start = spec[0].byteindex("#")
+        str = spec[0].delete("#")
+
+        str.byteindex(re, start).should == spec[1]
+      end
+    end
+
+    it "converts start_offset to an integer via to_int" do
+      obj = mock('1')
+      obj.should_receive(:to_int).and_return(1)
+      "RWOARW".byteindex(/R./, obj).should == 4
+    end
+
+    it "returns the character byteindex of a multibyte character" do
+      "ありがとう".byteindex(/が/).should == 6
+    end
+
+    it "returns the character byteindex after offset" do
+      "われわれ".byteindex(/わ/, 3).should == 6
+    end
+
+    it "treats the offset as a byteindex" do
+      "われわわれ".byteindex(/わ/, 6).should == 6
+    end
+  end
+end
diff --git a/spec/ruby/core/string/byterindex_spec.rb b/spec/ruby/core/string/byterindex_spec.rb
new file mode 100644
index 000000000000..717708c97d36
--- /dev/null
+++ b/spec/ruby/core/string/byterindex_spec.rb
@@ -0,0 +1,359 @@
+# -*- encoding: utf-8 -*-
+require_relative '../../spec_helper'
+require_relative 'fixtures/classes'
+require_relative 'shared/byte_index_common.rb'
+
+describe "String#byterindex with object" do
+  ruby_version_is "3.2" do
+    it "tries to convert obj to a string via to_str" do
+      obj = mock('lo')
+      def obj.to_str() "lo" end
+      "hello".byterindex(obj).should == "hello".byterindex("lo")
+
+      obj = mock('o')
+      def obj.respond_to?(arg, *) true end
+      def obj.method_missing(*args) "o" end
+      "hello".byterindex(obj).should == "hello".byterindex("o")
+    end
+
+    it "calls #to_int to convert the second argument" do
+      offset = mock("string index offset")
+      offset.should_receive(:to_int).and_return(3)
+      "abc".byterindex("c", offset).should == 2
+    end
+
+    it "does not raise IndexError when byte offset is correct or on string boundary" do
+      "わ".byterindex("", 0).should == 0
+      "わ".byterindex("", 3).should == 3
+      "わ".byterindex("").should == 3
+    end
+
+    it_behaves_like :byte_index_common, :byterindex
+  end
+end
+
+describe "String#byterindex with String" do
+  ruby_version_is "3.2" do
+    it "behaves the same as String#byterindex(char) for one-character strings" do
+      "blablabla hello cruel world...!".split("").uniq.each do |str|
+        chr = str[0]
+        str.byterindex(str).should == str.byterindex(chr)
+
+        0.upto(str.size + 1) do |start|
+          str.byterindex(str, start).should == str.byterindex(chr, start)
+        end
+
+        (-str.size - 1).upto(-1) do |start|
+          str.byterindex(str, start).should == str.byterindex(chr, start)
+        end
+      end
+    end
+
+    it "behaves the same as String#byterindex(?char) for one-character strings" do
+      "blablabla hello cruel world...!".split("").uniq.each do |str|
+        chr = str[0] =~ / / ? str[0] : eval("?#{str[0]}")
+        str.byterindex(str).should == str.byterindex(chr)
+
+        0.upto(str.size + 1) do |start|
+          str.byterindex(str, start).should == str.byterindex(chr, start)
+        end
+
+        (-str.size - 1).upto(-1) do |start|
+          str.byterindex(str, start).should == str.byterindex(chr, start)
+        end
+      end
+    end
+
+    it "returns the index of the last occurrence of the given substring" do
+      "blablabla".byterindex("").should == 9
+      "blablabla".byterindex("a").should == 8
+      "blablabla".byterindex("la").should == 7
+      "blablabla".byterindex("bla").should == 6
+      "blablabla".byterindex("abla").should == 5
+      "blablabla".byterindex("labla").should == 4
+      "blablabla".byterindex("blabla").should == 3
+      "blablabla".byterindex("ablabla").should == 2
+      "blablabla".byterindex("lablabla").should == 1
+      "blablabla".byterindex("blablabla").should == 0
+
+      "blablabla".byterindex("l").should == 7
+      "blablabla".byterindex("bl").should == 6
+      "blablabla".byterindex("abl").should == 5
+      "blablabla".byterindex("labl").should == 4
+      "blablabla".byterindex("blabl").should == 3
+      "blablabla".byterindex("ablabl").should == 2
+      "blablabla".byterindex("lablabl").should == 1
+      "blablabla".byterindex("blablabl").should == 0
+
+      "blablabla".byterindex("b").should == 6
+      "blablabla".byterindex("ab").should == 5
+      "blablabla".byterindex("lab").should == 4
+      "blablabla".byterindex("blab").should == 3
+      "blablabla".byterindex("ablab").should == 2
+      "blablabla".byterindex("lablab").should == 1
+      "blablabla".byterindex("blablab").should == 0
+    end
+
+    it "ignores string subclasses" do
+      "blablabla".byterindex(StringSpecs::MyString.new("bla")).should == 6
+      StringSpecs::MyString.new("blablabla").byterindex("bla").should == 6
+      StringSpecs::MyString.new("blablabla").byterindex(StringSpecs::MyString.new("bla")).should == 6
+    end
+
+    it "starts the search at the given offset" do
+      "blablabla".byterindex("bl", 0).should == 0
+      "blablabla".byterindex("bl", 1).should == 0
+      "blablabla".byterindex("bl", 2).should == 0
+      "blablabla".byterindex("bl", 3).should == 3
+
+      "blablabla".byterindex("bla", 0).should == 0
+      "blablabla".byterindex("bla", 1).should == 0
+      "blablabla".byterindex("bla", 2).should == 0
+      "blablabla".byterindex("bla", 3).should == 3
+
+      "blablabla".byterindex("blab", 0).should == 0
+      "blablabla".byterindex("blab", 1).should == 0
+      "blablabla".byterindex("blab", 2).should == 0
+      "blablabla".byterindex("blab", 3).should == 3
+      "blablabla".byterindex("blab", 6).should == 3
+      "blablablax".byterindex("blab", 6).should == 3
+
+      "blablabla".byterindex("la", 1).should == 1
+      "blablabla".byterindex("la", 2).should == 1
+      "blablabla".byterindex("la", 3).should == 1
+      "blablabla".byterindex("la", 4).should == 4
+
+      "blablabla".byterindex("lab", 1).should == 1
+      "blablabla".byterindex("lab", 2).should == 1
+      "blablabla".byterindex("lab", 3).should == 1
+      "blablabla".byterindex("lab", 4).should == 4
+
+      "blablabla".byterindex("ab", 2).should == 2
+      "blablabla".byterindex("ab", 3).should == 2
+      "blablabla".byterindex("ab", 4).should == 2
+      "blablabla".byterindex("ab", 5).should == 5
+
+      "blablabla".byterindex("", 0).should == 0
+      "blablabla".byterindex("", 1).should == 1
+      "blablabla".byterindex("", 2).should == 2
+      "blablabla".byterindex("", 7).should == 7
+      "blablabla".byterindex("", 8).should == 8
+      "blablabla".byterindex("", 9).should == 9
+      "blablabla".byterindex("", 10).should == 9
+    end
+
+    it "starts the search at offset + self.length if offset is negative" do
+      str = "blablabla"
+
+      ["bl", "bla", "blab", "la", "lab", "ab", ""].each do |needle|
+        (-str.length .. -1).each do |offset|
+          str.byterindex(needle, offset).should ==
+          str.byterindex(needle, offset + str.length)
+        end
+      end
+    end
+
+    it "returns nil if the substring isn't found" do
+      "blablabla".byterindex("B").should == nil
+      "blablabla".byterindex("z").should == nil
+      "blablabla".byterindex("BLA").should == nil
+      "blablabla".byterindex("blablablabla").should == nil
+
+      "hello".byterindex("lo", 0).should == nil
+      "hello".byterindex("lo", 1).should == nil
+      "hello".byterindex("lo", 2).should == nil
+
+      "hello".byterindex("llo", 0).should == nil
+      "hello".byterindex("llo", 1).should == nil
+
+      "hello".byterindex("el", 0).should == nil
+      "hello".byterindex("ello", 0).should == nil
+
+      "hello".byterindex("", -6).should == nil
+      "hello".byterindex("", -7).should == nil
+
+      "hello".byterindex("h", -6).should == nil
+    end
+
+    it "tries to convert start_offset to an integer via to_int" do
+      obj = mock('5')
+      def obj.to_int() 5 end
+      "str".byterindex("st", obj).should == 0
+
+      obj = mock('5')
+      def obj.respond_to?(arg, *) true end
+      def obj.method_missing(*args) 5 end
+      "str".byterindex("st", obj).should == 0
+    end
+
+    it "raises a TypeError when given offset is nil" do
+      -> { "str".byterindex("st", nil) }.should raise_error(TypeError)
+    end
+
+    it "handles a substring in a superset encoding" do
+      'abc'.force_encoding(Encoding::US_ASCII).byterindex('é').should == nil
+    end
+
+    it "handles a substring in a subset encoding" do
+      'été'.byterindex('t'.force_encoding(Encoding::US_ASCII)).should == 2
+    end
+  end
+end
+
+describe "String#byterindex with Regexp" do
+  ruby_version_is "3.2" do
+    it "behaves the same as String#byterindex(string) for escaped string regexps" do
+      ["blablabla", "hello cruel world...!"].each do |str|
+        ["", "b", "bla", "lab", "o c", "d."].each do |needle|
+          regexp = Regexp.new(Regexp.escape(needle))
+          str.byterindex(regexp).should == str.byterindex(needle)
+
+          0.upto(str.size + 1) do |start|
+            str.byterindex(regexp, start).should == str.byterindex(needle, start)
+          end
+
+          (-str.size - 1).upto(-1) do |start|
+            str.byterindex(regexp, start).should == str.byterindex(needle, start)
+          end
+        end
+      end
+    end
+
+    it "returns the index of the first match from the end of string of regexp" do
+      "blablabla".byterindex(/bla/).should == 6
+      "blablabla".byterindex(/BLA/i).should == 6
+
+      "blablabla".byterindex(/.{0}/).should == 9
+      "blablabla".byterindex(/.{1}/).should == 8
+      "blablabla".byterindex(/.{2}/).should == 7
+      "blablabla".byterindex(/.{6}/).should == 3
+      "blablabla".byterindex(/.{9}/).should == 0
+
+      "blablabla".byterindex(/.*/).should == 9
+      "blablabla".byterindex(/.+/).should == 8
+
+      "blablabla".byterindex(/bla|a/).should == 8
+
+      not_supported_on :opal do
+        "blablabla".byterindex(/\A/).should == 0
+        "blablabla".byterindex(/\Z/).should == 9
+        "blablabla".byterindex(/\z/).should == 9
+        "blablabla\n".byterindex(/\Z/).should == 10
+        "blablabla\n".byterindex(/\z/).should == 10
+      end
+
+      "blablabla".byterindex(/^/).should == 0
+      not_supported_on :opal do
+        "\nblablabla".byterindex(/^/).should == 1
+        "b\nlablabla".byterindex(/^/).should == 2
+      end
+      "blablabla".byterindex(/$/).should == 9
+
+      "blablabla".byterindex(/.l./).should == 6
+    end
+
+    it "starts the search at the given offset" do
+      "blablabla".byterindex(/.{0}/, 5).should == 5
+      "blablabla".byterindex(/.{1}/, 5).should == 5
+      "blablabla".byterindex(/.{2}/, 5).should == 5
+      "blablabla".byterindex(/.{3}/, 5).should == 5
+      "blablabla".byterindex(/.{4}/, 5).should == 5
+
+      "blablabla".byterindex(/.{0}/, 3).should == 3
+      "blablabla".byterindex(/.{1}/, 3).should == 3
+      "blablabla".byterindex(/.{2}/, 3).should == 3
+      "blablabla".byterindex(/.{5}/, 3).should == 3
+      "blablabla".byterindex(/.{6}/, 3).should == 3
+
+      "blablabla".byterindex(/.l./, 0).should == 0
+      "blablabla".byterindex(/.l./, 1).should == 0
+      "blablabla".byterindex(/.l./, 2).should == 0
+      "blablabla".byterindex(/.l./, 3).should == 3
+
+      "blablablax".byterindex(/.x/, 10).should == 8
+      "blablablax".byterindex(/.x/, 9).should == 8
+      "blablablax".byterindex(/.x/, 8).should == 8
+
+      "blablablax".byterindex(/..x/, 10).should == 7
+      "blablablax".byterindex(/..x/, 9).should == 7
+      "blablablax".byterindex(/..x/, 8).should == 7
+      "blablablax".byterindex(/..x/, 7).should == 7
+
+      not_supported_on :opal do
+        "blablabla\n".byterindex(/\Z/, 9).should == 9
+      end
+    end
+
+    it "starts the search at offset + self.length if offset is negative" do
+      str = "blablabla"
+
+      ["bl", "bla", "blab", "la", "lab", "ab", ""].each do |needle|
+        (-str.length .. -1).each do |offset|
+          str.byterindex(needle, offset).should ==
+          str.byterindex(needle, offset + str.length)
+        end
+      end
+    end
+
+    it "returns nil if the substring isn't found" do
+      "blablabla".byterindex(/BLA/).should == nil
+      "blablabla".byterindex(/.{10}/).should == nil
+      "blablablax".byterindex(/.x/, 7).should == nil
+      "blablablax".byterindex(/..x/, 6).should == nil
+
+      not_supported_on :opal do
+        "blablabla".byterindex(/\Z/, 5).should == nil
+        "blablabla".byterindex(/\z/, 5).should == nil
+        "blablabla\n".byterindex(/\z/, 9).should == nil
+      end
+    end
+
+    not_supported_on :opal do
+      it "supports \\G which matches at the given start offset" do
+        "helloYOU.".byterindex(/YOU\G/, 8).should == 5
+        "helloYOU.".byterindex(/YOU\G/).should == nil
+
+        idx = "helloYOUall!".index("YOU")
+        re = /YOU.+\G.+/
+        # The # marks where \G will match.
+        [
+          ["helloYOU#all.", nil],
+          ["helloYOUa#ll.", idx],
+          ["helloYOUal#l.", idx],
+          ["helloYOUall#.", idx],
+          ["helloYOUall.#", nil]
+        ].each do |i|
+          start = i[0].index("#")
+          str = i[0].delete("#")
+
+          str.byterindex(re, start).should == i[1]
+        end
+      end
+    end
+
+    it "tries to convert start_offset to an integer" do
+      obj = mock('5')
+      def obj.to_int() 5 end
+      "str".byterindex(/../, obj).should == 1
+
+      obj = mock('5')
+      def obj.respond_to?(arg, *) true end
+      def obj.method_missing(*args); 5; end
+      "str".byterindex(/../, obj).should == 1
+    end
+
+    it "raises a TypeError when given offset is nil" do
+      -> { "str".byterindex(/../, nil) }.should raise_error(TypeError)
+    end
+
+    it "returns the reverse byte index of a multibyte character" do
+      "ありがりがとう".byterindex("が").should == 12
+      "ありがりがとう".byterindex(/が/).should == 12
+    end
+
+    it "returns the character index before the finish" do
+       "ありがりがとう".byterindex("が", 9).should == 6
+       "ありがりがとう".byterindex(/が/, 9).should == 6
+    end
+  end
+end
diff --git a/spec/ruby/core/string/shared/byte_index_common.rb b/spec/ruby/core/string/shared/byte_index_common.rb
new file mode 100644
index 000000000000..fa73e291ede5
--- /dev/null
+++ b/spec/ruby/core/string/shared/byte_index_common.rb
@@ -0,0 +1,63 @@
+# -*- encoding: utf-8 -*-
+require_relative '../../../spec_helper'
+
+describe :byte_index_common, shared: true do
+  describe "raises on type errors" do
+    it "raises a TypeError if passed nil" do
+      -> { "abc".send(@method, nil) }.should raise_error(TypeError, "no implicit conversion of nil into String")
+    end
+
+    it "raises a TypeError if passed a boolean" do
+      -> { "abc".send(@method, true) }.should raise_error(TypeError, "no implicit conversion of true into String")
+    end
+
+    it "raises a TypeError if passed a Symbol" do
+      not_supported_on :opal do
+        -> { "abc".send(@method, :a) }.should raise_error(TypeError, "no implicit conversion of Symbol into String")
+      end
+    end
+
+    it "raises a TypeError if passed a Symbol" do
+      obj = mock('x')
+      obj.should_not_receive(:to_int)
+      -> { "hello".send(@method, obj) }.should raise_error(TypeError, "no implicit conversion of MockObject into String")
+    end
+
+    it "raises a TypeError if passed an Integer" do
+      -> { "abc".send(@method, 97) }.should raise_error(TypeError, "no implicit conversion of Integer into String")
+    end
+  end
+
+  describe "with multibyte codepoints" do
+    it "raises an IndexError when byte offset lands in the middle of a multibyte character" do
+      -> { "わ".send(@method, "", 1) }.should raise_error(IndexError, "offset 1 does not land on character boundary")
+      -> { "わ".send(@method, "", 2) }.should raise_error(IndexError, "offset 2 does not land on character boundary")
+      -> { "わ".send(@method, "", -1) }.should raise_error(IndexError, "offset 2 does not land on character boundary")
+      -> { "わ".send(@method, "", -2) }.should raise_error(IndexError, "offset 1 does not land on character boundary")
+    end
+
+    it "raises an Encoding::CompatibilityError if the encodings are incompatible" do
+      re = Regexp.new "れ".encode(Encoding::EUC_JP)
+      -> do
+        "あれ".send(@method, re)
+      end.should raise_error(Encoding::CompatibilityError, "incompatible character encodings: UTF-8 and EUC-JP")
+    end
+  end
+
+  describe "with global variables" do
+    it "doesn't set $~ for non regex search" do
+      $~ = nil
+
+      'hello.'.send(@method, 'll')
+      $~.should == nil
+    end
+
+    it "sets $~ to MatchData of match and nil when there's none" do
+      'hello.'.send(@method, /.e./)
+      $~[0].should == 'hel'
+
+      'hello.'.send(@method, /not/)
+      $~.should == nil
+    end
+  end
+end
diff --git a/spec/truffle/methods/String.txt b/spec/truffle/methods/String.txt
index 05427da07527..51a5ad3aff23 100644
--- a/spec/truffle/methods/String.txt
+++ b/spec/truffle/methods/String.txt
@@ -12,6 +12,8 @@
 []=
 ascii_only?
 b
+byteindex
+byterindex
 bytes
 bytesize
 byteslice
diff --git a/spec/truffleruby.next-specs b/spec/truffleruby.next-specs
index 39d167983f08..051c2e10b79a 100644
--- a/spec/truffleruby.next-specs
+++ b/spec/truffleruby.next-specs
@@ -16,3 +16,6 @@ spec/ruby/core/hash/shift_spec.rb
 spec/ruby/core/range/size_spec.rb
 
 spec/ruby/core/string/dedup_spec.rb
+
+spec/ruby/core/string/byteindex_spec.rb
+spec/ruby/core/string/byterindex_spec.rb
diff --git a/src/main/java/org/truffleruby/core/string/StringNodes.java b/src/main/java/org/truffleruby/core/string/StringNodes.java
index 2ff5d3ce3787..67a4015ad05b 100644
--- a/src/main/java/org/truffleruby/core/string/StringNodes.java
+++ b/src/main/java/org/truffleruby/core/string/StringNodes.java
@@ -3957,6 +3957,39 @@ protected Object stringByteIndex(
         }
     }
 
+    /** Search pattern in string backwards, starting from byteOffset, and return the byte index of the match or nil */
+    @Primitive(name = "string_byte_reverse_index", lowerFixnum = 3)
+    public abstract static class StringByteReverseIndexNode extends PrimitiveArrayArgumentsNode {
+        @Specialization
+        protected Object stringByteReverseIndex(
+                Object rubyString, Object rubyPattern, RubyEncoding compatibleEncoding, int byteOffset,
+                @Cached RubyStringLibrary libString,
+                @Cached RubyStringLibrary libPattern,
+                @Cached TruffleString.LastByteIndexOfStringNode lastByteIndexOfStringNode,
+                @Cached ConditionProfile indexOutOfBoundsProfile,
+                @Cached ConditionProfile foundProfile) {
+            assert byteOffset >= 0;
+
+            var string = libString.getTString(rubyString);
+            int stringByteLength = libString.byteLength(rubyString);
+
+            var pattern = libPattern.getTString(rubyPattern);
+            int patternByteLength = libPattern.byteLength(rubyPattern);
+
+            if (indexOutOfBoundsProfile.profile(patternByteLength > stringByteLength)) {
+                return nil;
+            }
+
+            int found = lastByteIndexOfStringNode.execute(string, pattern, byteOffset, 0,
+                    compatibleEncoding.tencoding);
+            if (foundProfile.profile(found >= 0)) {
+                return found;
+            }
+
+            return nil;
+        }
+    }
+
     // Port of Rubinius's String::previous_byte_index.
     //
     // This method takes a byte index, finds the corresponding character the byte index belongs to, and then returns
diff --git a/src/main/ruby/truffleruby/core/string.rb b/src/main/ruby/truffleruby/core/string.rb
index 6ab80670b0a8..a0ae08f64553 100644
--- a/src/main/ruby/truffleruby/core/string.rb
+++ b/src/main/ruby/truffleruby/core/string.rb
@@ -1097,6 +1097,82 @@ def rindex(sub, finish = undefined)
     nil
   end
 
+  # Returns the byte offset of the first match of `str` starting at or after byte offset `start`, or nil.
+  # Raises IndexError when `start` does not land on a character boundary.
+  def byteindex(str, start = undefined)
+    is_regex_pattern = Primitive.is_a?(str, Regexp)
+
+    if Primitive.undefined?(start)
+      start = 0
+    else
+      start = Primitive.rb_to_int(start)
+      start += bytesize if start < 0
+      if start < 0 || start > bytesize
+        # A Regexp search that cannot match still has to reset the caller's $~.
+        Primitive.regexp_last_match_set(Primitive.caller_special_variables, nil) if is_regex_pattern
+        return nil
+      end
+    end
+
+    unless Truffle::StringOperations.on_codepoint_boundary?(self, start)
+      raise IndexError, "offset #{start} does not land on character boundary"
+    end
+
+    if is_regex_pattern
+      Primitive.encoding_ensure_compatible(self, str)
+
+      match = Truffle::RegexpOperations.match_from(str, self, start)
+      Primitive.regexp_last_match_set(Primitive.caller_special_variables, match)
+      return match ? Primitive.character_index_to_byte_index(self, match.begin(0)) : nil
+    end
+
+    str = StringValue(str)
+    # Check the encodings before any early return, so an incompatible pattern raises even if it cannot match.
+    enc = Primitive.encoding_ensure_compatible_str(self, str)
+    return start if str.empty?
+    return nil if start + str.bytesize > bytesize
+
+    Primitive.string_byte_index(self, str, enc, start)
+  end
+
+  # Returns the byte offset of the last match of `str` starting at or before byte offset `finish`, or nil.
+  def byterindex(str, finish = undefined)
+    if Primitive.undefined?(finish)
+      finish = bytesize
+    else
+      finish = Primitive.rb_to_int(finish)
+      finish += bytesize if finish < 0
+      if finish < 0
+        # Same as byteindex: a Regexp search that cannot match still has to reset the caller's $~.
+        Primitive.regexp_last_match_set(Primitive.caller_special_variables, nil) if Primitive.is_a?(str, Regexp)
+        return nil
+      end
+      finish = bytesize if finish > bytesize
+    end
+
+    unless Truffle::StringOperations.on_codepoint_boundary?(self, finish)
+      raise IndexError, "offset #{finish} does not land on character boundary"
+    end
+
+    if Primitive.is_a?(str, Regexp)
+      Primitive.encoding_ensure_compatible(self, str)
+      match = Truffle::RegexpOperations.search_region(str, self, 0, finish, false, true)
+      Primitive.regexp_last_match_set(Primitive.caller_special_variables, match)
+      return match ? Primitive.character_index_to_byte_index(self, match.begin(0)) : nil
+    end
+
+    str = StringValue(str)
+    # Check the encodings before any early return, so an incompatible pattern raises even if it cannot match.
+    enc = Primitive.encoding_ensure_compatible_str(self, str)
+    return finish if str.empty?
+    return nil if str.bytesize > bytesize
+    # The reverse search starts from a position that includes the full pattern, so extend finish by str.size characters.
+    finish_adjusted = Primitive.byte_index_to_character_index(self, finish) + str.size
+    finish_adjusted = size if finish_adjusted > size
+    finish = Primitive.character_index_to_byte_index(self, finish_adjusted)
+    Primitive.string_byte_reverse_index(self, str, enc, finish)
+  end
+
   def start_with?(*prefixes)
     if prefixes.size == 1 and prefix = prefixes[0] and Primitive.is_a?(prefix, String)
       enc = Primitive.encoding_ensure_compatible_str self, prefix
diff --git a/src/main/ruby/truffleruby/core/truffle/polyglot_methods.rb b/src/main/ruby/truffleruby/core/truffle/polyglot_methods.rb
index cb988edd8006..92dda0d54e7a 100644
--- a/src/main/ruby/truffleruby/core/truffle/polyglot_methods.rb
+++ b/src/main/ruby/truffleruby/core/truffle/polyglot_methods.rb
@@ -68,6 +68,14 @@ def b(...)
       to_s.b(...)
     end
 
+    def byteindex(...)
+      to_s.byteindex(...)
+    end
+
+    def byterindex(...)
+      to_s.byterindex(...)
+    end
+
     def bytes(...)
       to_s.bytes(...)
     end
diff --git a/src/main/ruby/truffleruby/core/truffle/string_operations.rb b/src/main/ruby/truffleruby/core/truffle/string_operations.rb
index 8e6cee160eb6..ea355f641465 100644
--- a/src/main/ruby/truffleruby/core/truffle/string_operations.rb
+++ b/src/main/ruby/truffleruby/core/truffle/string_operations.rb
@@ -411,5 +411,16 @@ def self.assign_regexp(string, index, count, replacement)
 
       Primitive.string_splice(string, replacement, bi, bs, enc)
     end
+
+    def self.on_codepoint_boundary?(string, byte_pos)
+      char_pos = Primitive.byte_index_to_character_index(string, byte_pos)
+      adjusted_byte_pos = if char_pos >= string.size
+                            string.bytesize
+                          else
+                            Primitive.character_index_to_byte_index(string, char_pos)
+                          end
+
+      byte_pos == adjusted_byte_pos
+    end
   end
 end