Skip to content

8361842: Move input validation checks to Java for java.lang.StringCoding intrinsics #25998

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 31 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
ac5df9f
Move `StringCoding::countPositives` checks from C++ to Java
vy Jun 26, 2025
1498824
Apply review feedback
vy Jun 26, 2025
196fc5d
Add `StringCodingCountPositives` benchmark
vy Jul 4, 2025
9932dd3
Improve intrinsics in `StringCoding`
vy Jul 9, 2025
14275e5
Remove `StringCodingCountPositives`, `String{En,De}code` already cove…
vy Jul 10, 2025
b9a6adf
Fix `EUC_JP.java.template` broken due to `encodeASCII` rename
vy Jul 10, 2025
6af9864
Merge remote-tracking branch 'upstream/master' into strIntrinCheck
vy Jul 10, 2025
c331fbf
Improve wording of the `VerifyIntrinsicChecks` flag
vy Jul 15, 2025
b60ff45
Remove Markdown-styling in comments
vy Jul 15, 2025
7c042b3
Minimize the number of touched lines in `vmIntrinsics.hpp`
vy Jul 15, 2025
2672f7c
Apply review feedback (styling changes)
vy Jul 15, 2025
85f1986
Merge remote-tracking branch 'upstream/master' into strIntrinCheck
vy Jul 15, 2025
2b89e88
Improve `generate_string_range_check` changes
vy Jul 16, 2025
bcb073c
Add test verifying the effectiveness of `VerifyIntrinsicChecks`
vy Jul 16, 2025
bfc3017
Fix compiler error in `generate_string_range_check`
vy Jul 17, 2025
abc0eeb
Duplicate affected tests with `-XX:+VerifyIntrinsicChecks` variants
vy Jul 17, 2025
db1ed38
Replace casting with `as_Region()` in `generate_string_range_check`
vy Jul 17, 2025
7a6cd39
Fix out-of-bounds in `sun.nio.cs.SingleByte.Encoder::encodeArrayLoop`
vy Jul 17, 2025
8c712ff
Relax target array capacity check for intrinsic Java wrappers
vy Jul 18, 2025
4016c7a
Disable `TestVerifyIntrinsicChecks` for GraalVM
vy Jul 18, 2025
943f840
Fix `encodeISOArray` bounds checks and Javadoc
vy Jul 18, 2025
fb8f6ef
Make `StringCoding` encoding intrinsics lenient
vy Jul 21, 2025
f69374f
Merge remote-tracking branch 'upstream/master' into strIntrinCheck
vy Jul 21, 2025
86e3ed8
Remove superseded `@throws` Javadoc
vy Jul 21, 2025
025c7ef
Fix bit shifting
vy Jul 21, 2025
07cd41c
Cap destination array bounds
vy Jul 22, 2025
cb4780d
Make source array bound checks lenient too
vy Jul 22, 2025
dc5e673
Improve wording of `@param len`
vy Jul 23, 2025
1d02189
Add `@bug` tags
vy Jul 23, 2025
e70dfa3
Replace `requireNonNull` with implicit null checks to reduce bytecode…
vy Jul 25, 2025
c322f0e
Merge remote-tracking branch 'upstream/master' into strIntrinCheck
vy Jul 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6421,10 +6421,14 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value)

// Intrinsic for
//
// - sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray
// return the number of characters copied.
// - java/lang/StringUTF16.compress
// return index of non-latin1 character if copy fails, otherwise 'len'.
// - sun.nio.cs.ISO_8859_1.Encoder#encodeISOArray0(char[] sa, int sp, byte[] da, int dp, int len)
// Encodes char[] to byte[] in ISO-8859-1
//
// - java.lang.StringCoding#encodeISOArray0(byte[] sa, int sp, byte[] da, int dp, int len)
// Encodes byte[] (containing UTF-16) to byte[] in ISO-8859-1
//
// - java.lang.StringCoding#encodeAsciiArray0(char[] sa, int sp, byte[] da, int dp, int len)
// Encodes char[] to byte[] in ASCII
//
// This version always returns the number of characters copied, and does not
// clobber the 'len' register. A successful copy will complete with the post-
Expand Down
12 changes: 8 additions & 4 deletions src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2825,10 +2825,14 @@ void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Regist

// Intrinsic for
//
// - sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray
// return the number of characters copied.
// - java/lang/StringUTF16.compress
// return index of non-latin1 character if copy fails, otherwise 'len'.
// - sun.nio.cs.ISO_8859_1.Encoder#encodeISOArray0(char[] sa, int sp, byte[] da, int dp, int len)
// Encodes char[] to byte[] in ISO-8859-1
//
// - java.lang.StringCoding#encodeISOArray0(byte[] sa, int sp, byte[] da, int dp, int len)
// Encodes byte[] (containing UTF-16) to byte[] in ISO-8859-1
//
// - java.lang.StringCoding#encodeAsciiArray0(char[] sa, int sp, byte[] da, int dp, int len)
// Encodes char[] to byte[] in ASCII
//
// This version always returns the number of characters copied. A successful
// copy will complete with the post-condition: 'res' == 'len', while an
Expand Down
66 changes: 40 additions & 26 deletions src/hotspot/cpu/x86/macroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6027,32 +6027,46 @@ void MacroAssembler::evpbroadcast(BasicType type, XMMRegister dst, Register src,
}
}

// encode char[] to byte[] in ISO_8859_1 or ASCII
//@IntrinsicCandidate
//private static int implEncodeISOArray(byte[] sa, int sp,
//byte[] da, int dp, int len) {
// int i = 0;
// for (; i < len; i++) {
// char c = StringUTF16.getChar(sa, sp++);
// if (c > '\u00FF')
// break;
// da[dp++] = (byte)c;
// }
// return i;
//}
//
//@IntrinsicCandidate
//private static int implEncodeAsciiArray(char[] sa, int sp,
// byte[] da, int dp, int len) {
// int i = 0;
// for (; i < len; i++) {
// char c = sa[sp++];
// if (c >= '\u0080')
// break;
// da[dp++] = (byte)c;
// }
// return i;
//}
// Encode given char[]/byte[] to byte[] in ISO_8859_1 or ASCII
//
// @IntrinsicCandidate
// int sun.nio.cs.ISO_8859_1.Encoder#encodeISOArray0(
// char[] sa, int sp, byte[] da, int dp, int len) {
// int i = 0;
// for (; i < len; i++) {
// char c = sa[sp++];
// if (c > '\u00FF')
// break;
// da[dp++] = (byte) c;
// }
// return i;
// }
//
// @IntrinsicCandidate
// int java.lang.StringCoding.encodeISOArray0(
// byte[] sa, int sp, byte[] da, int dp, int len) {
// int i = 0;
// for (; i < len; i++) {
// char c = StringUTF16.getChar(sa, sp++);
// if (c > '\u00FF')
// break;
// da[dp++] = (byte) c;
// }
// return i;
// }
//
// @IntrinsicCandidate
// int java.lang.StringCoding.encodeAsciiArray0(
// char[] sa, int sp, byte[] da, int dp, int len) {
// int i = 0;
// for (; i < len; i++) {
// char c = sa[sp++];
// if (c >= '\u0080')
// break;
// da[dp++] = (byte) c;
// }
// return i;
// }
void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
XMMRegister tmp1Reg, XMMRegister tmp2Reg,
XMMRegister tmp3Reg, XMMRegister tmp4Reg,
Expand Down
6 changes: 3 additions & 3 deletions src/hotspot/share/classfile/vmIntrinsics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,18 +415,18 @@ class methodHandle;
\
do_class(java_lang_StringCoding, "java/lang/StringCoding") \
do_intrinsic(_countPositives, java_lang_StringCoding, countPositives_name, countPositives_signature, F_S) \
do_name( countPositives_name, "countPositives") \
do_name( countPositives_name, "countPositives0") \
do_signature(countPositives_signature, "([BII)I") \
\
do_class(sun_nio_cs_iso8859_1_Encoder, "sun/nio/cs/ISO_8859_1$Encoder") \
do_intrinsic(_encodeISOArray, sun_nio_cs_iso8859_1_Encoder, encodeISOArray_name, encodeISOArray_signature, F_S) \
do_name( encodeISOArray_name, "implEncodeISOArray") \
do_name( encodeISOArray_name, "encodeISOArray0") \
do_signature(encodeISOArray_signature, "([CI[BII)I") \
\
do_intrinsic(_encodeByteISOArray, java_lang_StringCoding, encodeISOArray_name, indexOfI_signature, F_S) \
\
do_intrinsic(_encodeAsciiArray, java_lang_StringCoding, encodeAsciiArray_name, encodeISOArray_signature, F_S) \
do_name( encodeAsciiArray_name, "implEncodeAsciiArray") \
do_name( encodeAsciiArray_name, "encodeAsciiArray0") \
\
do_class(java_math_BigInteger, "java/math/BigInteger") \
do_intrinsic(_multiplyToLen, java_math_BigInteger, multiplyToLen_name, multiplyToLen_signature, F_S) \
Expand Down
3 changes: 3 additions & 0 deletions src/hotspot/share/opto/c2_globals.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,9 @@
product(bool, PrintIntrinsics, false, DIAGNOSTIC, \
"prints attempted and successful inlining of intrinsics") \
\
develop(bool, VerifyIntrinsicChecks, false, \
"Verify in intrinsic that Java level checks work as expected") \
\
develop(bool, StressReflectiveCode, false, \
"Use inexact types at allocations, etc., to test reflection") \
\
Expand Down
57 changes: 44 additions & 13 deletions src/hotspot/share/opto/library_call.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -939,7 +939,11 @@ inline Node* LibraryCallKit::generate_limit_guard(Node* offset,
}

// Emit range checks for the given String.value byte array
void LibraryCallKit::generate_string_range_check(Node* array, Node* offset, Node* count, bool char_count) {
void LibraryCallKit::generate_string_range_check(Node* array,
Node* offset,
Node* count,
bool char_count,
bool halt) {
if (stopped()) {
return; // already stopped
}
Expand All @@ -957,10 +961,17 @@ void LibraryCallKit::generate_string_range_check(Node* array, Node* offset, Node
generate_limit_guard(offset, count, load_array_length(array), bailout);

if (bailout->req() > 1) {
PreserveJVMState pjvms(this);
set_control(_gvn.transform(bailout));
uncommon_trap(Deoptimization::Reason_intrinsic,
Deoptimization::Action_maybe_recompile);
bailout = _gvn.transform(bailout)->as_Region();
if (halt) {
Node* frame = _gvn.transform(new ParmNode(C->start(), TypeFunc::FramePtr));
Node* halt = _gvn.transform(new HaltNode(bailout, frame, "unexpected guard failure in intrinsic"));
C->root()->add_req(halt);
} else {
PreserveJVMState pjvms(this);
set_control(bailout);
uncommon_trap(Deoptimization::Reason_intrinsic,
Deoptimization::Action_maybe_recompile);
}
}
}

Expand Down Expand Up @@ -1118,6 +1129,7 @@ bool LibraryCallKit::inline_array_equals(StrIntrinsicNode::ArgEnc ae) {


//------------------------------inline_countPositives------------------------------
// int java.lang.StringCoding#countPositives0(byte[] ba, int off, int len)
bool LibraryCallKit::inline_countPositives() {
if (too_many_traps(Deoptimization::Reason_intrinsic)) {
return false;
Expand All @@ -1129,13 +1141,14 @@ bool LibraryCallKit::inline_countPositives() {
Node* offset = argument(1);
Node* len = argument(2);

ba = must_be_not_null(ba, true);

// Range checks
generate_string_range_check(ba, offset, len, false);
if (stopped()) {
return true;
if (VerifyIntrinsicChecks) {
ba = must_be_not_null(ba, true);
generate_string_range_check(ba, offset, len, false, true);
if (stopped()) {
return true;
}
}

Node* ba_start = array_element_address(ba, offset, T_BYTE);
Node* result = new CountPositivesNode(control(), memory(TypeAryPtr::BYTES), ba_start, len);
set_result(_gvn.transform(result));
Expand Down Expand Up @@ -6134,6 +6147,9 @@ CallStaticJavaNode* LibraryCallKit::get_uncommon_trap_from_success_proj(Node* no
}

//-------------inline_encodeISOArray-----------------------------------
// int sun.nio.cs.ISO_8859_1.Encoder#encodeISOArray0(char[] sa, int sp, byte[] da, int dp, int len)
// int java.lang.StringCoding#encodeISOArray0(byte[] sa, int sp, byte[] da, int dp, int len)
// int java.lang.StringCoding#encodeAsciiArray0(char[] sa, int sp, byte[] da, int dp, int len)
// Encode given char[]/byte[] to byte[] in ISO_8859_1 or ASCII
bool LibraryCallKit::inline_encodeISOArray(bool ascii) {
assert(callee()->signature()->size() == 5, "encodeISOArray has 5 parameters");
Expand All @@ -6144,8 +6160,14 @@ bool LibraryCallKit::inline_encodeISOArray(bool ascii) {
Node *dst_offset = argument(3);
Node *length = argument(4);

src = must_be_not_null(src, true);
dst = must_be_not_null(dst, true);
// Cast source & target arrays to not-null
if (VerifyIntrinsicChecks) {
src = must_be_not_null(src, true);
dst = must_be_not_null(dst, true);
if (stopped()) {
return true;
}
}

const TypeAryPtr* src_type = src->Value(&_gvn)->isa_aryptr();
const TypeAryPtr* dst_type = dst->Value(&_gvn)->isa_aryptr();
Expand All @@ -6162,6 +6184,15 @@ bool LibraryCallKit::inline_encodeISOArray(bool ascii) {
return false;
}

// Check source & target bounds
if (VerifyIntrinsicChecks) {
generate_string_range_check(src, src_offset, length, src_elem == T_BYTE, true);
generate_string_range_check(dst, dst_offset, length, false, true);
if (stopped()) {
return true;
}
}

Node* src_start = array_element_address(src, src_offset, T_CHAR);
Node* dst_start = array_element_address(dst, dst_offset, dst_elem);
// 'src_start' points to src array + scaled offset
Expand Down
3 changes: 2 additions & 1 deletion src/hotspot/share/opto/library_call.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ class LibraryCallKit : public GraphKit {
Node* array_length,
RegionNode* region);
void generate_string_range_check(Node* array, Node* offset,
Node* length, bool char_count);
Node* length, bool char_count,
bool halt = false);
Node* current_thread_helper(Node* &tls_output, ByteSize handle_offset,
bool is_immutable);
Node* generate_current_thread(Node* &tls_output);
Expand Down
2 changes: 1 addition & 1 deletion src/java.base/share/classes/java/lang/String.java
Original file line number Diff line number Diff line change
Expand Up @@ -1019,7 +1019,7 @@ private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
int sp = 0;
int sl = len;
while (sp < sl) {
int ret = StringCoding.implEncodeISOArray(val, sp, dst, dp, len);
int ret = StringCoding.encodeISOArray(val, sp, dst, dp, len);
sp = sp + ret;
dp = dp + ret;
if (ret != len) {
Expand Down
Loading