diff --git a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/VM.java b/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/VM.java index 2dcdeddf0c..5dc20ddb4e 100644 --- a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/VM.java +++ b/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/VM.java @@ -22,7 +22,6 @@ import java.util.Comparator; import de.mirkosertic.bytecoder.api.Export; -import de.mirkosertic.bytecoder.classlib.java.nio.charset.UTF_8; public class VM { diff --git a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/lang/TCharacterData.java b/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/lang/TCharacterData.java new file mode 100644 index 0000000000..326726d519 --- /dev/null +++ b/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/lang/TCharacterData.java @@ -0,0 +1,113 @@ +/* + * Copyright 2019 Mirko Sertic + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package de.mirkosertic.bytecoder.classlib.java.lang; + +import de.mirkosertic.bytecoder.api.AnyTypeMatches; +import de.mirkosertic.bytecoder.api.SubstitutesInClass; + +@SubstitutesInClass(completeReplace = false) +abstract class TCharacterData { + + private static Object defaultData; + private static Object data00; + private static Object data01; + private static Object data02; + private static Object data0E; + private static Object dataPrivate; + private static Object dataUndefined; + + static AnyTypeMatches of(final int ch) { + if (ch >>> 8 == 0) { // fast-path + if (defaultData == null) { + try { + defaultData = Class.forName("java.lang.CharacterDataLatin1").newInstance(); + } catch (final Exception e) { + throw new IllegalArgumentException("Not supported", e); + } + } + return (AnyTypeMatches) defaultData; + } else { + switch (ch >>> 16) { //plane 00-16 + case (0): { + if (data00 == null) { + try { + data00 = Class.forName("java.lang.CharacterData00").newInstance(); + } catch (final Exception e) { + throw new IllegalArgumentException("Not supported", e); + } + } + return (AnyTypeMatches) data00; + + } + case (1): { + if (data01 == null) { + try { + data01 = Class.forName("java.lang.CharacterData01").newInstance(); + } catch (final Exception e) { + throw new IllegalArgumentException("Not supported", e); + } + } + return (AnyTypeMatches) data01; + + } + case (2): { + if (data02 == null) { + try { + data02 = Class.forName("java.lang.CharacterData02").newInstance(); + } catch (final Exception e) { + throw new IllegalArgumentException("Not supported", e); + } + } + return (AnyTypeMatches) data02; + + } + case (14): { + if (data0E == null) { + try { + data0E = Class.forName("java.lang.CharacterData0E").newInstance(); + } catch (final Exception e) { + throw new IllegalArgumentException("Not supported", e); + } + } + return (AnyTypeMatches) data0E; + + } + case (15): // Private Use + case (16): // Private Use + { + if (dataPrivate == null) { + try { + dataPrivate 
= Class.forName("java.lang.CharacterDataPrivateUse").newInstance(); + } catch (final Exception e) { + throw new IllegalArgumentException("Not supported", e); + } + } + return (AnyTypeMatches) dataPrivate; + } + default: { + if (dataUndefined == null) { + try { + dataUndefined = Class.forName("java.lang.CharacterDataUndefined").newInstance(); + } catch (final Exception e) { + throw new IllegalArgumentException("Not supported", e); + } + } + return (AnyTypeMatches) dataUndefined; + } + } + } + } +} diff --git a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/ISO_8859_1.java b/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/ISO_8859_1.java deleted file mode 100644 index 8fabc0e326..0000000000 --- a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/ISO_8859_1.java +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
- * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -package de.mirkosertic.bytecoder.classlib.java.nio.charset; - -import java.nio.ByteBuffer; -import java.nio.CharBuffer; -import java.nio.charset.Charset; -import java.nio.charset.CharsetDecoder; -import java.nio.charset.CharsetEncoder; -import java.nio.charset.CoderResult; -import java.util.Objects; - -public class ISO_8859_1 extends Charset -{ - public static final ISO_8859_1 INSTANCE = new ISO_8859_1(); - - public ISO_8859_1() { - super("ISO-8859-1", StandardCharsets.aliases_ISO_8859_1()); - } - - public String historicalName() { - return "ISO8859_1"; - } - - public boolean contains(final Charset cs) { - return ((cs instanceof ISO_8859_1)); - } - - public CharsetDecoder newDecoder() { - return new Decoder(this); - } - - public CharsetEncoder newEncoder() { - return new Encoder(this); - } - - private static class Decoder extends CharsetDecoder { - - private Decoder(final Charset cs) { - super(cs, 1.0f, 1.0f); - } - - private CoderResult decodeArrayLoop(final ByteBuffer src, - final CharBuffer dst) - { - final byte[] sa = src.array(); - int sp = src.arrayOffset() + src.position(); - final int sl = src.arrayOffset() + src.limit(); - assert (sp <= sl); - sp = (sp <= sl ? sp : sl); - final char[] da = dst.array(); - int dp = dst.arrayOffset() + dst.position(); - final int dl = dst.arrayOffset() + dst.limit(); - assert (dp <= dl); - dp = (dp <= dl ? 
dp : dl); - - try { - while (sp < sl) { - final byte b = sa[sp]; - if (dp >= dl) - return CoderResult.OVERFLOW; - da[dp++] = (char)(b & 0xff); - sp++; - } - return CoderResult.UNDERFLOW; - } finally { - src.position(sp - src.arrayOffset()); - dst.position(dp - dst.arrayOffset()); - } - } - - private CoderResult decodeBufferLoop(final ByteBuffer src, - final CharBuffer dst) - { - int mark = src.position(); - try { - while (src.hasRemaining()) { - final byte b = src.get(); - if (!dst.hasRemaining()) - return CoderResult.OVERFLOW; - dst.put((char)(b & 0xff)); - mark++; - } - return CoderResult.UNDERFLOW; - } finally { - src.position(mark); - } - } - - protected CoderResult decodeLoop(final ByteBuffer src, - final CharBuffer dst) - { - if (src.hasArray() && dst.hasArray()) - return decodeArrayLoop(src, dst); - else - return decodeBufferLoop(src, dst); - } - } - - private static class Encoder extends CharsetEncoder { - - private Encoder(final Charset cs) { - super(cs, 1.0f, 1.0f); - } - - public boolean canEncode(final char c) { - return c <= '\u00FF'; - } - - public boolean isLegalReplacement(final byte[] repl) { - return true; // we accept any byte value - } - - private final Surrogate.Parser sgp = new Surrogate.Parser(); - - // Method possible replaced with a compiler intrinsic. 
- private static int encodeISOArray(final char[] sa, final int sp, - final byte[] da, final int dp, final int len) { - if (len <= 0) { - return 0; - } - encodeISOArrayCheck(sa, sp, da, dp, len); - return implEncodeISOArray(sa, sp, da, dp, len); - } - - private static int implEncodeISOArray(final char[] sa, int sp, - final byte[] da, int dp, final int len) - { - int i = 0; - for (; i < len; i++) { - final char c = sa[sp++]; - if (c > '\u00FF') - break; - da[dp++] = (byte)c; - } - return i; - } - - private static void encodeISOArrayCheck(final char[] sa, final int sp, - final byte[] da, final int dp, final int len) { - Objects.requireNonNull(sa); - Objects.requireNonNull(da); - - if (sp < 0 || sp >= sa.length) { - throw new ArrayIndexOutOfBoundsException(sp); - } - - if (dp < 0 || dp >= da.length) { - throw new ArrayIndexOutOfBoundsException(dp); - } - - final int endIndexSP = sp + len - 1; - if (endIndexSP < 0 || endIndexSP >= sa.length) { - throw new ArrayIndexOutOfBoundsException(endIndexSP); - } - - final int endIndexDP = dp + len - 1; - if (endIndexDP < 0 || endIndexDP >= da.length) { - throw new ArrayIndexOutOfBoundsException(endIndexDP); - } - } - - private CoderResult encodeArrayLoop(final CharBuffer src, - final ByteBuffer dst) - { - final char[] sa = src.array(); - final int soff = src.arrayOffset(); - int sp = soff + src.position(); - final int sl = soff + src.limit(); - assert (sp <= sl); - sp = (sp <= sl ? sp : sl); - final byte[] da = dst.array(); - final int doff = dst.arrayOffset(); - int dp = doff + dst.position(); - final int dl = doff + dst.limit(); - assert (dp <= dl); - dp = (dp <= dl ? dp : dl); - final int dlen = dl - dp; - final int slen = sl - sp; - final int len = (dlen < slen) ? 
dlen : slen; - try { - final int ret = encodeISOArray(sa, sp, da, dp, len); - sp = sp + ret; - dp = dp + ret; - if (ret != len) { - if (sgp.parse(sa[sp], sa, sp, sl) < 0) - return sgp.error(); - return sgp.unmappableResult(); - } - if (len < slen) - return CoderResult.OVERFLOW; - return CoderResult.UNDERFLOW; - } finally { - src.position(sp - soff); - dst.position(dp - doff); - } - } - - private CoderResult encodeBufferLoop(final CharBuffer src, - final ByteBuffer dst) - { - int mark = src.position(); - try { - while (src.hasRemaining()) { - final char c = src.get(); - if (c <= '\u00FF') { - if (!dst.hasRemaining()) - return CoderResult.OVERFLOW; - dst.put((byte)c); - mark++; - continue; - } - if (sgp.parse(c, src) < 0) - return sgp.error(); - return sgp.unmappableResult(); - } - return CoderResult.UNDERFLOW; - } finally { - src.position(mark); - } - } - - protected CoderResult encodeLoop(final CharBuffer src, - final ByteBuffer dst) - { - if (src.hasArray() && dst.hasArray()) - return encodeArrayLoop(src, dst); - else - return encodeBufferLoop(src, dst); - } - } -} diff --git a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/StandardCharsets.java b/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/StandardCharsets.java index 47d338c7c8..ea8558076e 100644 --- a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/StandardCharsets.java +++ b/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/StandardCharsets.java @@ -46,45 +46,48 @@ public boolean hasNext() { @Override public Charset next() { c++; - if (c == 1) { - return UTF_8.INSTANCE; + try { + if (c == 1) { + return (Charset) Class.forName("sun.nio.cs.UTF_8").newInstance(); + } + if (c == 2) { + return (Charset) Class.forName("sun.nio.cs.UTF_16").newInstance(); + } + if (c == 3) { + return (Charset) Class.forName("sun.nio.cs.ISO_8859_1").newInstance(); + } + } catch (final
Exception e) { + throw new IllegalStateException("Instantiation error", e); } - if (c == 2) { - return UTF_16.INSTANCE; - } - if (c == 3) { - return ISO_8859_1.INSTANCE; - } - throw new IllegalStateException("EOL"); } }; } - public Charset charsetForName(final String charsetName) { + public Charset charsetForName(final String charsetName) throws ClassNotFoundException, IllegalAccessException, InstantiationException { for (final String name : aliases_UTF_8()) { if (name.equalsIgnoreCase(charsetName)) { - return UTF_8.INSTANCE; + return (Charset) Class.forName("sun.nio.cs.UTF_8").newInstance(); } } for (final String name : aliases_UTF_16()) { if (name.equalsIgnoreCase(charsetName)) { - return UTF_16.INSTANCE; + return (Charset) Class.forName("sun.nio.cs.UTF_16").newInstance(); } } for (final String name : aliases_ISO_8859_1()) { if (name.equalsIgnoreCase(charsetName)) { - return ISO_8859_1.INSTANCE; + return (Charset) Class.forName("sun.nio.cs.ISO_8859_1").newInstance(); } } - if (charsetName.equals(UTF_8.INSTANCE.name())) { - return UTF_8.INSTANCE; + if (charsetName.equals("UTF-8")) { + return (Charset) Class.forName("sun.nio.cs.UTF_8").newInstance(); } - if (charsetName.equals(UTF_16.INSTANCE.name())) { - return UTF_16.INSTANCE; + if (charsetName.equals("UTF-16")) { + return (Charset) Class.forName("sun.nio.cs.UTF_16").newInstance(); } - if (charsetName.equals(ISO_8859_1.INSTANCE.name())) { - return ISO_8859_1.INSTANCE; + if (charsetName.equals("ISO-8859-1")) { + return (Charset) Class.forName("sun.nio.cs.ISO_8859_1").newInstance(); } return null; } diff --git a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/Surrogate.java b/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/Surrogate.java deleted file mode 100644 index f37bc2935a..0000000000 --- a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/Surrogate.java +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright
2017 Mirko Sertic - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package de.mirkosertic.bytecoder.classlib.java.nio.charset; - -import java.nio.CharBuffer; -import java.nio.charset.CoderResult; - -public class Surrogate { - - private Surrogate() { } - - // TODO: Deprecate/remove the following redundant definitions - public static final char MIN_HIGH = Character.MIN_HIGH_SURROGATE; - public static final char MAX_HIGH = Character.MAX_HIGH_SURROGATE; - public static final char MIN_LOW = Character.MIN_LOW_SURROGATE; - public static final char MAX_LOW = Character.MAX_LOW_SURROGATE; - public static final char MIN = Character.MIN_SURROGATE; - public static final char MAX = Character.MAX_SURROGATE; - public static final int UCS4_MIN = Character.MIN_SUPPLEMENTARY_CODE_POINT; - public static final int UCS4_MAX = Character.MAX_CODE_POINT; - - public static boolean isHigh(final int c) { - return (MIN_HIGH <= c) && (c <= MAX_HIGH); - } - - public static boolean isLow(final int c) { - return (MIN_LOW <= c) && (c <= MAX_LOW); - } - - public static boolean is(final int c) { - return (MIN <= c) && (c <= MAX); - } - - public static boolean neededFor(final int uc) { - return Character.isSupplementaryCodePoint(uc); - } - - public static char high(final int uc) { - assert Character.isSupplementaryCodePoint(uc); - return Character.highSurrogate(uc); - } - - public static char low(final int uc) { - assert Character.isSupplementaryCodePoint(uc); - return 
Character.lowSurrogate(uc); - } - - public static int toUCS4(final char c, final char d) { - assert Character.isHighSurrogate(c) && Character.isLowSurrogate(d); - return Character.toCodePoint(c, d); - } - - public static class Parser { - - public Parser() { } - - private int character; // UCS-4 - private CoderResult error = CoderResult.UNDERFLOW; - private boolean isPair; - - public int character() { - assert (error == null); - return character; - } - - public boolean isPair() { - assert (error == null); - return isPair; - } - - public int increment() { - assert (error == null); - return isPair ? 2 : 1; - } - - public CoderResult error() { - assert (error != null); - return error; - } - - public CoderResult unmappableResult() { - assert (error == null); - return CoderResult.unmappableForLength(isPair ? 2 : 1); - } - - public int parse(final char c, final CharBuffer in) { - if (Character.isHighSurrogate(c)) { - if (!in.hasRemaining()) { - error = CoderResult.UNDERFLOW; - return -1; - } - final char d = in.get(); - if (Character.isLowSurrogate(d)) { - character = Character.toCodePoint(c, d); - isPair = true; - error = null; - return character; - } - error = CoderResult.malformedForLength(1); - return -1; - } - if (Character.isLowSurrogate(c)) { - error = CoderResult.malformedForLength(1); - return -1; - } - character = c; - isPair = false; - error = null; - return character; - } - - public int parse(final char c, final char[] ia, final int ip, final int il) { - assert (ia[ip] == c); - if (Character.isHighSurrogate(c)) { - if (il - ip < 2) { - error = CoderResult.UNDERFLOW; - return -1; - } - final char d = ia[ip + 1]; - if (Character.isLowSurrogate(d)) { - character = Character.toCodePoint(c, d); - isPair = true; - error = null; - return character; - } - error = CoderResult.malformedForLength(1); - return -1; - } - if (Character.isLowSurrogate(c)) { - error = CoderResult.malformedForLength(1); - return -1; - } - character = c; - isPair = false; - error = null; - 
return character; - } - - } - - public static class Generator { - - public Generator() { } - - private CoderResult error = CoderResult.OVERFLOW; - - public CoderResult error() { - assert error != null; - return error; - } - - public int generate(final int uc, final int len, final CharBuffer dst) { - if (Character.isBmpCodePoint(uc)) { - final char c = (char) uc; - if (Character.isSurrogate(c)) { - error = CoderResult.malformedForLength(len); - return -1; - } - if (dst.remaining() < 1) { - error = CoderResult.OVERFLOW; - return -1; - } - dst.put(c); - error = null; - return 1; - } else if (Character.isValidCodePoint(uc)) { - if (dst.remaining() < 2) { - error = CoderResult.OVERFLOW; - return -1; - } - dst.put(Character.highSurrogate(uc)); - dst.put(Character.lowSurrogate(uc)); - error = null; - return 2; - } else { - error = CoderResult.unmappableForLength(len); - return -1; - } - } - - public int generate(final int uc, final int len, final char[] da, final int dp, final int dl) { - if (Character.isBmpCodePoint(uc)) { - final char c = (char) uc; - if (Character.isSurrogate(c)) { - error = CoderResult.malformedForLength(len); - return -1; - } - if (dl - dp < 1) { - error = CoderResult.OVERFLOW; - return -1; - } - da[dp] = c; - error = null; - return 1; - } else if (Character.isValidCodePoint(uc)) { - if (dl - dp < 2) { - error = CoderResult.OVERFLOW; - return -1; - } - da[dp] = Character.highSurrogate(uc); - da[dp + 1] = Character.lowSurrogate(uc); - error = null; - return 2; - } else { - error = CoderResult.unmappableForLength(len); - return -1; - } - } - } -} diff --git a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/TCharset.java b/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/TCharset.java index 818db6e9cd..0751096b14 100644 --- a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/TCharset.java +++ 
b/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/TCharset.java @@ -92,12 +92,14 @@ private static java.nio.charset.Charset lookup2(final String charsetName) { return (java.nio.charset.Charset)a[1]; } final java.nio.charset.Charset cs; - if ((cs = standardProvider.charsetForName(charsetName)) != null) - { - cache(charsetName, cs); - return cs; + try { + if ((cs = standardProvider.charsetForName(charsetName)) != null) { + cache(charsetName, cs); + return cs; + } + } catch (final Exception e) { + throw new IllegalArgumentException("Instantiation error", e); } - /* Only need to check the name if we didn't find a charset for it */ checkName(charsetName); return null; @@ -135,9 +137,9 @@ public static SortedMap availableCharsets() { private static volatile java.nio.charset.Charset defaultCharset; - public static java.nio.charset.Charset defaultCharset() { + public static java.nio.charset.Charset defaultCharset() throws ClassNotFoundException, IllegalAccessException, InstantiationException { if (defaultCharset == null) { - defaultCharset = UTF_8.INSTANCE; + defaultCharset = (Charset) Class.forName("sun.nio.cs.UTF_8").newInstance(); } return defaultCharset; } diff --git a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/TStandardCharsets.java b/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/TStandardCharsets.java index 553f0f6503..124171c234 100644 --- a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/TStandardCharsets.java +++ b/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/TStandardCharsets.java @@ -24,5 +24,5 @@ public class TStandardCharsets { public static final Charset UTF_16 = Charset.forName("UTF-16"); public static final Charset UTF_8 = Charset.forName("UTF-8"); - public static final Charset ISO_8859_1 = Charset.forName("ISO_8859_1"); + public static final Charset ISO_8859_1 = 
Charset.forName("ISO-8859-1"); } diff --git a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/UTF_16.java b/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/UTF_16.java deleted file mode 100644 index ce56a197bb..0000000000 --- a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/UTF_16.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2017 Mirko Sertic - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package de.mirkosertic.bytecoder.classlib.java.nio.charset; - -import java.nio.charset.Charset; -import java.nio.charset.CharsetDecoder; -import java.nio.charset.CharsetEncoder; - -public class UTF_16 extends Unicode { - - public static final UTF_16 INSTANCE = new UTF_16(); - - public UTF_16() { - super("UTF-16", StandardCharsets.aliases_UTF_16()); - } - - public String historicalName() { - return "UTF-16"; - } - - public CharsetDecoder newDecoder() { - return new Decoder(this); - } - - public CharsetEncoder newEncoder() { - return new Encoder(this); - } - - private static class Decoder extends UnicodeDecoder { - - public Decoder(final Charset cs) { - super(cs, NONE); - } - } - - private static class Encoder extends UnicodeEncoder { - - public Encoder(final Charset cs) { - super(cs, BIG, true); - } - } - -} diff --git a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/UTF_8.java b/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/UTF_8.java deleted file mode 100644 index 33ae11ae65..0000000000 --- a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/UTF_8.java +++ /dev/null @@ -1,529 +0,0 @@ -/* - * Copyright 2017 Mirko Sertic - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package de.mirkosertic.bytecoder.classlib.java.nio.charset; - -import java.nio.Buffer; -import java.nio.ByteBuffer; -import java.nio.CharBuffer; -import java.nio.charset.Charset; -import java.nio.charset.CharsetDecoder; -import java.nio.charset.CharsetEncoder; -import java.nio.charset.CoderResult; - -public class UTF_8 extends Unicode { - - public static final UTF_8 INSTANCE = new UTF_8(); - - public UTF_8() { - super("UTF-8", StandardCharsets.aliases_UTF_8()); - } - - public String historicalName() { - return "UTF8"; - } - - public CharsetDecoder newDecoder() { - return new Decoder(this); - } - - public CharsetEncoder newEncoder() { - return new Encoder(this); - } - - static final void updatePositions(final Buffer src, final int sp, - final Buffer dst, final int dp) { - src.position(sp - src.arrayOffset()); - dst.position(dp - dst.arrayOffset()); - } - - private static class Decoder extends CharsetDecoder { - - private Decoder(final Charset cs) { - super(cs, 1.0f, 1.0f); - } - - private static boolean isNotContinuation(final int b) { - return (b & 0xc0) != 0x80; - } - - // [E0] [A0..BF] [80..BF] - // [E1..EF] [80..BF] [80..BF] - private static boolean isMalformed3(final int b1, final int b2, final int b3) { - return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || - (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80; - } - - // only used when there is only one byte left in src buffer - private static boolean isMalformed3_2(final int b1, final int b2) { - return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || - (b2 & 0xc0) != 0x80; - } - - // [F0] [90..BF] [80..BF] [80..BF] - // [F1..F3] [80..BF] [80..BF] [80..BF] - // [F4] [80..8F] [80..BF] [80..BF] - // only check 80-be range here, the [0xf0,0x80...] and [0xf4,0x90-...] 
- // will be checked by Character.isSupplementaryCodePoint(uc) - private static boolean isMalformed4(final int b2, final int b3, final int b4) { - return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 || - (b4 & 0xc0) != 0x80; - } - - // only used when there is less than 4 bytes left in src buffer. - // both b1 and b2 should be "& 0xff" before passed in. - private static boolean isMalformed4_2(final int b1, final int b2) { - return (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) || - (b1 == 0xf4 && (b2 & 0xf0) != 0x80) || - (b2 & 0xc0) != 0x80; - } - - // tests if b1 and b2 are malformed as the first 2 bytes of a - // legal`4-byte utf-8 byte sequence. - // only used when there is less than 4 bytes left in src buffer, - // after isMalformed4_2 has been invoked. - private static boolean isMalformed4_3(final int b3) { - return (b3 & 0xc0) != 0x80; - } - - private static CoderResult malformedN(final ByteBuffer src, final int nb) { - switch (nb) { - case 1: - case 2: // always 1 - return CoderResult.malformedForLength(1); - case 3: - int b1 = src.get(); - int b2 = src.get(); // no need to lookup b3 - return CoderResult.malformedForLength( - ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) || - isNotContinuation(b2)) ? 
1 : 2); - case 4: // we don't care the speed here - b1 = src.get() & 0xff; - b2 = src.get() & 0xff; - if (b1 > 0xf4 || - (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) || - (b1 == 0xf4 && (b2 & 0xf0) != 0x80) || - isNotContinuation(b2)) - return CoderResult.malformedForLength(1); - if (isNotContinuation(src.get())) - return CoderResult.malformedForLength(2); - return CoderResult.malformedForLength(3); - default: - assert false; - return null; - } - } - - private static CoderResult malformed(final ByteBuffer src, final int sp, - final CharBuffer dst, final int dp, - final int nb) - { - src.position(sp - src.arrayOffset()); - final CoderResult cr = malformedN(src, nb); - updatePositions(src, sp, dst, dp); - return cr; - } - - - private static CoderResult malformed(final ByteBuffer src, - final int mark, final int nb) - { - src.position(mark); - final CoderResult cr = malformedN(src, nb); - src.position(mark); - return cr; - } - - private static CoderResult malformedForLength(final ByteBuffer src, - final int sp, - final CharBuffer dst, - final int dp, - final int malformedNB) - { - updatePositions(src, sp, dst, dp); - return CoderResult.malformedForLength(malformedNB); - } - - private static CoderResult malformedForLength(final ByteBuffer src, - final int mark, - final int malformedNB) - { - src.position(mark); - return CoderResult.malformedForLength(malformedNB); - } - - - private static CoderResult xflow(final Buffer src, final int sp, final int sl, - final Buffer dst, final int dp, final int nb) { - updatePositions(src, sp, dst, dp); - return (nb == 0 || sl - sp < nb) - ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW; - } - - private static CoderResult xflow(final Buffer src, final int mark, final int nb) { - src.position(mark); - return (nb == 0 || src.remaining() < nb) - ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW; - } - - private CoderResult decodeArrayLoop(final ByteBuffer src, - final CharBuffer dst) - { - // This method is optimized for ASCII input. 
- final byte[] sa = src.array(); - int sp = src.arrayOffset() + src.position(); - final int sl = src.arrayOffset() + src.limit(); - - final char[] da = dst.array(); - int dp = dst.arrayOffset() + dst.position(); - final int dl = dst.arrayOffset() + dst.limit(); - final int dlASCII = dp + Math.min(sl - sp, dl - dp); - - // ASCII only loop - while (dp < dlASCII && sa[sp] >= 0) - da[dp++] = (char) sa[sp++]; - while (sp < sl) { - int b1 = sa[sp]; - if (b1 >= 0) { - // 1 byte, 7 bits: 0xxxxxxx - if (dp >= dl) - return xflow(src, sp, sl, dst, dp, 1); - da[dp++] = (char) b1; - sp++; - } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { - // 2 bytes, 11 bits: 110xxxxx 10xxxxxx - // [C2..DF] [80..BF] - if (sl - sp < 2 || dp >= dl) - return xflow(src, sp, sl, dst, dp, 2); - final int b2 = sa[sp + 1]; - // Now we check the first byte of 2-byte sequence as - // if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) - // no longer need to check b1 against c1 & c0 for - // malformed as we did in previous version - // (b1 & 0x1e) == 0x0 || (b2 & 0xc0) != 0x80; - // only need to check the second byte b2. 
- if (isNotContinuation(b2)) - return malformedForLength(src, sp, dst, dp, 1); - da[dp++] = (char) (((b1 << 6) ^ b2) - ^ - (((byte) 0xC0 << 6) ^ - ((byte) 0x80 << 0))); - sp += 2; - } else if ((b1 >> 4) == -2) { - // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx - final int srcRemaining = sl - sp; - if (srcRemaining < 3 || dp >= dl) { - if (srcRemaining > 1 && isMalformed3_2(b1, sa[sp + 1])) - return malformedForLength(src, sp, dst, dp, 1); - return xflow(src, sp, sl, dst, dp, 3); - } - final int b2 = sa[sp + 1]; - final int b3 = sa[sp + 2]; - if (isMalformed3(b1, b2, b3)) - return malformed(src, sp, dst, dp, 3); - final char c = (char) - ((b1 << 12) ^ - (b2 << 6) ^ - (b3 ^ - (((byte) 0xE0 << 12) ^ - ((byte) 0x80 << 6) ^ - ((byte) 0x80 << 0)))); - if (Character.isSurrogate(c)) - return malformedForLength(src, sp, dst, dp, 3); - da[dp++] = c; - sp += 3; - } else if ((b1 >> 3) == -2) { - // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - final int srcRemaining = sl - sp; - if (srcRemaining < 4 || dl - dp < 2) { - b1 &= 0xff; - if (b1 > 0xf4 || - srcRemaining > 1 && isMalformed4_2(b1, sa[sp + 1] & 0xff)) - return malformedForLength(src, sp, dst, dp, 1); - if (srcRemaining > 2 && isMalformed4_3(sa[sp + 2])) - return malformedForLength(src, sp, dst, dp, 2); - return xflow(src, sp, sl, dst, dp, 4); - } - final int b2 = sa[sp + 1]; - final int b3 = sa[sp + 2]; - final int b4 = sa[sp + 3]; - final int uc = ((b1 << 18) ^ - (b2 << 12) ^ - (b3 << 6) ^ - (b4 ^ - (((byte) 0xF0 << 18) ^ - ((byte) 0x80 << 12) ^ - ((byte) 0x80 << 6) ^ - ((byte) 0x80 << 0)))); - if (isMalformed4(b2, b3, b4) || - // shortest form check - !Character.isSupplementaryCodePoint(uc)) { - return malformed(src, sp, dst, dp, 4); - } - da[dp++] = Character.highSurrogate(uc); - da[dp++] = Character.lowSurrogate(uc); - sp += 4; - } else - return malformed(src, sp, dst, dp, 1); - } - return xflow(src, sp, sl, dst, dp, 0); - } - - private CoderResult decodeBufferLoop(final ByteBuffer src, - final 
CharBuffer dst) - { - int mark = src.position(); - final int limit = src.limit(); - while (mark < limit) { - int b1 = src.get(); - if (b1 >= 0) { - // 1 byte, 7 bits: 0xxxxxxx - if (dst.remaining() < 1) - return xflow(src, mark, 1); // overflow - dst.put((char) b1); - mark++; - } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { - // 2 bytes, 11 bits: 110xxxxx 10xxxxxx - if (limit - mark < 2|| dst.remaining() < 1) - return xflow(src, mark, 2); - final int b2 = src.get(); - if (isNotContinuation(b2)) - return malformedForLength(src, mark, 1); - dst.put((char) (((b1 << 6) ^ b2) - ^ - (((byte) 0xC0 << 6) ^ - ((byte) 0x80 << 0)))); - mark += 2; - } else if ((b1 >> 4) == -2) { - // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx - final int srcRemaining = limit - mark; - if (srcRemaining < 3 || dst.remaining() < 1) { - if (srcRemaining > 1 && isMalformed3_2(b1, src.get())) - return malformedForLength(src, mark, 1); - return xflow(src, mark, 3); - } - final int b2 = src.get(); - final int b3 = src.get(); - if (isMalformed3(b1, b2, b3)) - return malformed(src, mark, 3); - final char c = (char) - ((b1 << 12) ^ - (b2 << 6) ^ - (b3 ^ - (((byte) 0xE0 << 12) ^ - ((byte) 0x80 << 6) ^ - ((byte) 0x80 << 0)))); - if (Character.isSurrogate(c)) - return malformedForLength(src, mark, 3); - dst.put(c); - mark += 3; - } else if ((b1 >> 3) == -2) { - // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - final int srcRemaining = limit - mark; - if (srcRemaining < 4 || dst.remaining() < 2) { - b1 &= 0xff; - if (b1 > 0xf4 || - srcRemaining > 1 && isMalformed4_2(b1, src.get() & 0xff)) - return malformedForLength(src, mark, 1); - if (srcRemaining > 2 && isMalformed4_3(src.get())) - return malformedForLength(src, mark, 2); - return xflow(src, mark, 4); - } - final int b2 = src.get(); - final int b3 = src.get(); - final int b4 = src.get(); - final int uc = ((b1 << 18) ^ - (b2 << 12) ^ - (b3 << 6) ^ - (b4 ^ - (((byte) 0xF0 << 18) ^ - ((byte) 0x80 << 12) ^ - ((byte) 0x80 << 6) ^ - ((byte) 0x80 
<< 0)))); - if (isMalformed4(b2, b3, b4) || - // shortest form check - !Character.isSupplementaryCodePoint(uc)) { - return malformed(src, mark, 4); - } - dst.put(Character.highSurrogate(uc)); - dst.put(Character.lowSurrogate(uc)); - mark += 4; - } else { - return malformed(src, mark, 1); - } - } - return xflow(src, mark, 0); - } - - protected CoderResult decodeLoop(final ByteBuffer src, - final CharBuffer dst) - { - if (src.hasArray() && dst.hasArray()) - return decodeArrayLoop(src, dst); - else - return decodeBufferLoop(src, dst); - } - - private static ByteBuffer getByteBuffer(ByteBuffer bb, final byte[] ba, final int sp) - { - if (bb == null) - bb = ByteBuffer.wrap(ba); - bb.position(sp); - return bb; - } - } - - private static final class Encoder extends CharsetEncoder { - - private Encoder(final Charset cs) { - super(cs, 1.1f, 3.0f); - } - - public boolean canEncode(final char c) { - return !Character.isSurrogate(c); - } - - public boolean isLegalReplacement(final byte[] repl) { - return ((repl.length == 1 && repl[0] >= 0) || - super.isLegalReplacement(repl)); - } - - private static CoderResult overflow(final CharBuffer src, final int sp, - final ByteBuffer dst, final int dp) { - updatePositions(src, sp, dst, dp); - return CoderResult.OVERFLOW; - } - - private static CoderResult overflow(final CharBuffer src, final int mark) { - src.position(mark); - return CoderResult.OVERFLOW; - } - - private Surrogate.Parser sgp; - private CoderResult encodeArrayLoopUTF8(final CharBuffer src, - final ByteBuffer dst) - { - final char[] sa = src.array(); - int sp = src.arrayOffset() + src.position(); - final int sl = src.arrayOffset() + src.limit(); - - final byte[] da = dst.array(); - int dp = dst.arrayOffset() + dst.position(); - final int dl = dst.arrayOffset() + dst.limit(); - final int dlASCII = dp + Math.min(sl - sp, dl - dp); - - // ASCII only loop - while (dp < dlASCII && sa[sp] < '\u0080') - da[dp++] = (byte) sa[sp++]; - while (sp < sl) { - final char c = sa[sp]; - 
if (c < 0x80) { - // Have at most seven bits - if (dp >= dl) - return overflow(src, sp, dst, dp); - da[dp++] = (byte)c; - } else if (c < 0x800) { - // 2 bytes, 11 bits - if (dl - dp < 2) - return overflow(src, sp, dst, dp); - da[dp++] = (byte)(0xc0 | (c >> 6)); - da[dp++] = (byte)(0x80 | (c & 0x3f)); - } else if (Character.isSurrogate(c)) { - // Have a surrogate pair - if (sgp == null) - sgp = new Surrogate.Parser(); - final int uc = sgp.parse(c, sa, sp, sl); - if (uc < 0) { - updatePositions(src, sp, dst, dp); - return sgp.error(); - } - if (dl - dp < 4) - return overflow(src, sp, dst, dp); - da[dp++] = (byte)(0xf0 | ((uc >> 18))); - da[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f)); - da[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f)); - da[dp++] = (byte)(0x80 | (uc & 0x3f)); - sp++; // 2 chars - } else { - // 3 bytes, 16 bits - if (dl - dp < 3) - return overflow(src, sp, dst, dp); - da[dp++] = (byte)(0xe0 | ((c >> 12))); - da[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f)); - da[dp++] = (byte)(0x80 | (c & 0x3f)); - } - sp++; - } - updatePositions(src, sp, dst, dp); - return CoderResult.UNDERFLOW; - } - - private CoderResult encodeBufferLoop(final CharBuffer src, - final ByteBuffer dst) - { - int mark = src.position(); - while (src.hasRemaining()) { - final char c = src.get(); - if (c < 0x80) { - // Have at most seven bits - if (!dst.hasRemaining()) - return overflow(src, mark); - dst.put((byte)c); - } else if (c < 0x800) { - // 2 bytes, 11 bits - if (dst.remaining() < 2) - return overflow(src, mark); - dst.put((byte)(0xc0 | (c >> 6))); - dst.put((byte)(0x80 | (c & 0x3f))); - } else if (Character.isSurrogate(c)) { - // Have a surrogate pair - if (sgp == null) - sgp = new Surrogate.Parser(); - final int uc = sgp.parse(c, src); - if (uc < 0) { - src.position(mark); - return sgp.error(); - } - if (dst.remaining() < 4) - return overflow(src, mark); - dst.put((byte)(0xf0 | ((uc >> 18)))); - dst.put((byte)(0x80 | ((uc >> 12) & 0x3f))); - dst.put((byte)(0x80 | ((uc >> 6) & 0x3f))); - 
dst.put((byte)(0x80 | (uc & 0x3f))); - mark++; // 2 chars - } else { - // 3 bytes, 16 bits - if (dst.remaining() < 3) - return overflow(src, mark); - dst.put((byte)(0xe0 | ((c >> 12)))); - dst.put((byte)(0x80 | ((c >> 6) & 0x3f))); - dst.put((byte)(0x80 | (c & 0x3f))); - } - mark++; - } - src.position(mark); - return CoderResult.UNDERFLOW; - } - - protected final CoderResult encodeLoop(final CharBuffer src, - final ByteBuffer dst) - { - if (src.hasArray() && dst.hasArray()) - return encodeArrayLoopUTF8(src, dst); - else - return encodeBufferLoop(src, dst); - } - } -} diff --git a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/Unicode.java b/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/Unicode.java deleted file mode 100644 index 8572ff150c..0000000000 --- a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/Unicode.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright 2017 Mirko Sertic - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package de.mirkosertic.bytecoder.classlib.java.nio.charset; - -import java.nio.charset.Charset; - -abstract class Unicode extends Charset -{ - public Unicode(final String name, final String[] aliases) { - super(name, aliases); - } - - public boolean contains(final Charset cs) { - return cs instanceof UTF_8; - } -} diff --git a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/UnicodeDecoder.java b/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/UnicodeDecoder.java deleted file mode 100644 index e14aa2997a..0000000000 --- a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/UnicodeDecoder.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2000, 2006, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- */ - -package de.mirkosertic.bytecoder.classlib.java.nio.charset; - -import java.nio.ByteBuffer; -import java.nio.CharBuffer; -import java.nio.charset.Charset; -import java.nio.charset.CharsetDecoder; -import java.nio.charset.CoderResult; - - -abstract class UnicodeDecoder extends CharsetDecoder { - - protected static final char BYTE_ORDER_MARK = (char) 0xfeff; - protected static final char REVERSED_MARK = (char) 0xfffe; - - protected static final int NONE = 0; - protected static final int BIG = 1; - protected static final int LITTLE = 2; - - private final int expectedByteOrder; - private int currentByteOrder; - private int defaultByteOrder = BIG; - - public UnicodeDecoder(final Charset cs, final int bo) { - super(cs, 0.5f, 1.0f); - expectedByteOrder = currentByteOrder = bo; - } - - public UnicodeDecoder(final Charset cs, final int bo, final int defaultBO) { - this(cs, bo); - defaultByteOrder = defaultBO; - } - - private char decode(final int b1, final int b2) { - if (currentByteOrder == BIG) - return (char)((b1 << 8) | b2); - else - return (char)((b2 << 8) | b1); - } - - protected CoderResult decodeLoop(final ByteBuffer src, final CharBuffer dst) { - int mark = src.position(); - - try { - while (src.remaining() > 1) { - final int b1 = src.get() & 0xff; - final int b2 = src.get() & 0xff; - - // Byte Order Mark interpretation - if (currentByteOrder == NONE) { - final char c = (char)((b1 << 8) | b2); - if (c == BYTE_ORDER_MARK) { - currentByteOrder = BIG; - mark += 2; - continue; - } else if (c == REVERSED_MARK) { - currentByteOrder = LITTLE; - mark += 2; - continue; - } else { - currentByteOrder = defaultByteOrder; - // FALL THROUGH to process b1, b2 normally - } - } - - final char c = decode(b1, b2); - - if (c == REVERSED_MARK) { - // A reversed BOM cannot occur within middle of stream - return CoderResult.malformedForLength(2); - } - - // Surrogates - if (Character.isSurrogate(c)) { - if (Character.isHighSurrogate(c)) { - if (src.remaining() < 2) - return 
CoderResult.UNDERFLOW; - final char c2 = decode(src.get() & 0xff, src.get() & 0xff); - if (!Character.isLowSurrogate(c2)) - return CoderResult.malformedForLength(4); - if (dst.remaining() < 2) - return CoderResult.OVERFLOW; - mark += 4; - dst.put(c); - dst.put(c2); - continue; - } - // Unpaired low surrogate - return CoderResult.malformedForLength(2); - } - - if (!dst.hasRemaining()) - return CoderResult.OVERFLOW; - mark += 2; - dst.put(c); - - } - return CoderResult.UNDERFLOW; - - } finally { - src.position(mark); - } - } - - protected void implReset() { - currentByteOrder = expectedByteOrder; - } - -} diff --git a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/UnicodeEncoder.java b/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/UnicodeEncoder.java deleted file mode 100644 index c7ee924f45..0000000000 --- a/classlib/java.base/src/main/java/de/mirkosertic/bytecoder/classlib/java/nio/charset/UnicodeEncoder.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). 
- * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -package de.mirkosertic.bytecoder.classlib.java.nio.charset; - -import java.nio.ByteBuffer; -import java.nio.CharBuffer; -import java.nio.charset.Charset; -import java.nio.charset.CharsetEncoder; -import java.nio.charset.CoderResult; - -/** - * Base class for different flavors of UTF-16 encoders - */ -public abstract class UnicodeEncoder extends CharsetEncoder { - - protected static final char BYTE_ORDER_MARK = '\uFEFF'; - protected static final char REVERSED_MARK = '\uFFFE'; - - protected static final int BIG = 0; - protected static final int LITTLE = 1; - - private final int byteOrder; /* Byte order in use */ - private final boolean usesMark; /* Write an initial BOM */ - private boolean needsMark; - - protected UnicodeEncoder(final Charset cs, final int bo, final boolean m) { - super(cs, 2.0f, - // Four bytes max if you need a BOM - m ? 4.0f : 2.0f, - // Replacement depends upon byte order - ((bo == BIG) - ? 
new byte[] { (byte)0xff, (byte)0xfd } - : new byte[] { (byte)0xfd, (byte)0xff })); - usesMark = needsMark = m; - byteOrder = bo; - } - - private void put(final char c, final ByteBuffer dst) { - if (byteOrder == BIG) { - dst.put((byte)(c >> 8)); - dst.put((byte)(c & 0xff)); - } else { - dst.put((byte)(c & 0xff)); - dst.put((byte)(c >> 8)); - } - } - - private final Surrogate.Parser sgp = new Surrogate.Parser(); - - protected CoderResult encodeLoop(final CharBuffer src, final ByteBuffer dst) { - int mark = src.position(); - - if (needsMark && src.hasRemaining()) { - if (dst.remaining() < 2) - return CoderResult.OVERFLOW; - put(BYTE_ORDER_MARK, dst); - needsMark = false; - } - try { - while (src.hasRemaining()) { - final char c = src.get(); - if (!Character.isSurrogate(c)) { - if (dst.remaining() < 2) - return CoderResult.OVERFLOW; - mark++; - put(c, dst); - continue; - } - final int d = sgp.parse(c, src); - if (d < 0) - return sgp.error(); - if (dst.remaining() < 4) - return CoderResult.OVERFLOW; - mark += 2; - put(Character.highSurrogate(d), dst); - put(Character.lowSurrogate(d), dst); - } - return CoderResult.UNDERFLOW; - } finally { - src.position(mark); - } - } - - protected void implReset() { - needsMark = usesMark; - } - - public boolean canEncode(final char c) { - return ! Character.isSurrogate(c); - } -} diff --git a/cli/src/main/java/de/mirkosertic/bytecoder/cli/BytecoderCLI.java b/cli/src/main/java/de/mirkosertic/bytecoder/cli/BytecoderCLI.java index cb6bf59364..b2a04f16f5 100644 --- a/cli/src/main/java/de/mirkosertic/bytecoder/cli/BytecoderCLI.java +++ b/cli/src/main/java/de/mirkosertic/bytecoder/cli/BytecoderCLI.java @@ -84,6 +84,8 @@ public static class CLIOptions { @Option(names = "-registerallocator", required = false, description = "Which register allocator should be used? Can be linear or passthru. 
Defaults to 'linear'.") protected String registerAllocator = "linear"; + @Option(names = "-additionalClassesToLink", required = false, description = " List of full qualified class names to be linked beside the statically referenced ones.") + protected String additionalClassesToLink[] = new String[0]; } public static void main(final String[] args) throws IOException, ClassNotFoundException { @@ -115,7 +117,7 @@ public static void main(final String[] args) throws IOException, ClassNotFoundEx final BytecodeMethodSignature theSignature = new BytecodeMethodSignature(BytecodePrimitiveTypeRef.VOID, new BytecodeTypeRef[] { new BytecodeArrayTypeRef(BytecodeObjectTypeRef.fromRuntimeClass(String.class), 1) }); - final CompileOptions theOptions = new CompileOptions(new Slf4JLogger(), theCLIOptions.debugOutput, KnownOptimizer.valueOf(theCLIOptions.optimizationLevel), theCLIOptions.enableExceptionHandling, theCLIOptions.filenamePrefix, theCLIOptions.wasmInitialPages, theCLIOptions.wasmMaximumPages, theCLIOptions.minifyCompileResult, theCLIOptions.preferStackifier, Allocator.valueOf(theCLIOptions.registerAllocator), new String[0]); + final CompileOptions theOptions = new CompileOptions(new Slf4JLogger(), theCLIOptions.debugOutput, KnownOptimizer.valueOf(theCLIOptions.optimizationLevel), theCLIOptions.enableExceptionHandling, theCLIOptions.filenamePrefix, theCLIOptions.wasmInitialPages, theCLIOptions.wasmMaximumPages, theCLIOptions.minifyCompileResult, theCLIOptions.preferStackifier, Allocator.valueOf(theCLIOptions.registerAllocator), theCLIOptions.additionalClassesToLink); final CompileResult theCode = theCompileTarget.compile(theOptions, theTargetClass, "main", theSignature); for (final CompileResult.Content content : theCode.getContent()) { final File theBytecoderFileName = new File(theBytecoderDirectory, content.getFileName()); diff --git a/core/src/main/java/de/mirkosertic/bytecoder/backend/CompileTarget.java 
b/core/src/main/java/de/mirkosertic/bytecoder/backend/CompileTarget.java index 5ffaf46430..4b5800723c 100644 --- a/core/src/main/java/de/mirkosertic/bytecoder/backend/CompileTarget.java +++ b/core/src/main/java/de/mirkosertic/bytecoder/backend/CompileTarget.java @@ -35,6 +35,8 @@ import de.mirkosertic.bytecoder.core.BytecodeObjectTypeRef; import de.mirkosertic.bytecoder.core.BytecodePrimitiveTypeRef; import de.mirkosertic.bytecoder.core.BytecodeTypeRef; +import de.mirkosertic.bytecoder.core.BytecodeUtf8Constant; +import de.mirkosertic.bytecoder.graph.Edge; import de.mirkosertic.bytecoder.ssa.NaiveProgramGenerator; public class CompileTarget { @@ -77,6 +79,16 @@ public CompileResult compile( theCallsite.resolveVirtualMethod("invokeExact", new BytecodeMethodSignature(BytecodeObjectTypeRef.fromRuntimeClass(Object.class), new BytecodeTypeRef[] {new BytecodeArrayTypeRef(BytecodeObjectTypeRef.fromRuntimeClass(Object.class), 1)})); + // We have to link character set implementations + // to make them available via reflection API + theLinkerContext.resolveClass(BytecodeObjectTypeRef.fromUtf8Constant(new BytecodeUtf8Constant("sun/nio/cs/UTF_8"))) + .resolveConstructorInvocation(new BytecodeMethodSignature(BytecodePrimitiveTypeRef.VOID, new BytecodeTypeRef[0])); + theLinkerContext.resolveClass(BytecodeObjectTypeRef.fromUtf8Constant(new BytecodeUtf8Constant("sun/nio/cs/UTF_16"))).resolveConstructorInvocation(new BytecodeMethodSignature(BytecodePrimitiveTypeRef.VOID, new BytecodeTypeRef[0])); + theLinkerContext.resolveClass(BytecodeObjectTypeRef.fromUtf8Constant(new BytecodeUtf8Constant("sun/nio/cs/ISO_8859_1"))).resolveConstructorInvocation(new BytecodeMethodSignature(BytecodePrimitiveTypeRef.VOID, new BytecodeTypeRef[0])); + theLinkerContext.resolveClass(BytecodeObjectTypeRef.fromUtf8Constant(new BytecodeUtf8Constant("sun/nio/cs/US_ASCII"))).resolveConstructorInvocation(new BytecodeMethodSignature(BytecodePrimitiveTypeRef.VOID, new BytecodeTypeRef[0])); + + 
theLinkerContext.resolveClass(BytecodeObjectTypeRef.fromUtf8Constant(new BytecodeUtf8Constant("java/lang/CharacterDataLatin1"))).resolveConstructorInvocation(new BytecodeMethodSignature(BytecodePrimitiveTypeRef.VOID, new BytecodeTypeRef[0])); + // Additional classes if (aOptions.getAdditionalClassesToLink() != null) { for (final String theClassname : aOptions.getAdditionalClassesToLink()) { @@ -111,7 +123,7 @@ public CompileResult compile( // We have to link all callback implementations. They are not part of the dependency yet as // they are not invoked by the bytecode, but from the outside world. By adding them to the // dependency tree, we make sure they are available for invocation. - final List theLinkedClasses = theLinkerContext.linkedClasses().map(t -> t.targetNode()) + final List theLinkedClasses = theLinkerContext.linkedClasses().map(Edge::targetNode) .collect(Collectors.toList()); for (final BytecodeLinkedClass theLinkedClass : theLinkedClasses) { if (theLinkedClass.isCallback()) { diff --git a/core/src/main/java/de/mirkosertic/bytecoder/intrinsics/Intrinsics.java b/core/src/main/java/de/mirkosertic/bytecoder/intrinsics/Intrinsics.java index 67a648a320..f52b29a98b 100644 --- a/core/src/main/java/de/mirkosertic/bytecoder/intrinsics/Intrinsics.java +++ b/core/src/main/java/de/mirkosertic/bytecoder/intrinsics/Intrinsics.java @@ -32,7 +32,7 @@ public Intrinsics() { intrinsics.add(new VMIntrinsic()); intrinsics.add(new JavaLangStrictMathIntrinsic()); intrinsics.add(new JavaLangMathIntrinsic()); - intrinsics.add(new RuntimeClassIntrinsic()); + intrinsics.add(new JavaLangClassIntrinsic()); intrinsics.add(new ObjectConstructorCallIntrinsic()); intrinsics.add(new JavaLangEnumIntrinsic()); intrinsics.add(new JavaLangFloatIntrinsic()); diff --git a/core/src/main/java/de/mirkosertic/bytecoder/intrinsics/RuntimeClassIntrinsic.java b/core/src/main/java/de/mirkosertic/bytecoder/intrinsics/JavaLangClassIntrinsic.java similarity index 67% rename from 
core/src/main/java/de/mirkosertic/bytecoder/intrinsics/RuntimeClassIntrinsic.java rename to core/src/main/java/de/mirkosertic/bytecoder/intrinsics/JavaLangClassIntrinsic.java index 3e405d492b..7e5b210451 100644 --- a/core/src/main/java/de/mirkosertic/bytecoder/intrinsics/RuntimeClassIntrinsic.java +++ b/core/src/main/java/de/mirkosertic/bytecoder/intrinsics/JavaLangClassIntrinsic.java @@ -20,12 +20,14 @@ import java.util.List; -public class RuntimeClassIntrinsic extends Intrinsic { +public class JavaLangClassIntrinsic extends Intrinsic { @Override public boolean intrinsify(final Program aProgram, final BytecodeInstructionINVOKESPECIAL aInstruction, final String aMethodName, final BytecodeObjectTypeRef aType, final List aArguments, final Variable aTarget, final RegionNode aTargetBlock, final ParsingHelper aHelper) { final BytecodeMethodSignature theSignature = aInstruction.getMethodReference().getNameAndTypeIndex().getNameAndType().getDescriptorIndex().methodSignature(); + final BytecodeObjectTypeRef theCalledClass = BytecodeObjectTypeRef.fromUtf8Constant(aInstruction.getMethodReference().getClassIndex().getClassConstant().getConstant()); + if ("getClass".equals(aMethodName) && BytecodeLinkedClass.GET_CLASS_SIGNATURE .matchesExactlyTo(theSignature)) { final Variable theNewVariable = aTargetBlock @@ -34,22 +36,44 @@ public boolean intrinsify(final Program aProgram, final BytecodeInstructionINVOK return true; } + + if (theCalledClass.name().equals(Class.class.getName())) { + if ("newInstance".equals(aMethodName)) { + aHelper.push(aInstruction.getOpcodeAddress(), new NewInstanceFromDefaultConstructorExpression(aProgram, aInstruction.getOpcodeAddress(), aTarget)); + return true; + } + if ("desiredAssertionStatus".equals(aMethodName) && theSignature.matchesExactlyTo(BytecodeLinkedClass.DESIRED_ASSERTION_STATUS_SIGNATURE)) { + // Status is always false + aHelper.push(aInstruction.getOpcodeAddress(), new IntegerValue(0)); + return true; + } + } + return false; } @Override 
public boolean intrinsify(final Program aProgram, final BytecodeInstructionINVOKEVIRTUAL aInstruction, final String aMethodName, final List aArguments, final Value aTarget, final RegionNode aTargetBlock, final ParsingHelper aHelper) { final BytecodeMethodSignature theSignature = aInstruction.getMethodReference().getNameAndTypeIndex().getNameAndType().getDescriptorIndex().methodSignature(); + final BytecodeObjectTypeRef theCalledClass = BytecodeObjectTypeRef.fromUtf8Constant(aInstruction.getMethodReference().getClassIndex().getClassConstant().getConstant()); + if ("getClass".equals(aMethodName) && theSignature.matchesExactlyTo(BytecodeLinkedClass.GET_CLASS_SIGNATURE)) { final Value theValue = new TypeOfExpression(aProgram, aInstruction.getOpcodeAddress(), aTarget); final Variable theNewVariable = aTargetBlock.newVariable(aInstruction.getOpcodeAddress(), TypeRef.toType(theSignature.getReturnType()), theValue); aHelper.push(aInstruction.getOpcodeAddress(), theNewVariable); return true; } - if ("desiredAssertionStatus".equals(aMethodName) && theSignature.matchesExactlyTo(BytecodeLinkedClass.DESIRED_ASSERTION_STATUS_SIGNATURE)) { - // Status is always false - aHelper.push(aInstruction.getOpcodeAddress(), new IntegerValue(0)); - return true; + + if (theCalledClass.name().equals(Class.class.getName())) { + if ("newInstance".equals(aMethodName)) { + aHelper.push(aInstruction.getOpcodeAddress(), new NewInstanceFromDefaultConstructorExpression(aProgram, aInstruction.getOpcodeAddress(), aTarget)); + return true; + } + if ("desiredAssertionStatus".equals(aMethodName) && theSignature.matchesExactlyTo(BytecodeLinkedClass.DESIRED_ASSERTION_STATUS_SIGNATURE)) { + // Status is always false + aHelper.push(aInstruction.getOpcodeAddress(), new IntegerValue(0)); + return true; + } } return false; } diff --git a/core/src/test/java/de/mirkosertic/bytecoder/allocator/PassThruRegisterAllocatorTest.java 
b/core/src/test/java/de/mirkosertic/bytecoder/allocator/PassThruRegisterAllocatorTest.java index 26cacb05a7..9c21391c1a 100644 --- a/core/src/test/java/de/mirkosertic/bytecoder/allocator/PassThruRegisterAllocatorTest.java +++ b/core/src/test/java/de/mirkosertic/bytecoder/allocator/PassThruRegisterAllocatorTest.java @@ -263,11 +263,11 @@ public void testStandardCharsetsForNameRegisterAllocation() throws HeadToHeadCon System.out.println(String.format("%s Def at %d, LastUsedAt %d", v.getName(), v.liveRange().getDefinedAt(), v.liveRange().getLastUsedAt())); } - assertEquals(66, vars.size()); + assertEquals(75, vars.size()); final AbstractAllocator theAllocator = Allocator.passthru.allocate(p, Variable::resolveType, theLinkerContext); - assertEquals(63, theAllocator.assignedRegister().size()); + assertEquals(72, theAllocator.assignedRegister().size()); final CompileOptions theOptions = new CompileOptions(new Slf4JLogger(), true, KnownOptimizer.NONE, false, "ks", 100, 100, false, true, Allocator.passthru, new String[0]); final JSMinifier theMinifier = new JSMinifier(theOptions); diff --git a/core/src/test/java/de/mirkosertic/bytecoder/core/CharsetTest.java b/core/src/test/java/de/mirkosertic/bytecoder/core/CharsetTest.java index d5cb6e3483..3ec233fbb9 100644 --- a/core/src/test/java/de/mirkosertic/bytecoder/core/CharsetTest.java +++ b/core/src/test/java/de/mirkosertic/bytecoder/core/CharsetTest.java @@ -16,7 +16,6 @@ package de.mirkosertic.bytecoder.core; import de.mirkosertic.bytecoder.backend.CompileTarget; -import de.mirkosertic.bytecoder.classlib.java.nio.charset.UTF_8; import de.mirkosertic.bytecoder.unittest.BytecoderTestOption; import de.mirkosertic.bytecoder.unittest.BytecoderTestOptions; import de.mirkosertic.bytecoder.unittest.BytecoderUnitTestRunner; @@ -30,6 +29,7 @@ import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.nio.charset.CharsetEncoder; +import java.nio.charset.StandardCharsets; import java.util.Arrays; 
@RunWith(BytecoderUnitTestRunner.class) @@ -49,7 +49,7 @@ public void testPrint() { @Test @Ignore public void testUTF8() { - final Charset cs = Charset.forName("UTF-8"); + final Charset cs = StandardCharsets.UTF_8; final ByteBuffer bf = cs.encode("Münster"); final byte[] result = Arrays.copyOf(bf.array(), bf.limit()); @@ -93,7 +93,7 @@ public void testByteArray() { @Test public void testUTF8Charset() throws CharacterCodingException { final String m = "Mün"; - final UTF_8 cs = UTF_8.INSTANCE; + final Charset cs = StandardCharsets.UTF_8; final CharsetEncoder encoder = cs.newEncoder(); final ByteBuffer bb = encoder.encode(CharBuffer.wrap(new char[] {'M','ü','n'})); final byte[] arr = bb.array();