From f5e3a45699b4fd084896fc8364f6c11341f679dc Mon Sep 17 00:00:00 2001 From: Nikita Prokopov Date: Thu, 18 Mar 2021 00:47:37 +0100 Subject: [PATCH] BreakIterator, U16String --- examples/scenes/src/BreakIteratorScene.java | 112 ++++ examples/scenes/src/Scenes.java | 3 +- native/src/BreakIterator.cc | 119 ++++ native/src/U16String.cc | 27 + native/src/interop.cc | 15 + native/src/interop.hh | 6 + .../org/jetbrains/skija/BreakIterator.java | 550 ++++++++++++++++++ .../java/org/jetbrains/skija/U16String.java | 42 ++ 8 files changed, 873 insertions(+), 1 deletion(-) create mode 100644 examples/scenes/src/BreakIteratorScene.java create mode 100644 native/src/BreakIterator.cc create mode 100644 native/src/U16String.cc create mode 100644 shared/src/main/java/org/jetbrains/skija/BreakIterator.java create mode 100644 shared/src/main/java/org/jetbrains/skija/U16String.java diff --git a/examples/scenes/src/BreakIteratorScene.java b/examples/scenes/src/BreakIteratorScene.java new file mode 100644 index 00000000..ece4565e --- /dev/null +++ b/examples/scenes/src/BreakIteratorScene.java @@ -0,0 +1,112 @@ +package org.jetbrains.skija.examples.scenes; + +import java.util.stream.*; +import org.jetbrains.skija.*; + +public class BreakIteratorScene extends Scene { + Font mono11 = new Font(jbMono, 11); + int x, y; + + public BreakIteratorScene() { + _variants = new String[] { "ICU", "java.text" }; + } + + @Override + public void draw(Canvas canvas, int width, int height, float dpi, int xpos, int ypos) { + x = 20; + y = 40; + + var text = "🐉☺️❤️👮🏿👨‍👩‍👧‍👦🚵🏼‍♀️🇷🇺🏴󠁧󠁢󠁥󠁮󠁧󠁿*️⃣ǍǍZ̵̡̢͇͓͎͖͎̪͑͜ͅͅबिक्"; + if ("ICU".equals(_variants[_variantIdx])) + drawCharacterICU(canvas, height, text); + else + drawCharacterJavaText(canvas, height, text); + x = width / 2 + 10; + y = 40; + + text = "One, (two; three). 
FoUr,five!"; + if ("ICU".equals(_variants[_variantIdx])) + drawWordICU(canvas, height, text); + else + drawWordJavaText(canvas, height, text); + } + + public void drawUnicode(Canvas canvas, String str) { + var decoded = str.codePoints() + .mapToObj(c -> String.format("U+%4s", Long.toString(c, 16).toUpperCase()).replaceAll(" ", "0")) + .collect(Collectors.joining(" ")); + + try (var line = TextLine.make(decoded, mono11); ) { + canvas.drawTextLine(line, x + 50, y, blackFill); + } + } + + public void drawSubstring(Canvas canvas, String str, int height) { + try (var line = TextLine.make(str, inter13); ) { + canvas.drawTextLine(line, x, y, blackFill); + } + + y += 20; + if (y + 20 > height - 20) { + x += 100; + y = 20; + } + } + + public void drawCharacterICU(Canvas canvas, int height, String text) { + try (var iter = BreakIterator.makeCharacterInstance();) { + iter.setText(text); + int start = iter.first(); + while (true) { + int end = iter.next(); + if (end == BreakIterator.DONE) + break; + drawUnicode(canvas, text.substring(start, end)); + drawSubstring(canvas, text.substring(start, end), height); + start = end; + } + } + } + + public void drawCharacterJavaText(Canvas canvas, int height, String text) { + var iter = java.text.BreakIterator.getCharacterInstance(); + iter.setText(text); + int start = iter.first(); + while (true) { + int end = iter.next(); + if (end == java.text.BreakIterator.DONE) + break; + drawUnicode(canvas, text.substring(start, end)); + drawSubstring(canvas, text.substring(start, end), height); + start = end; + } + } + + public void drawWordICU(Canvas canvas, int height, String text) { + try (var iter = BreakIterator.makeWordInstance();) { + iter.setText(text); + int start = iter.first(); + while (true) { + int end = iter.next(); + if (end == BreakIterator.DONE) + break; + if (iter.getRuleStatus() != BreakIterator.WORD_NONE) + drawSubstring(canvas, text.substring(start, end), height); + start = end; + } + } + } + + public void 
drawWordJavaText(Canvas canvas, int height, String text) { + var iter = java.text.BreakIterator.getWordInstance(); + iter.setText(text); + int start = iter.first(); + while (true) { + int end = iter.next(); + if (end == java.text.BreakIterator.DONE) + break; + drawSubstring(canvas, text.substring(start, end), height); + start = end; + } + } +} \ No newline at end of file diff --git a/examples/scenes/src/Scenes.java b/examples/scenes/src/Scenes.java index f776be82..d65941a6 100644 --- a/examples/scenes/src/Scenes.java +++ b/examples/scenes/src/Scenes.java @@ -6,7 +6,7 @@ public class Scenes { public static TreeMap scenes; - public static String currentScene = "Codec"; + public static String currentScene = "Break Iterator"; public static HUD hud = new HUD(); public static boolean vsync = true; public static boolean stats = true; @@ -16,6 +16,7 @@ public class Scenes { scenes.put("Bitmap", null); scenes.put("Bitmap Image", null); scenes.put("Blends", null); + scenes.put("Break Iterator", null); scenes.put("Codec", null); scenes.put("Color Filters", null); scenes.put("Decorations Bench", null); diff --git a/native/src/BreakIterator.cc b/native/src/BreakIterator.cc new file mode 100644 index 00000000..7a284bac --- /dev/null +++ b/native/src/BreakIterator.cc @@ -0,0 +1,119 @@ +#include +#include "interop.hh" +#include "unicode/ubrk.h" + +static void deleteBreakIterator(UBreakIterator* instance) { + ubrk_close(instance); +} + +extern "C" JNIEXPORT jlong JNICALL Java_org_jetbrains_skija_BreakIterator__1nGetFinalizer(JNIEnv* env, jclass jclass) { + return static_cast(reinterpret_cast(&deleteBreakIterator)); +} + +extern "C" JNIEXPORT jlong JNICALL Java_org_jetbrains_skija_BreakIterator__1nMake + (JNIEnv* env, jclass jclass, jint type, jstring localeStr) { + UErrorCode status = U_ZERO_ERROR; + UBreakIterator* instance; + if (localeStr == nullptr) + instance = ubrk_open(static_cast(type), uloc_getDefault(), nullptr, 0, &status); + else { + SkString locale = skString(env, 
localeStr); + instance = ubrk_open(static_cast(type), locale.c_str(), nullptr, 0, &status); + } + + if (U_FAILURE(status)) { + env->ThrowNew(java::lang::RuntimeException::cls, u_errorName(status)); + return 0; + } else + return reinterpret_cast(instance); +} + +/* Clones the iterator behind ptr and returns the new native handle. The return type is jlong to match the Java declaration `native long _nClone(long ptr)`; a jint return would truncate the pointer on 64-bit platforms. Returns 0 with a pending RuntimeException when ICU reports a failure. */ +extern "C" JNIEXPORT jlong JNICALL Java_org_jetbrains_skija_BreakIterator__1nClone + (JNIEnv* env, jclass jclass, jlong ptr) { + UBreakIterator* instance = reinterpret_cast(static_cast(ptr)); + UErrorCode status = U_ZERO_ERROR; + UBreakIterator* clone = ubrk_safeClone(instance, nullptr, 0, &status); + if (U_FAILURE(status)) { + env->ThrowNew(java::lang::RuntimeException::cls, u_errorName(status)); + return 0; + } else + return reinterpret_cast(clone); +} + +extern "C" JNIEXPORT jint JNICALL Java_org_jetbrains_skija_BreakIterator__1nCurrent + (JNIEnv* env, jclass jclass, jlong ptr) { + UBreakIterator* instance = reinterpret_cast(static_cast(ptr)); + return ubrk_current(instance); +} + +extern "C" JNIEXPORT jint JNICALL Java_org_jetbrains_skija_BreakIterator__1nNext + (JNIEnv* env, jclass jclass, jlong ptr) { + UBreakIterator* instance = reinterpret_cast(static_cast(ptr)); + return ubrk_next(instance); +} + +extern "C" JNIEXPORT jint JNICALL Java_org_jetbrains_skija_BreakIterator__1nPrevious + (JNIEnv* env, jclass jclass, jlong ptr) { + UBreakIterator* instance = reinterpret_cast(static_cast(ptr)); + return ubrk_previous(instance); +} + +extern "C" JNIEXPORT jint JNICALL Java_org_jetbrains_skija_BreakIterator__1nFirst + (JNIEnv* env, jclass jclass, jlong ptr) { + UBreakIterator* instance = reinterpret_cast(static_cast(ptr)); + return ubrk_first(instance); +} + +extern "C" JNIEXPORT jint JNICALL Java_org_jetbrains_skija_BreakIterator__1nLast + (JNIEnv* env, jclass jclass, jlong ptr) { + UBreakIterator* instance = reinterpret_cast(static_cast(ptr)); + return ubrk_last(instance); +} + +extern "C" JNIEXPORT jint JNICALL Java_org_jetbrains_skija_BreakIterator__1nPreceding + (JNIEnv* env, jclass jclass, jlong ptr, 
jint offset) { + UBreakIterator* instance = reinterpret_cast(static_cast(ptr)); + return ubrk_preceding(instance, offset); +} + +extern "C" JNIEXPORT jint JNICALL Java_org_jetbrains_skija_BreakIterator__1nFollowing + (JNIEnv* env, jclass jclass, jlong ptr, jint offset) { + UBreakIterator* instance = reinterpret_cast(static_cast(ptr)); + return ubrk_following(instance, offset); +} + +extern "C" JNIEXPORT jboolean JNICALL Java_org_jetbrains_skija_BreakIterator__1nIsBoundary + (JNIEnv* env, jclass jclass, jlong ptr, jint offset) { + UBreakIterator* instance = reinterpret_cast(static_cast(ptr)); + return ubrk_isBoundary(instance, offset); +} + +extern "C" JNIEXPORT jint JNICALL Java_org_jetbrains_skija_BreakIterator__1nGetRuleStatus + (JNIEnv* env, jclass jclass, jlong ptr) { + UBreakIterator* instance = reinterpret_cast(static_cast(ptr)); + return ubrk_getRuleStatus(instance); +} + +/* Returns all rule status values for the current boundary as a Java int[]. On an ICU failure a RuntimeException is raised and nullptr is returned immediately, so no further JNI calls are made while the exception is pending. */ +extern "C" JNIEXPORT jintArray JNICALL Java_org_jetbrains_skija_BreakIterator__1nGetRuleStatuses + (JNIEnv* env, jclass jclass, jlong ptr) { + UBreakIterator* instance = reinterpret_cast(static_cast(ptr)); + UErrorCode status = U_ZERO_ERROR; + int32_t len = ubrk_getRuleStatusVec(instance, nullptr, 0, &status); + /* The zero-capacity call is a preflight that only reports the count; ICU flags it with U_BUFFER_OVERFLOW_ERROR, which is expected here and must be cleared before the real call. */ + if (status == U_BUFFER_OVERFLOW_ERROR) + status = U_ZERO_ERROR; + if (U_FAILURE(status)) { + env->ThrowNew(java::lang::RuntimeException::cls, u_errorName(status)); + return nullptr; + } + std::vector vec(len); + ubrk_getRuleStatusVec(instance, reinterpret_cast(vec.data()), len, &status); + if (U_FAILURE(status)) { + env->ThrowNew(java::lang::RuntimeException::cls, u_errorName(status)); + return nullptr; + } + return javaIntArray(env, vec); +} + +extern "C" JNIEXPORT void JNICALL Java_org_jetbrains_skija_BreakIterator__1nSetText + (JNIEnv* env, jclass jclass, jlong ptr, jlong textPtr) { + UBreakIterator* instance = reinterpret_cast(static_cast(ptr)); + std::vector* text = reinterpret_cast*>(static_cast(textPtr)); + UErrorCode status = U_ZERO_ERROR; + ubrk_setText(instance, reinterpret_cast(text->data()), text->size(), &status); + if (U_FAILURE(status)) + 
env->ThrowNew(java::lang::RuntimeException::cls, u_errorName(status)); +} \ No newline at end of file diff --git a/native/src/U16String.cc b/native/src/U16String.cc new file mode 100644 index 00000000..e95a5ceb --- /dev/null +++ b/native/src/U16String.cc @@ -0,0 +1,27 @@ +#include +#include +#include "interop.hh" +#include "SkString.h" + +static void deleteU16String(std::vector* instance) { + delete instance; +} + +extern "C" JNIEXPORT jlong JNICALL Java_org_jetbrains_skija_U16String__1nGetFinalizer + (JNIEnv* env, jclass jclass) { + return static_cast(reinterpret_cast(&deleteU16String)); +} + +extern "C" JNIEXPORT jlong JNICALL Java_org_jetbrains_skija_U16String__1nMake + (JNIEnv* env, jclass jclass, jstring str) { + jsize len = env->GetStringLength(str); + std::vector* instance = new std::vector(len); + env->GetStringRegion(str, 0, len, instance->data()); + return reinterpret_cast(instance); +} + +extern "C" JNIEXPORT jobject JNICALL Java_org_jetbrains_skija_U16String__1nToString + (JNIEnv* env, jclass jclass, jlong ptr) { + std::vector* instance = reinterpret_cast*>(static_cast(ptr)); + return env->NewString(instance->data(), instance->size()); +} diff --git a/native/src/interop.cc b/native/src/interop.cc index 3fffdc08..226691ff 100644 --- a/native/src/interop.cc +++ b/native/src/interop.cc @@ -39,6 +39,19 @@ namespace java { } } + namespace RuntimeException { + jclass cls; + + void onLoad(JNIEnv* env) { + jclass local = env->FindClass("java/lang/RuntimeException"); + cls = static_cast(env->NewGlobalRef(local)); + } + + void onUnload(JNIEnv* env) { + env->DeleteGlobalRef(cls); + } + } + namespace String { jclass cls; @@ -111,6 +124,7 @@ namespace java { void onLoad(JNIEnv* env) { io::OutputStream::onLoad(env); lang::Float::onLoad(env); + lang::RuntimeException::onLoad(env); lang::String::onLoad(env); lang::Throwable::onLoad(env); util::Iterator::onLoad(env); @@ -121,6 +135,7 @@ namespace java { util::function::BooleanSupplier::onUnload(env); 
util::Iterator::onUnload(env); lang::String::onUnload(env); + lang::RuntimeException::onUnload(env); lang::Float::onUnload(env); } } diff --git a/native/src/interop.hh b/native/src/interop.hh index 25acc7f4..60648560 100644 --- a/native/src/interop.hh +++ b/native/src/interop.hh @@ -34,6 +34,12 @@ namespace java { void onUnload(JNIEnv* env); } + namespace RuntimeException { + extern jclass cls; + void onLoad(JNIEnv* env); + void onUnload(JNIEnv* env); + } + namespace String { extern jclass cls; void onLoad(JNIEnv* env); diff --git a/shared/src/main/java/org/jetbrains/skija/BreakIterator.java b/shared/src/main/java/org/jetbrains/skija/BreakIterator.java new file mode 100644 index 00000000..0d02e855 --- /dev/null +++ b/shared/src/main/java/org/jetbrains/skija/BreakIterator.java @@ -0,0 +1,550 @@ +package org.jetbrains.skija; + +import java.lang.ref.*; +import java.util.*; +import org.jetbrains.annotations.*; +import org.jetbrains.skija.*; +import org.jetbrains.skija.impl.*; + +/** + *

A class that locates boundaries in text. This class defines a protocol for + * objects that break up a piece of natural-language text according to a set + * of criteria. Instances or subclasses of BreakIterator can be provided, for + * example, to break a piece of text into words, sentences, or logical characters + * according to the conventions of some language or group of languages. + * + * We provide four built-in types of BreakIterator: + *

    + *
  • makeSentenceInstance() returns a BreakIterator that locates boundaries + * between sentences. This is useful for triple-click selection, for example. + *
  • makeWordInstance() returns a BreakIterator that locates boundaries between + * words. This is useful for double-click selection or "find whole words" searches. + * This type of BreakIterator makes sure there is a boundary position at the + * beginning and end of each legal word. (Numbers count as words, too.) Whitespace + * and punctuation are kept separate from real words. + *
  • makeLineInstance() returns a BreakIterator that locates positions where it is + * legal for a text editor to wrap lines. This is similar to word breaking, but + * not the same: punctuation and whitespace are generally kept with words (you don't + * want a line to start with whitespace, for example), and some special characters + * can force a position to be considered a line-break position or prevent a position + * from being a line-break position. + *
  • makeCharacterInstance() returns a BreakIterator that locates boundaries between + * logical characters. Because of the structure of the Unicode encoding, a logical + * character may be stored internally as more than one Unicode code point. (A with an + * umlaut may be stored as an a followed by a separate combining umlaut character, + * for example, but the user still thinks of it as one character.) This iterator allows + * various processes (especially text editors) to treat as characters the units of text + * that a user would think of as characters, rather than the units of text that the + * computer sees as "characters".
+ * The text boundary positions are found according to the rules + * described in Unicode Standard Annex #29, Text Boundaries, and + * Unicode Standard Annex #14, Line Breaking Properties. These + * are available at http://www.unicode.org/reports/tr14/ and + * http://www.unicode.org/reports/tr29/. + *

+ * BreakIterator's interface follows an "iterator" model (hence the name), meaning it + * has a concept of a "current position" and methods like first(), last(), next(), + * and previous() that update the current position. All BreakIterators uphold the + * following invariants: + *

  • The beginning and end of the text are always treated as boundary positions. + *
  • The current position of the iterator is always a boundary position (random- + * access methods move the iterator to the nearest boundary position before or + * after the specified position, not _to_ the specified position). + *
  • DONE is used as a flag to indicate when iteration has stopped. DONE is only + * returned when the current position is the end of the text and the user calls next(), + * or when the current position is the beginning of the text and the user calls + * previous(). + *
  • Break positions are numbered by the positions of the characters that follow + * them. Thus, under normal circumstances, the position before the first character + * is 0, the position after the first character is 1, and the position after the + * last character is 1 plus the length of the string. + *
  • The client can change the position of an iterator, or the text it analyzes, + * at will, but cannot change the behavior. If the user wants different behavior, he + * must instantiate a new iterator.
+ * + * BreakIterator analyzes the text passed to {@link #setText(String)}. Text kept in any + * other storage must be converted to a String first; no CharacterIterator-based + * input is provided. + * + * Note: Some types of BreakIterator can take a long time to create, and + * instances of BreakIterator are not currently cached by the system. For + * optimal performance, keep instances of BreakIterator around as long as makes + * sense. For example, when word-wrapping a document, don't create and destroy a + * new BreakIterator for each line. Create one break iterator for the whole document + * (or whatever stretch of text you're wrapping) and use it to do the whole job of + * wrapping the text. + * + *

+ * Examples:

+ * Creating and using text boundaries + *

+ *
+ * public static void main(String args[]) {
+ *      if (args.length == 1) {
+ *          String stringToExamine = args[0];
+ *          //print each word in order
+ *          BreakIterator boundary = BreakIterator.makeWordInstance();
+ *          boundary.setText(stringToExamine);
+ *          printEachForward(boundary, stringToExamine);
+ *          //print each sentence in reverse order
+ *          boundary = BreakIterator.makeSentenceInstance("en_US");
+ *          boundary.setText(stringToExamine);
+ *          printEachBackward(boundary, stringToExamine);
+ *          printFirst(boundary, stringToExamine);
+ *          printLast(boundary, stringToExamine);
+ *      }
+ * }
+ * 
+ *
+ * + * Print each element in order + *
+ *
+ * public static void printEachForward(BreakIterator boundary, String source) {
+ *     int start = boundary.first();
+ *     for (int end = boundary.next();
+ *          end != BreakIterator.DONE;
+ *          start = end, end = boundary.next()) {
+ *          System.out.println(source.substring(start,end));
+ *     }
+ * }
+ * 
+ *
+ * + * Print each element in reverse order + *
+ *
+ * public static void printEachBackward(BreakIterator boundary, String source) {
+ *     int end = boundary.last();
+ *     for (int start = boundary.previous();
+ *          start != BreakIterator.DONE;
+ *          end = start, start = boundary.previous()) {
+ *         System.out.println(source.substring(start,end));
+ *     }
+ * }
+ * 
+ *
+ * + * Print first element + *
+ *
+ * public static void printFirst(BreakIterator boundary, String source) {
+ *     int start = boundary.first();
+ *     int end = boundary.next();
+ *     System.out.println(source.substring(start,end));
+ * }
+ * 
+ *
+ * + * Print last element + *
+ *
+ * public static void printLast(BreakIterator boundary, String source) {
+ *     int end = boundary.last();
+ *     int start = boundary.previous();
+ *     System.out.println(source.substring(start,end));
+ * }
+ * 
+ *
+ * + * Print the element at a specified position + *
+ *
+ * public static void printAt(BreakIterator boundary, int pos, String source) {
+ *     int end = boundary.following(pos);
+ *     int start = boundary.previous();
+ *     System.out.println(source.substring(start,end));
+ * }
+ * 
+ *
+ * + * Find the next word + *
+ *
+ * public static int nextWordStartAfter(int pos, String text) {
+ *     BreakIterator wb = BreakIterator.makeWordInstance();
+ *     wb.setText(text);
+ *     int wordStart = wb.following(pos);
+ *     for (;;) {
+ *         int wordLimit = wb.next();
+ *         if (wordLimit == BreakIterator.DONE) {
+ *             return BreakIterator.DONE;
+ *         }
+ *         int wordStatus = wb.getRuleStatus();
+ *         if (wordStatus != BreakIterator.WORD_NONE) {
+ *             return wordStart;
+ *         }
+ *         wordStart = wordLimit;
+ *      }
+ * }
+ * 
+ * The iterator returned by {@link #makeWordInstance} is unique in that + * the break positions it returns don't represent both the start and end of the + * thing being iterated over. That is, a sentence-break iterator returns breaks + * that each represent the end of one sentence and the beginning of the next. + * With the word-break iterator, the characters between two boundaries might be a + * word, or they might be the punctuation or whitespace between two words. The + * above code uses {@link #getRuleStatus} to identify and ignore boundaries associated + * with punctuation or other non-word characters. + *
+ */ +public class BreakIterator extends Managed implements Cloneable { + static { Library.staticLoad(); } + + /** + * DONE is returned by previous() and next() after all valid + * boundaries have been returned. + */ + public static final int DONE = -1; + + /** + * Tag value for "words" that do not fit into any of other categories. + * Includes spaces and most punctuation. + */ + public static final int WORD_NONE = 0; + + /** + * Upper bound for tags for uncategorized words. + */ + public static final int WORD_NONE_LIMIT = 100; + + /** + * Tag value for words that appear to be numbers, lower limit. + */ + public static final int WORD_NUMBER = 100; + + /** + * Tag value for words that appear to be numbers, upper limit. + */ + public static final int WORD_NUMBER_LIMIT = 200; + + /** + * Tag value for words that contain letters, excluding + * hiragana, katakana or ideographic characters, lower limit. + */ + public static final int WORD_LETTER = 200; + + /** + * Tag value for words containing letters, upper limit + */ + public static final int WORD_LETTER_LIMIT = 300; + + /** + * Tag value for words containing kana characters, lower limit + */ + public static final int WORD_KANA = 300; + + /** + * Tag value for words containing kana characters, upper limit + */ + public static final int WORD_KANA_LIMIT = 400; + + /** + * Tag value for words containing ideographic characters, lower limit + */ + public static final int WORD_IDEO = 400; + + /** + * Tag value for words containing ideographic characters, upper limit + */ + public static final int WORD_IDEO_LIMIT = 500; + + /** + * Native UTF-16 copy of the text most recently passed to setText; closed together with this iterator. + */ + @ApiStatus.Internal public U16String _text; + + @ApiStatus.Internal + public BreakIterator(long ptr) { + super(ptr, _FinalizerHolder.PTR); + } + + /** + * Releases the native iterator and then the text buffer it was scanning, if any. + */ + @Override + public void close() { + super.close(); + if (_text != null) + _text.close(); + } + + /** + * Create a copy of this iterator. + * + * NOTE(review): the copy's {@code _text} is left null; presumably the native clone still + * reads the buffer owned by this iterator's {@code _text}, so this iterator may need to + * outlive the copy. Confirm against ubrk_safeClone. + */ + @Override + public BreakIterator clone() { + try { + Stats.onNativeCall(); + return new BreakIterator(_nClone(_ptr)); + } finally { + /* Keep this object (and its native peer) alive for the duration of the native call, as every other instance method does. */ + Reference.reachabilityFence(this); + } + } + + 
/** + * Returns a new BreakIterator instance for character breaks for the default locale. + */ + public static BreakIterator makeCharacterInstance() { + return makeCharacterInstance(null); + } + + /** + * Returns a new BreakIterator instance for character breaks for the given locale. + */ + public static BreakIterator makeCharacterInstance(String locale) { + Stats.onNativeCall(); + return new BreakIterator(_nMake(0, locale)); // UBRK_CHARACTER + } + + /** + * Returns a new BreakIterator instance for word breaks for the default locale. + */ + public static BreakIterator makeWordInstance() { + return makeWordInstance(null); + } + + /** + * Returns a new BreakIterator instance for word breaks for the given locale. + */ + public static BreakIterator makeWordInstance(String locale) { + Stats.onNativeCall(); + return new BreakIterator(_nMake(1, locale)); // UBRK_WORD + } + + /** + * Returns a new BreakIterator instance for line breaks for the default locale. + */ + public static BreakIterator makeLineInstance() { + return makeLineInstance(null); + } + + /** + * Returns a new BreakIterator instance for line breaks for the given locale. + */ + public static BreakIterator makeLineInstance(String locale) { + Stats.onNativeCall(); + return new BreakIterator(_nMake(2, locale)); // UBRK_LINE + } + + /** + * Returns a new BreakIterator instance for sentence breaks for the default locale. + */ + public static BreakIterator makeSentenceInstance() { + return makeSentenceInstance(null); + } + + /** + * Returns a new BreakIterator instance for sentence breaks for the given locale. + */ + public static BreakIterator makeSentenceInstance(String locale) { + Stats.onNativeCall(); + return new BreakIterator(_nMake(3, locale)); // UBRK_SENTENCE + } + + /** + * Returns character index of the text boundary that was most recently + * returned by {@link next()}, {@link next(int)}, {@link previous()}, + * {@link first()}, {@link last()}, {@link following(int)} or + * {@link preceding(int)}. 
If any of these methods returns + * {@link BreakIterator#DONE} because either first or last text boundary + * has been reached, it returns the first or last text boundary depending + * on which one is reached. + */ + public int current() { + try { + Stats.onNativeCall(); + return _nCurrent(_ptr); + } finally { + Reference.reachabilityFence(this); + } + } + + /** + * Returns the boundary following the current boundary. If the current + * boundary is the last text boundary, it returns {@link BreakIterator#DONE} + * and the iterator's current position is unchanged. Otherwise, the + * iterator's current position is set to the boundary following the current + * boundary. + */ + public int next() { + try { + Stats.onNativeCall(); + return _nNext(_ptr); + } finally { + Reference.reachabilityFence(this); + } + } + + /** + * Advances the iterator either forward or backward the specified number of steps. + * Negative values move backward, and positive values move forward. This is + * equivalent to repeatedly calling next() or previous(). + * @param n The number of steps to move. The sign indicates the direction + * (negative is backwards, and positive is forwards). + * @return The character offset of the boundary position n boundaries away from + * the current one. + */ + public int next(int n) { + int result = 0; + if (n > 0) { + for (; n > 0 && result != DONE; --n) { + result = next(); + } + } else if (n < 0) { + for (; n < 0 && result != DONE; ++n) { + result = previous(); + } + } else { + result = current(); + } + return result; + } + + /** + * Returns the boundary preceding the current boundary. If the current + * boundary is the first text boundary, it returns {@link BreakIterator#DONE} + * and the iterator's current position is unchanged. Otherwise, the + * iterator's current position is set to the boundary preceding the current + * boundary. 
+ */ + public int previous() { + try { + Stats.onNativeCall(); + return _nPrevious(_ptr); + } finally { + Reference.reachabilityFence(this); + } + } + + /** + * Returns the first boundary. The iterator's current position is set to the first text boundary. + */ + public int first() { + try { + Stats.onNativeCall(); + return _nFirst(_ptr); + } finally { + Reference.reachabilityFence(this); + } + } + + /** + * Returns the last boundary. The iterator's current position is set to the last text boundary. + */ + public int last() { + try { + Stats.onNativeCall(); + return _nLast(_ptr); + } finally { + Reference.reachabilityFence(this); + } + } + + /** + * Returns the last boundary preceding the specified character offset. + * If the specified offset is equal to the first text boundary, it returns + * {@link BreakIterator#DONE} and the iterator's current position is + * unchanged. Otherwise, the iterator's current position is set to the + * returned boundary. The value returned is always less than the offset or + * the value {@link BreakIterator#DONE}. + */ + public int preceding(int offset) { + try { + Stats.onNativeCall(); + return _nPreceding(_ptr, offset); + } finally { + Reference.reachabilityFence(this); + } + } + + /** + * Returns the first boundary following the specified character offset. + * If the specified offset is equal to the last text boundary, it returns + * {@link BreakIterator#DONE} and the iterator's current position is + * unchanged. Otherwise, the iterator's current position is set to the + * returned boundary. The value returned is always greater than the offset or + * the value {@link BreakIterator#DONE}. + */ + public int following(int offset) { + try { + Stats.onNativeCall(); + return _nFollowing(_ptr, offset); + } finally { + Reference.reachabilityFence(this); + } + } + + /** + * Returns true if the specified character offset is a text boundary. 
+ */ + public boolean isBoundary(int offset) { + try { + Stats.onNativeCall(); + return _nIsBoundary(_ptr, offset); + } finally { + Reference.reachabilityFence(this); + } + } + + /** + * For rule-based BreakIterators, return the status tag from the + * break rule that determined the boundary at the current iteration position. + *

+ * For break iterator types that do not support a rule status, + * a default value of 0 is returned. + *

+ * @return The status from the break rule that determined the boundary + * at the current iteration position. + */ + public int getRuleStatus() { + try { + Stats.onNativeCall(); + return _nGetRuleStatus(_ptr); + } finally { + Reference.reachabilityFence(this); + } + } + + /** + * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) + * that determined the boundary at the current iteration position. + *

+ * For break iterator types that do not support rule status, + * no values are returned. + * + * @return an array with the status values. + */ + public int[] getRuleStatuses() { + try { + Stats.onNativeCall(); + return _nGetRuleStatuses(_ptr); + } finally { + Reference.reachabilityFence(this); + } + } + + /** + * Set a new text string to be scanned. The current scan position is reset to {@link first()}. + */ + public void setText(String text) { + try { + Stats.onNativeCall(); + _text = new U16String(text); + _nSetText(_ptr, Native.getPtr(_text)); + } finally { + Reference.reachabilityFence(this); + Reference.reachabilityFence(_text); + } + } + + @ApiStatus.Internal + public static class _FinalizerHolder { + public static final long PTR = _nGetFinalizer(); + } + + @ApiStatus.Internal public static native long _nGetFinalizer(); + @ApiStatus.Internal public static native long _nMake(int type, String locale); + @ApiStatus.Internal public static native long _nClone(long ptr); + @ApiStatus.Internal public static native int _nCurrent(long ptr); + @ApiStatus.Internal public static native int _nNext(long ptr); + @ApiStatus.Internal public static native int _nPrevious(long ptr); + @ApiStatus.Internal public static native int _nFirst(long ptr); + @ApiStatus.Internal public static native int _nLast(long ptr); + @ApiStatus.Internal public static native int _nPreceding(long ptr, int offset); + @ApiStatus.Internal public static native int _nFollowing(long ptr, int offset); + @ApiStatus.Internal public static native boolean _nIsBoundary(long ptr, int offset); + @ApiStatus.Internal public static native int _nGetRuleStatus(long ptr); + @ApiStatus.Internal public static native int[] _nGetRuleStatuses(long ptr); + @ApiStatus.Internal public static native void _nSetText(long ptr, long textPtr); +} diff --git a/shared/src/main/java/org/jetbrains/skija/U16String.java b/shared/src/main/java/org/jetbrains/skija/U16String.java new file mode 100644 index 00000000..0433720b --- /dev/null +++ 
b/shared/src/main/java/org/jetbrains/skija/U16String.java @@ -0,0 +1,42 @@ +package org.jetbrains.skija; + +import java.lang.ref.*; +import lombok.*; +import org.jetbrains.annotations.*; +import org.jetbrains.skija.impl.*; + +/** + * Java mirror of std::vector<jchar> (UTF-16) + */ +public class U16String extends Managed { + static { Library.staticLoad(); } + + @ApiStatus.Internal + public U16String(long ptr) { + super(ptr, _FinalizerHolder.PTR); + } + + public U16String(String s) { + this(_nMake(s)); + Stats.onNativeCall(); + } + + @Override + public String toString() { + try { + Stats.onNativeCall(); + return _nToString(_ptr); + } finally { + Reference.reachabilityFence(this); + } + } + + @ApiStatus.Internal + public static class _FinalizerHolder { + public static final long PTR = _nGetFinalizer(); + } + + @ApiStatus.Internal public static native long _nMake(String s); + @ApiStatus.Internal public static native long _nGetFinalizer(); + @ApiStatus.Internal public static native String _nToString(long ptr); +} \ No newline at end of file