From 23a2e12ac825dc31ca2bda73989b02ea8ff22718 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Hohwiller?= Date: Mon, 29 May 2023 17:49:07 +0200 Subject: [PATCH] small improvements --- .../scanner/AbstractCharStreamScanner.java | 90 ++++++++++++------- .../mmm/scanner/CharSequenceScanner.java | 59 ++++++++++-- .../github/mmm/scanner/CharStreamScanner.java | 64 +++++++++++-- .../SimpleTextFormatMessageHandler.java | 27 +----- 4 files changed, 172 insertions(+), 68 deletions(-) diff --git a/core/src/main/java/io/github/mmm/scanner/AbstractCharStreamScanner.java b/core/src/main/java/io/github/mmm/scanner/AbstractCharStreamScanner.java index d0933f4..ebfb640 100644 --- a/core/src/main/java/io/github/mmm/scanner/AbstractCharStreamScanner.java +++ b/core/src/main/java/io/github/mmm/scanner/AbstractCharStreamScanner.java @@ -2,8 +2,6 @@ * http://www.apache.org/licenses/LICENSE-2.0 */ package io.github.mmm.scanner; -import java.util.ArrayList; -import java.util.Collections; import java.util.List; import org.slf4j.Logger; @@ -34,8 +32,6 @@ public abstract class AbstractCharStreamScanner implements CharStreamScanner { private final TextFormatMessageHandler messageHandler; - private final boolean collectMessages; - /** The internal buffer with character data. */ protected char[] buffer; @@ -54,9 +50,6 @@ public abstract class AbstractCharStreamScanner implements CharStreamScanner { /** @see #getColumn() */ protected int column; - /** @see #getMessages() */ - private List messages; - /** A {@link StringBuilder} instance that can be shared and reused. May initially be null. */ private StringBuilder sb; @@ -71,6 +64,19 @@ public AbstractCharStreamScanner(int capacity, TextFormatMessageHandler messageH this(new char[capacity], messageHandler); } + /** + * The constructor. + * + * @param capacity the capacity of the internal buffer in {@code char}s. + * @param messageHandler the {@link TextFormatMessageHandler}. + * @param line the initial {@link #getLine() line}. 
+ * @param column the initial {@link #getColumn() column}. + */ + public AbstractCharStreamScanner(int capacity, TextFormatMessageHandler messageHandler, int line, int column) { + + this(new char[capacity], messageHandler, line, column); + } + /** * The constructor. * @@ -79,22 +85,30 @@ public AbstractCharStreamScanner(int capacity, TextFormatMessageHandler messageH */ public AbstractCharStreamScanner(char[] buffer, TextFormatMessageHandler messageHandler) { + this(buffer, messageHandler, 1, 1); + } + + /** + * The constructor. + * + * @param buffer the internal {@code char[]} buffer. + * @param messageHandler the {@link TextFormatMessageHandler}. + * @param line the initial {@link #getLine() line}. + * @param column the initial {@link #getColumn() column}. + */ + public AbstractCharStreamScanner(char[] buffer, TextFormatMessageHandler messageHandler, int line, int column) { + super(); if (messageHandler == null) { this.messageHandler = SimpleTextFormatMessageHandler.get(); } else { this.messageHandler = messageHandler; } - if (this.messageHandler instanceof SimpleTextFormatMessageHandler) { - this.collectMessages = ((SimpleTextFormatMessageHandler) this.messageHandler).isCollectMessages(); - } else { - this.collectMessages = true; - } this.buffer = buffer; this.offset = 0; this.limit = 0; - this.line = 1; - this.column = 1; + this.line = line; + this.column = column; } @Override @@ -112,23 +126,13 @@ public int getColumn() { @Override public void addMessage(TextFormatMessage message) { - message = this.messageHandler.handle(message); - if (this.collectMessages) { - getMessages().add(message); - } + this.messageHandler.add(message); } @Override public List getMessages() { - if (this.messages == null) { - if (this.collectMessages) { - this.messages = new ArrayList<>(); - } else { - this.messages = Collections.emptyList(); - } - } - return this.messages; + return this.messageHandler.getMessages(); } /** @@ -576,16 +580,16 @@ public void require(String expected, 
boolean ignoreCase) { } @Override - public boolean expectOne(char expected) { + public boolean expectOne(char expected, boolean warning) { - if (!hasNext()) { - return false; - } - if (this.buffer[this.offset] == expected) { + if (hasNext() && (this.buffer[this.offset] == expected)) { handleChar(expected); this.offset++; return true; } + if (warning) { + addWarning("Expected '" + expected + "'"); + } return false; } @@ -1249,6 +1253,30 @@ public int skip(int count) { return skipped; } + @Override + public int skipNewLine() { + + int skip = 0; + if (hasNext()) { + if (this.buffer[this.offset] == '\n') { + skip = 1; + } else if (this.buffer[this.offset] == '\r') { + if (((this.offset + 1) < this.limit) && (this.buffer[this.offset + 1] == '\n')) { + skip = 2; + } else if (peek(1) == '\n') { + skip(2); + return 2; + } + } + } + if (skip > 0) { + this.offset = this.offset + skip; + this.line++; + this.column = 1; + } + return skip; + } + @Override public boolean skipUntil(char stop) { diff --git a/core/src/main/java/io/github/mmm/scanner/CharSequenceScanner.java b/core/src/main/java/io/github/mmm/scanner/CharSequenceScanner.java index 5ae93ce..12c0b9e 100644 --- a/core/src/main/java/io/github/mmm/scanner/CharSequenceScanner.java +++ b/core/src/main/java/io/github/mmm/scanner/CharSequenceScanner.java @@ -62,7 +62,20 @@ public CharSequenceScanner(String string) { */ public CharSequenceScanner(String string, TextFormatMessageHandler messageHandler) { - this(string.toCharArray(), messageHandler); + this(string, messageHandler, 1, 1); + } + + /** + * The constructor. + * + * @param string is the {@link #getOriginalString() string} to parse. + * @param messageHandler the {@link TextFormatMessageHandler}. + * @param line the initial {@link #getLine() line}. + * @param column the initial {@link #getColumn() column}. 
+ */ + public CharSequenceScanner(String string, TextFormatMessageHandler messageHandler, int line, int column) { + + this(string.toCharArray(), messageHandler, line, column); this.string = string; } @@ -87,6 +100,19 @@ public CharSequenceScanner(char[] characters, TextFormatMessageHandler messageHa this(characters, 0, characters.length, messageHandler); } + /** + * The constructor. + * + * @param characters is an array containing the characters to scan. + * @param messageHandler the {@link TextFormatMessageHandler}. + * @param line the initial {@link #getLine() line}. + * @param column the initial {@link #getColumn() column}. + */ + public CharSequenceScanner(char[] characters, TextFormatMessageHandler messageHandler, int line, int column) { + + this(characters, 0, characters.length, messageHandler, line, column); + } + /** * The constructor. * @@ -113,7 +139,25 @@ public CharSequenceScanner(char[] characters, int offset, int length) { */ public CharSequenceScanner(char[] characters, int offset, int length, TextFormatMessageHandler messageHandler) { - super(characters, messageHandler); + this(characters, offset, length, messageHandler, 1, 1); + } + + /** + * The constructor. + * + * @param characters is an array containing the characters to scan. + * @param offset is the index of the first char to scan in {@code characters} (typically {@code 0} to start at the + * beginning of the array). + * @param length is the {@link #getLength() number of characters} to scan from {@code characters} starting at + * {@code offset} (typically characters.length - offset). + * @param messageHandler the {@link TextFormatMessageHandler}. + * @param line the initial {@link #getLine() line}. + * @param column the initial {@link #getColumn() column}. 
+ */ + public CharSequenceScanner(char[] characters, int offset, int length, TextFormatMessageHandler messageHandler, + int line, int column) { + + super(characters, messageHandler, line, column); if (offset < 0) { throw new IndexOutOfBoundsException(Integer.toString(offset)); } @@ -264,7 +308,7 @@ public char peek(int lookaheadOffset) { return this.buffer[i]; } } - return 0; + return EOS; } /** @@ -296,15 +340,12 @@ public String peekWhile(CharFilter filter, int maxLen) { throw new IllegalArgumentException("Max must NOT be negative: " + maxLen); } int len = 0; - int end = this.offset + maxLen; - if (end < 0) { // overflow? + int end = this.limit - this.offset; + if (end > maxLen) { end = maxLen; } - if (end > this.limit) { - end = this.limit; - } while (len < end) { - char c = this.buffer[len]; + char c = this.buffer[this.offset + len]; if (!filter.accept(c)) { break; } diff --git a/core/src/main/java/io/github/mmm/scanner/CharStreamScanner.java b/core/src/main/java/io/github/mmm/scanner/CharStreamScanner.java index 0514388..2beecfa 100644 --- a/core/src/main/java/io/github/mmm/scanner/CharStreamScanner.java +++ b/core/src/main/java/io/github/mmm/scanner/CharStreamScanner.java @@ -82,7 +82,7 @@ public interface CharStreamScanner extends TextFormatProcessor { /** * @param filter the {@link CharFilter} {@link CharFilter#accept(char) accepting} only the characters to peek. - * @param maxLen the maximum number of characters to peek (get as lookahead without modifying this stream). + * @param maxLen the maximum number of characters to peek (to get as lookahead without modifying this stream). * @return a {@link String} with the {@link #peek() peeked} characters of the given {@code maxLen} or less if a * character was hit that is not {@link CharFilter#accept(char) accepted} by the given {@code filter} * or the end-of-text has been reached before. The state of this stream remains unchanged. 
@@ -709,14 +709,60 @@ default boolean expect(String expected, boolean ignoreCase, boolean lookahead) { boolean expect(String expected, boolean ignoreCase, boolean lookahead, int offset); /** - * This method checks that the {@link #next() next character} is equal to the given {@code expected} character.
- * If the current character was as expected, the parser points to the next character. Otherwise its position will - * remain unchanged. + * This method determines if the given {@code expected} {@link String} is completely present at the current position. + * It will only {@link #next() consume} characters and change the state if {@code lookahead} is {@code false} and the + * {@code expected} {@link String} was found (entirely).
+ * Attention:
+ * This method requires lookahead. For implementations that are backed by an underlying stream (or reader) the + * {@link String#length() length} of the expected {@link String} shall not exceed the available lookahead size (buffer + * capacity given at construction time). Otherwise the method may fail. + * + * @param expected the expected {@link String} to search for. + * @param ignoreCase - if {@code true} the case of the characters is ignored when compared, {@code false} otherwise. + * @param lookahead - if {@code true} the state of the scanner remains unchanged even if the expected {@link String} + * has been found, {@code false} otherwise (expected {@link String} is consumed on match). + * @param offset the number of characters that have already been {@link #peek(int) peeked} and after which the given + * {@link String} is expected. Will typically be {@code 0}. If {@code lookahead} is {@code false} and the + * expected {@link String} was found these characters will be {@link #skip(int) skipped} together with the + * expected {@link String}. + * @param warning {@code true} to {@link #addWarning(String) add a warning} in case the expected {@link String} was + * not found, {@code false} otherwise. + * @return {@code true} if the {@code expected} string was successfully found, {@code false} otherwise. + */ + default boolean expect(String expected, boolean ignoreCase, boolean lookahead, int offset, boolean warning) { + + boolean found = expect(expected, ignoreCase, lookahead, offset); + if (!found && warning) { + addWarning("Expected '" + expected + "'"); + } + return found; + } + + /** + * This method checks if the {@link #next() next character} is equal to the given {@code expected} character.
+ * If the character matched with the {@code expected} character, the parser points to the next character. Otherwise + * its position will remain unchanged. * * @param expected is the expected character. * @return {@code true} if the current character is the same as {@code expected}, {@code false} otherwise. */ - boolean expectOne(char expected); + default boolean expectOne(char expected) { + + return expectOne(expected, false); + } + + /** + * This method checks if the {@link #next() next character} is equal to the given {@code expected} character.
+ * If the character matched with the {@code expected} character, the parser points to the next character. Otherwise + * its position will remain unchanged. + * + * @param expected the character to expect as {@link #next() next} in this stream. + * @param warning {@code true} to {@link #addWarning(String) add a warning} in case the expected character was not + * present, {@code false} otherwise. + * @return {@code true} if the expected character was found and consumed, {@code false} otherwise (and this stream + * remains unchanged). + */ + boolean expectOne(char expected, boolean warning); /** * This method checks that the {@link #next() next character} is {@link CharFilter#accept(char) accepted} by the given @@ -936,6 +982,14 @@ private IllegalStateException invalidCharCount(String bound, int count, CharFilt */ int skip(int count); + /** + * @return {@code 0} if the {@link #next() next character} is not a newline and the stream remains unchanged, + * {@code 1} if the {@link #next() next character} was '\n' and has been {@link #skip(int) skipped}, or + * {@code 2} if the {@link #next() next characters} have been '\r' and '\n' and have been {@link #skip(int) + * skipped}. + */ + int skipNewLine(); + /** * This method reads all {@link #next() next characters} until the given {@code substring} has been detected.
* After the call of this method, the current index will point to the next character after the first occurrence of diff --git a/core/src/main/java/io/github/mmm/scanner/SimpleTextFormatMessageHandler.java b/core/src/main/java/io/github/mmm/scanner/SimpleTextFormatMessageHandler.java index a87b00e..80262f5 100644 --- a/core/src/main/java/io/github/mmm/scanner/SimpleTextFormatMessageHandler.java +++ b/core/src/main/java/io/github/mmm/scanner/SimpleTextFormatMessageHandler.java @@ -5,16 +5,16 @@ import org.slf4j.Logger; import org.slf4j.event.Level; +import io.github.mmm.base.text.AbstractTextFormatMessageHandler; import io.github.mmm.base.text.TextFormatMessage; import io.github.mmm.base.text.TextFormatMessageHandler; import io.github.mmm.base.text.TextFormatMessageType; -import io.github.mmm.base.text.TextFormatProcessor; /** * Default implementation of {@link TextFormatMessageHandler}. */ @SuppressWarnings("exports") -public class SimpleTextFormatMessageHandler implements TextFormatMessageHandler { +public class SimpleTextFormatMessageHandler extends AbstractTextFormatMessageHandler { /** Maps each {@link TextFormatMessageType} to its analog {@link Level}. */ public static final Function LOG_MAPPER_DEFAULT = new LogLevelMapper(false); @@ -28,14 +28,10 @@ public class SimpleTextFormatMessageHandler implements TextFormatMessageHandler private static final SimpleTextFormatMessageHandler INSTANCE = new SimpleTextFormatMessageHandler(false, AbstractCharStreamScanner.LOG, LOG_MAPPER_INFO_AS_DEBUG, false); - private final boolean throwOnError; - private final Logger logger; private final Function logLevelMapper; - private final boolean collectMessages; - /** * The constructor. 
* @@ -48,11 +44,9 @@ public class SimpleTextFormatMessageHandler implements TextFormatMessageHandler public SimpleTextFormatMessageHandler(boolean throwOnError, Logger logger, Function logLevelMapper, boolean collectMessages) { - super(); - this.throwOnError = throwOnError; + super(throwOnError, collectMessages); this.logger = logger; this.logLevelMapper = logLevelMapper; - this.collectMessages = collectMessages; } private void log(TextFormatMessage message) { @@ -71,20 +65,7 @@ private void log(TextFormatMessage message) { public TextFormatMessage handle(TextFormatMessage message) { log(message); - if (this.throwOnError && (message.getType() == TextFormatMessageType.ERROR)) { - throw new IllegalStateException(message.getText()); - } - return TextFormatMessageHandler.super.handle(message); - } - - /** - * @return {@code true} to collect {@link TextFormatProcessor#getMessages() messages} (default) or {@code null} if no - * {@link TextFormatMessage} should be collected and {@link TextFormatProcessor#getMessages()} will always - * return the {@link java.util.Collections#emptyList() empty list}. - */ - public boolean isCollectMessages() { - - return this.collectMessages; + return super.handle(message); } private static final class LogLevelMapper implements Function {