Skip to content

Commit

Permalink
small improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
hohwille committed May 29, 2023
1 parent aee6bc2 commit 23a2e12
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 68 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
* http://www.apache.org/licenses/LICENSE-2.0 */
package io.github.mmm.scanner;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.slf4j.Logger;
Expand Down Expand Up @@ -34,8 +32,6 @@ public abstract class AbstractCharStreamScanner implements CharStreamScanner {

private final TextFormatMessageHandler messageHandler;

private final boolean collectMessages;

/** The internal buffer with character data. */
protected char[] buffer;

Expand All @@ -54,9 +50,6 @@ public abstract class AbstractCharStreamScanner implements CharStreamScanner {
/** @see #getColumn() */
protected int column;

/** @see #getMessages() */
private List<TextFormatMessage> messages;

/** A {@link StringBuilder} instance that can be shared and reused. May initially be <code>null</code>. */
private StringBuilder sb;

Expand All @@ -71,6 +64,19 @@ public AbstractCharStreamScanner(int capacity, TextFormatMessageHandler messageH
this(new char[capacity], messageHandler);
}

/**
* The constructor. Allocates a new internal buffer of the given {@code capacity} and delegates to the constructor
* that takes the {@code char[]} buffer together with the initial position.
*
* @param capacity the capacity of the internal buffer in {@code char}s. It is used directly as the array size and
* therefore must not be negative.
* @param messageHandler the {@link TextFormatMessageHandler}. If {@code null}, the delegated constructor falls back
* to {@link SimpleTextFormatMessageHandler#get()}.
* @param line the initial {@link #getLine() line}.
* @param column the initial {@link #getColumn() column}.
*/
public AbstractCharStreamScanner(int capacity, TextFormatMessageHandler messageHandler, int line, int column) {

this(new char[capacity], messageHandler, line, column);
}

/**
* The constructor.
*
Expand All @@ -79,22 +85,30 @@ public AbstractCharStreamScanner(int capacity, TextFormatMessageHandler messageH
*/
public AbstractCharStreamScanner(char[] buffer, TextFormatMessageHandler messageHandler) {

this(buffer, messageHandler, 1, 1);
}

/**
* The constructor.
*
* @param buffer the internal {@code char[]} buffer.
* @param messageHandler the {@link TextFormatMessageHandler}.
* @param line the initial {@link #getLine() line}.
* @param column the initial {@link #getColumn() column}.
*/
public AbstractCharStreamScanner(char[] buffer, TextFormatMessageHandler messageHandler, int line, int column) {

super();
if (messageHandler == null) {
this.messageHandler = SimpleTextFormatMessageHandler.get();
} else {
this.messageHandler = messageHandler;
}
if (this.messageHandler instanceof SimpleTextFormatMessageHandler) {
this.collectMessages = ((SimpleTextFormatMessageHandler) this.messageHandler).isCollectMessages();
} else {
this.collectMessages = true;
}
this.buffer = buffer;
this.offset = 0;
this.limit = 0;
this.line = 1;
this.column = 1;
this.line = line;
this.column = column;
}

@Override
Expand All @@ -112,23 +126,13 @@ public int getColumn() {
@Override
public void addMessage(TextFormatMessage message) {

message = this.messageHandler.handle(message);
if (this.collectMessages) {
getMessages().add(message);
}
this.messageHandler.add(message);
}

@Override
public List<TextFormatMessage> getMessages() {

if (this.messages == null) {
if (this.collectMessages) {
this.messages = new ArrayList<>();
} else {
this.messages = Collections.emptyList();
}
}
return this.messages;
return this.messageHandler.getMessages();
}

/**
Expand Down Expand Up @@ -576,16 +580,16 @@ public void require(String expected, boolean ignoreCase) {
}

@Override
public boolean expectOne(char expected) {
public boolean expectOne(char expected, boolean warning) {

if (!hasNext()) {
return false;
}
if (this.buffer[this.offset] == expected) {
if (hasNext() && (this.buffer[this.offset] == expected)) {
handleChar(expected);
this.offset++;
return true;
}
if (warning) {
addWarning("Expected '" + expected + "'");
}
return false;
}

Expand Down Expand Up @@ -1249,6 +1253,30 @@ public int skip(int count) {
return skipped;
}

@Override
public int skipNewLine() {

  // Nothing left to read: no newline can be skipped.
  if (!hasNext()) {
    return 0;
  }
  char current = this.buffer[this.offset];
  int length;
  if (current == '\n') {
    length = 1;
  } else if (current != '\r') {
    return 0;
  } else if ((this.offset + 1 < this.limit) && (this.buffer[this.offset + 1] == '\n')) {
    // "\r\n" is entirely available in the current buffer.
    length = 2;
  } else if (peek(1) == '\n') {
    // The direct buffer check did not find '\n' (e.g. '\r' is the last buffered char), so fall back to the generic
    // lookahead and let skip(int) consume both characters.
    // NOTE(review): presumably skip(int) maintains line/column itself, hence the early return - confirm.
    skip(2);
    return 2;
  } else {
    // A '\r' that is not followed by '\n' is not consumed.
    return 0;
  }
  // Consume the newline directly in the buffer and move the position to the start of the next line.
  this.offset += length;
  this.line++;
  this.column = 1;
  return length;
}

@Override
public boolean skipUntil(char stop) {

Expand Down
59 changes: 50 additions & 9 deletions core/src/main/java/io/github/mmm/scanner/CharSequenceScanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,20 @@ public CharSequenceScanner(String string) {
*/
public CharSequenceScanner(String string, TextFormatMessageHandler messageHandler) {

this(string.toCharArray(), messageHandler);
this(string, messageHandler, 1, 1);
}

/**
* The constructor. Scans the characters obtained via {@link String#toCharArray()} from the given {@code string} and
* additionally stores the given {@code string} itself in the {@code string} field.
*
* @param string is the {@link #getOriginalString() string} to parse. Must not be {@code null} since
* {@link String#toCharArray()} is invoked on it.
* @param messageHandler the {@link TextFormatMessageHandler}.
* @param line the initial {@link #getLine() line}.
* @param column the initial {@link #getColumn() column}.
*/
public CharSequenceScanner(String string, TextFormatMessageHandler messageHandler, int line, int column) {

this(string.toCharArray(), messageHandler, line, column);
this.string = string;
}

Expand All @@ -87,6 +100,19 @@ public CharSequenceScanner(char[] characters, TextFormatMessageHandler messageHa
this(characters, 0, characters.length, messageHandler);
}

/**
* The constructor. Scans the entire given array by delegating with an offset of {@code 0} and a length of
* {@code characters.length}.
*
* @param characters is an array containing the characters to scan. Must not be {@code null} since its
* {@code length} is read.
* @param messageHandler the {@link TextFormatMessageHandler}.
* @param line the initial {@link #getLine() line}.
* @param column the initial {@link #getColumn() column}.
*/
public CharSequenceScanner(char[] characters, TextFormatMessageHandler messageHandler, int line, int column) {

this(characters, 0, characters.length, messageHandler, line, column);
}

/**
* The constructor.
*
Expand All @@ -113,7 +139,25 @@ public CharSequenceScanner(char[] characters, int offset, int length) {
*/
public CharSequenceScanner(char[] characters, int offset, int length, TextFormatMessageHandler messageHandler) {

super(characters, messageHandler);
this(characters, offset, length, messageHandler, 1, 1);
}

/**
* The constructor.
*
* @param characters is an array containing the characters to scan.
* @param offset is the index of the first char to scan in {@code characters} (typically {@code 0} to start at the
* beginning of the array).
* @param length is the {@link #getLength() number of characters} to scan from {@code characters} starting at
* {@code offset} (typically <code>characters.length - offset</code>).
* @param messageHandler the {@link TextFormatMessageHandler}.
* @param line the initial {@link #getLine() line}.
* @param column the initial {@link #getColumn() column}.
*/
public CharSequenceScanner(char[] characters, int offset, int length, TextFormatMessageHandler messageHandler,
int line, int column) {

super(characters, messageHandler, line, column);
if (offset < 0) {
throw new IndexOutOfBoundsException(Integer.toString(offset));
}
Expand Down Expand Up @@ -264,7 +308,7 @@ public char peek(int lookaheadOffset) {
return this.buffer[i];
}
}
return 0;
return EOS;
}

/**
Expand Down Expand Up @@ -296,15 +340,12 @@ public String peekWhile(CharFilter filter, int maxLen) {
throw new IllegalArgumentException("Max must NOT be negative: " + maxLen);
}
int len = 0;
int end = this.offset + maxLen;
if (end < 0) { // overflow?
int end = this.limit - this.offset;
if (end > maxLen) {
end = maxLen;
}
if (end > this.limit) {
end = this.limit;
}
while (len < end) {
char c = this.buffer[len];
char c = this.buffer[this.offset + len];
if (!filter.accept(c)) {
break;
}
Expand Down
64 changes: 59 additions & 5 deletions core/src/main/java/io/github/mmm/scanner/CharStreamScanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ public interface CharStreamScanner extends TextFormatProcessor {

/**
* @param filter the {@link CharFilter} {@link CharFilter#accept(char) accepting} only the characters to peek.
* @param maxLen the maximum number of characters to peek (get as lookahead without modifying this stream).
* @param maxLen the maximum number of characters to peek (to get as lookahead without modifying this stream).
* @return a {@link String} with the {@link #peek() peeked} characters of the given {@code maxLen} or less if a
* character was hit that is <em>not</em> {@link CharFilter#accept(char) accepted} by the given {@code filter}
* or the end-of-text has been reached before. The state of this stream remains unchanged.
Expand Down Expand Up @@ -709,14 +709,60 @@ default boolean expect(String expected, boolean ignoreCase, boolean lookahead) {
boolean expect(String expected, boolean ignoreCase, boolean lookahead, int offset);

/**
* This method checks that the {@link #next() next character} is equal to the given {@code expected} character. <br>
* If the current character was as expected, the parser points to the next character. Otherwise its position will
* remain unchanged.
* This method determines if the given {@code expected} {@link String} is completely present at the current position.
* It will only {@link #next() consume} characters and change the state if {@code lookahead} is {@code false} and the
* {@code expected} {@link String} was found (entirely).<br>
* <b>Attention:</b><br>
* This method requires lookahead. For implementations that are backed by an underlying stream (or reader) the
* {@link String#length() length} of the expected {@link String} shall not exceed the available lookahead size (buffer
* capacity given at construction time). Otherwise the method may fail.
*
* @param expected the expected {@link String} to search for.
* @param ignoreCase - if {@code true} the case of the characters is ignored when compared, {@code false} otherwise.
* @param lookahead - if {@code true} the state of the scanner remains unchanged even if the expected {@link String}
* has been found, {@code false} otherwise (expected {@link String} is consumed on match).
* @param offset the number of characters that have already been {@link #peek(int) peeked} and after which the given
* {@link String} is expected. Will typically be {@code 0}. If {@code lookahead} is {@code false} and the
* expected {@link String} was found these characters will be {@link #skip(int) skipped} together with the
* expected {@link String}.
* @param warning {@code true} to {@link #addWarning(String) add a warning} in case the expected {@link String} was
* not found, {@code false} otherwise.
* @return {@code true} if the {@code expected} string was successfully found, {@code false} otherwise.
*/
default boolean expect(String expected, boolean ignoreCase, boolean lookahead, int offset, boolean warning) {

  // Delegate the actual matching (and optional consumption) to the overload without the warning flag.
  if (expect(expected, ignoreCase, lookahead, offset)) {
    return true;
  }
  // Mismatch: only report it if the caller asked for a warning.
  if (warning) {
    addWarning("Expected '" + expected + "'");
  }
  return false;
}

/**
* This method checks if the {@link #next() next character} is equal to the given {@code expected} character. <br>
* If the character matched with the {@code expected} character, the parser points to the next character. Otherwise
* its position will remain unchanged.
*
* @param expected is the expected character.
* @return {@code true} if the current character is the same as {@code expected}, {@code false} otherwise.
*/
boolean expectOne(char expected);
default boolean expectOne(char expected) {

// Delegates with warning = false, so a mismatch is reported only via the return value.
return expectOne(expected, false);
}

/**
* This method checks if the {@link #next() next character} is equal to the given {@code expected} character. <br>
* If the character matched with the {@code expected} character, the parser points to the next character. Otherwise
* its position will remain unchanged.
*
* @param expected the character to expect as {@link #next() next} in this stream.
* @param warning {@code true} to {@link #addWarning(String) add a warning} in case the expected character was not
* present, {@code false} otherwise.
* @return {@code true} if the expected character was found and consumed, {@code false} otherwise (and this stream
* remains unchanged).
*/
boolean expectOne(char expected, boolean warning);

/**
* This method checks that the {@link #next() next character} is {@link CharFilter#accept(char) accepted} by the given
Expand Down Expand Up @@ -936,6 +982,14 @@ private IllegalStateException invalidCharCount(String bound, int count, CharFilt
*/
int skip(int count);

/**
* Skips a single newline sequence ('\n' or '\r' followed by '\n') if it is present at the current position.
*
* @return {@code 0} if the {@link #next() next character} is not a newline and the stream remains unchanged,
* {@code 1} if the {@link #next() next character} was '\n' and has been {@link #skip(int) skipped}, or
* {@code 2} if the {@link #next() next characters} have been '\r' and '\n' and have been {@link #skip(int)
* skipped}.
*/
int skipNewLine();

/**
* This method reads all {@link #next() next characters} until the given {@code substring} has been detected. <br>
* After the call of this method, the current index will point to the next character after the first occurrence of
Expand Down
Loading

0 comments on commit 23a2e12

Please sign in to comment.