Skip to content

Commit

Permalink
Parse cql files with code comments (#57)
Browse files Browse the repository at this point in the history
*  * parse cql files with sql comments fixed

*  * CqlFileParser reimplemented
 * We should ignore code comments, normalize queries and preserve newlines in value expressions

*  * Unmatched column names/values fixed

*  *  typo fixed, shouldExceptionOnMissingSemicolon added

*  * shouldCopeWithSingleLineStatement added

*  * coding style fixed
  • Loading branch information
ssserj authored and adamdougal committed Sep 22, 2017
1 parent efb3b37 commit a1eb1f4
Show file tree
Hide file tree
Showing 4 changed files with 232 additions and 26 deletions.
171 changes: 146 additions & 25 deletions src/main/java/uk/sky/cqlmigrate/CqlFileParser.java
Original file line number Diff line number Diff line change
@@ -1,58 +1,179 @@
package uk.sky.cqlmigrate;

import com.google.common.base.CharMatcher;
import com.google.common.base.Charsets;
import com.google.common.base.Throwables;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.google.common.base.Preconditions.checkState;

class CqlFileParser {
private static final Logger LOGGER = LoggerFactory.getLogger(CqlFileParser.class);
private static final Pattern EOL = Pattern.compile(".*\\R|.+\\z");

private CqlFileParser() {}

private static final char CQL_STATEMENT_STRING_DELIMITER = '\'';
private static final String CQL_STATEMENT_TERMINATOR = ";";

/**
 * Reads the CQL script at {@code cqlPath} and returns the statements found in it.
 *
 * An IOException raised while reading is logged and rethrown through
 * Throwables.propagate.
 *
 * NOTE(review): this span is taken from a rendered diff and interleaves two versions of
 * the body: one that concatenates the file with a BufferedReader and then calls
 * checkState/splitByStatementTerminator, and one that feeds every EOL match from a
 * Scanner to a LineProcessor and then calls check() and getResult(). Presumably only
 * the Scanner/LineProcessor version is current -- confirm against the repository.
 */
static List<String> getCqlStatementsFrom(Path cqlPath) {
String cqlFileAsString;
LineProcessor processor = new LineProcessor();

try (BufferedReader cqlReader = Files.newBufferedReader(cqlPath, Charsets.UTF_8)) {
StringBuilder stringBuilder = new StringBuilder();
cqlReader.lines().forEach(stringBuilder::append);
cqlFileAsString = stringBuilder.toString();
try (Scanner in = new Scanner(cqlPath, "UTF-8")) {
String original;
// A horizon of 0 lets the scanner search without bound; each match of EOL is one
// line including its line break, or the final line when the file has no trailing break.
while ((original = in.findWithinHorizon(EOL, 0)) != null) {
processor.process(original);
}
} catch (IOException e) {
LOGGER.error("Failed to execute cql script {}: {}", cqlPath.getFileName(), e.getMessage());
LOGGER.error("Failed to process cql script {}: {}", cqlPath.getFileName(), e.getMessage());
throw Throwables.propagate(e);
}

checkState(cqlFileAsString.endsWith(CQL_STATEMENT_TERMINATOR), "had a non-terminated cql line: %s", cqlFileAsString);
return splitByStatementTerminator(cqlFileAsString);
// Throws IllegalStateException (via checkState) when the last statement was not terminated.
processor.check();

return processor.getResult();
}

private static List<String> splitByStatementTerminator(String cqlStatements) {
List<String> statementList = new ArrayList<>();
String candidateStatement = "";
private static class LineProcessor {
private static final char CQL_STATEMENT_STRING_DELIMITER = '\'';
private static final String CQL_STATEMENT_TERMINATOR = ";";
private static final String CQL_COMMENT_DOUBLE_HYPEN = "--";
private static final String CQL_MULTI_LINE_COMMENT_OPEN = "/*";
private static final String CQL_MULTI_LINE_COMMENT_CLOSE = "*/";
private static final Pattern CQL_MULTI_LINE_COMMENT_PATTERN = Pattern.compile("/\\*.*?\\*/", Pattern.DOTALL);
private static final String EMPTY_STR = "";

/** Parser states; process() picks the line handler from the current state. */
private enum State {
// Start of a new statement: init() creates a fresh statement buffer, switches to
// FIND_EOS and re-processes the same line.
INIT,
// Looking for statement text or its terminator; handled by findStatement().
FIND_EOS,
// Entered when a trimmed line starts with "/*"; left when a line ends with "*/".
IS_MULTI_LINE_COMMENT,
// Part of a statement has been buffered but no terminator seen yet; handled by findStatement().
IS_OPEN_STMT,
// Entered when a line holds an odd number of single quotes; handled by findValueExpression().
IS_OPEN_VALUE_EXP,
// A terminator was found and the statement stored; closedStatement() resets to INIT.
IS_CLOSE_STMT;
}

List<String> statements;
State curState = State.INIT;
StringBuilder curStmt;

/**
 * Hands one line of the script to the handler that matches the current parser state.
 *
 * @param original the raw line text to handle
 * @throws IOException declared by the init and findStatement handlers
 */
void process(String original) throws IOException {
    switch (curState) {
        case IS_CLOSE_STMT:
            closedStatement(original);
            break;
        case IS_MULTI_LINE_COMMENT:
            findMultilineComment(original);
            break;
        case IS_OPEN_VALUE_EXP:
            findValueExpression(original);
            break;
        case IS_OPEN_STMT:
        case FIND_EOS:
            // Both states share the statement scanner.
            findStatement(original);
            break;
        case INIT:
            init(original);
            break;
    }
}

/**
 * Prepares the parser for a new statement and then handles {@code original} in the
 * FIND_EOS state.
 *
 * @param original the raw line text that triggered the new statement
 * @throws IOException declared by process
 */
private void init(String original) throws IOException {
    // A fresh buffer per statement; the result list is created once and reused.
    curStmt = new StringBuilder();
    if (null == statements) {
        statements = new ArrayList<>();
    }
    curState = State.FIND_EOS;
    process(original);
}

/**
 * Handles a line while the parser is in the FIND_EOS or IS_OPEN_STMT state.
 *
 * Working on the whitespace-trimmed line, the checks run in this order:
 * blank lines and lines starting with "--" are skipped; a line starting with the
 * multi-line comment opener switches to IS_MULTI_LINE_COMMENT; a line ending with the
 * terminator completes the statement; a line with an odd number of single quotes opens
 * a multi-line value; text after an inline "--" is dropped; inline comments matched by
 * CQL_MULTI_LINE_COMMENT_PATTERN are removed; anything else is appended to the
 * current statement.
 *
 * NOTE(review): this span is taken from a rendered diff. The for-loop header and the
 * lines using cqlStatements, candidateStatement and statementList look like the removed
 * splitByStatementTerminator body interleaved with this method -- confirm against the
 * repository.
 */
private void findStatement(String original) throws IOException {
String line = CharMatcher.WHITESPACE.trimFrom(original);

if (line.startsWith(CQL_COMMENT_DOUBLE_HYPEN) || line.isEmpty()) {
return;
}

if (line.startsWith(CQL_MULTI_LINE_COMMENT_OPEN)) {
curState = State.IS_MULTI_LINE_COMMENT;
return;
}

if (line.endsWith(CQL_STATEMENT_TERMINATOR)) {
// Store the statement without its terminator, then re-dispatch the line so that
// closedStatement() runs and the state returns to INIT.
curStmt.append(" ").append(line.substring(0, line.length() - 1));
statements.add(CharMatcher.WHITESPACE.trimFrom(curStmt.toString()));
curState = State.IS_CLOSE_STMT;
process(original);
return;
}

// An odd number of single quotes means a quoted value is left open on this line.
// The original text (minus leading whitespace) is appended instead of the trimmed
// line, so whatever trails the text in the original, such as a line break, is kept.
if (CharMatcher.is(CQL_STATEMENT_STRING_DELIMITER).countIn(line) % 2 != 0) {
curState = State.IS_OPEN_VALUE_EXP;
curStmt.append(" ").append(CharMatcher.WHITESPACE.trimLeadingFrom(original));
return;
}

// Inline "--" comment: keep only the text in front of it. The state is not changed here.
int pos = line.indexOf(CQL_COMMENT_DOUBLE_HYPEN);
if (pos != -1) {
curStmt.append(line.substring(0, pos));
return;
}

for (String statementFragment : cqlStatements.split(CQL_STATEMENT_TERMINATOR)) {
candidateStatement += statementFragment;
// A semicolon preceded by an odd number of single quotes must be within a string, and therefore is not a statement terminator
if (CharMatcher.is(CQL_STATEMENT_STRING_DELIMITER).countIn(candidateStatement) % 2 == 0) {
statementList.add(candidateStatement);
candidateStatement = "";
// Inline /* ... */ comment: append the line with every such comment removed.
Matcher matcher = CQL_MULTI_LINE_COMMENT_PATTERN.matcher(line);
if (matcher.find()) {
curStmt.append(matcher.replaceAll(EMPTY_STR));
return;
}

// Plain statement text: a separating space is added only when earlier text has
// already put the parser in IS_OPEN_STMT.
if (State.IS_OPEN_STMT.equals(curState)) {
curStmt.append(" ").append(line);
} else {
candidateStatement += CQL_STATEMENT_TERMINATOR;
curState = State.IS_OPEN_STMT;
curStmt.append(line);
}
}
return statementList;

/**
 * Handles a line that is read while a quoted value expression is still open.
 *
 * Lines are appended verbatim so that line breaks inside the value are preserved. An
 * odd number of single quotes on the line closes the value. If that closing line also
 * ends with the statement terminator, the statement is completed here; otherwise the
 * terminator would be copied into the statement text and the statement would be merged
 * with the following one or reported as non-terminated.
 *
 * @param original the raw line text, including its line break if it has one
 */
private void findValueExpression(String original) {
    boolean closesValue = CharMatcher.is(CQL_STATEMENT_STRING_DELIMITER).countIn(original) % 2 != 0;
    if (!closesValue) {
        // Still inside the quoted value.
        curStmt.append(original);
        return;
    }

    String withoutTrailingWhitespace = CharMatcher.WHITESPACE.trimTrailingFrom(original);
    if (withoutTrailingWhitespace.endsWith(CQL_STATEMENT_TERMINATOR)) {
        // Value and statement end on the same line: store the statement without its
        // terminator, the same way findStatement does for a terminated line.
        int terminatorStart = withoutTrailingWhitespace.length() - CQL_STATEMENT_TERMINATOR.length();
        curStmt.append(withoutTrailingWhitespace.substring(0, terminatorStart));
        statements.add(CharMatcher.WHITESPACE.trimFrom(curStmt.toString()));
        curState = State.IS_CLOSE_STMT;
        closedStatement(original);
        return;
    }

    // Value closed but the statement continues; go back to looking for the terminator.
    curStmt.append(original);
    curState = State.FIND_EOS;
}

/**
 * Handles a line inside a multi-line comment: the comment ends when the line, ignoring
 * trailing whitespace, ends with the comment-close marker.
 *
 * @param original the raw line text
 */
private void findMultilineComment(String original) {
    String withoutTrailingWhitespace = CharMatcher.WHITESPACE.trimTrailingFrom(original);
    boolean commentClosed = withoutTrailingWhitespace.endsWith(CQL_MULTI_LINE_COMMENT_CLOSE);
    if (commentClosed) {
        curState = State.FIND_EOS;
    }
}

/**
 * Finishes a terminated statement: resets the parser to INIT and traces the line that
 * closed the statement.
 *
 * @param original the raw line text that carried the terminator
 */
private void closedStatement(String original) {
    curState = State.INIT;
    LOGGER.trace("CQL parsed: {}", original);
}

/**
 * Verifies that parsing stopped between statements.
 *
 * @throws IllegalStateException (via checkState) when the parser is in any state other
 *         than INIT or IS_CLOSE_STMT
 */
private void check() {
    boolean betweenStatements = curState == State.INIT || curState == State.IS_CLOSE_STMT;
    checkState(betweenStatements, "File had a non-terminated cql line");
}

/**
 * Returns the statements collected so far.
 *
 * The statement list is only created once a line has been processed, so for input
 * without any lines (an empty file) an empty list is returned instead of null.
 *
 * @return the parsed statements, never null
 */
List<String> getResult() {
    if (statements == null) {
        return new ArrayList<>();
    }
    return statements;
}
}
}
57 changes: 57 additions & 0 deletions src/test/java/uk/sky/cqlmigrate/CqlFileParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import static org.hamcrest.Matchers.equalToIgnoringWhiteSpace;
import static org.hamcrest.Matchers.hasSize;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;

public class CqlFileParserTest {
Expand All @@ -30,4 +31,60 @@ public void shouldCopeWithSemicolonsInStrings() throws Exception {
assertThat(cqlStatements, hasSize(1));
assertThat(cqlStatements.get(0), equalToIgnoringWhiteSpace(expectedStatement));
}

/**
 * Parses a script that mixes statements with single-line and multi-line comments and
 * checks that four statements come back: comments removed, and the line breaks
 * inside a quoted value kept.
 */
@Test
public void shouldIgnoreCommentsAndNormolizeAndPreserveNewlineInValuesExp() throws Exception {
    //given
    Path cqlPath = getResourcePath("cql_rolegraphs_one/2015-08-16-12:05-statement-with-comments.cql");

    //when
    List<String> cqlStatements = CqlFileParser.getCqlStatementsFrom(cqlPath);

    //then
    String expectedStatement;
    assertThat(cqlStatements, hasSize(4));

    expectedStatement = "INSERT into role_graphs (provider, graphml)\n" +
            " VALUES ('SKY', 'some text; some more text')";

    assertThat(cqlStatements.get(0), equalToIgnoringWhiteSpace(expectedStatement));

    expectedStatement = "CREATE TABLE role_graphs_sql(" +
            "provider text, " +
            "graphml text, " +
            "settings text, " +
            "PRIMARY KEY (provider)" +
            ") WITH comment='test table role_graphs_sql'";

    assertThat(cqlStatements.get(1), equalToIgnoringWhiteSpace(expectedStatement));

    expectedStatement = "INSERT into role_graphs_sql (provider, graphml)" +
            " VALUES ('SKY', 'some ... \n" +
            "-- it''s comment\n" +
            "-- Created by yEd 3.12.2 /* <key for=\"graphml\" id=\"d0\" yfiles.type=\"resources\"/> */ <test>''</test>\n" +
            " the end ')";

    // assertEquals takes (expected, actual); this order keeps failure messages correct.
    assertEquals(expectedStatement, cqlStatements.get(2));

    expectedStatement = "INSERT into role_graphs_sql (provider, graphml, settings)" +
            " VALUES ('EARTH', '', ' the end ')";

    assertEquals(expectedStatement, cqlStatements.get(3));
}

/**
 * Parsing the bootstrap script under cql_bootstrap_missing_semicolon must fail with an
 * IllegalStateException (presumably because its statement lacks the terminating
 * semicolon, as the directory name suggests).
 */
@Test(expected = IllegalStateException.class)
public void shouldExceptionOnMissingSemicolon() throws Exception {
    CqlFileParser.getCqlStatementsFrom(getResourcePath("cql_bootstrap_missing_semicolon/bootstrap.cql"));
}

/**
 * Parses the create-table script under cql_consistency_level and expects exactly one
 * statement back, without its terminator.
 */
@Test
public void shouldCopeWithSingleLineStatement() throws Exception {
    Path cqlPath = getResourcePath("cql_consistency_level/2016-02-12-11_30-create-table.cql");

    List<String> cqlStatements = CqlFileParser.getCqlStatementsFrom(cqlPath);
    String expectedStatement = "CREATE TABLE consistency_test (column1 text primary key, column2 text)";
    assertThat(cqlStatements, hasSize(1));
    // assertEquals takes (expected, actual); this order keeps failure messages correct.
    assertEquals(expectedStatement, cqlStatements.get(0));
}
}
2 changes: 1 addition & 1 deletion src/test/java/uk/sky/cqlmigrate/CqlMigratorImplTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public class CqlMigratorImplTest {

@BeforeClass
public static void setupCassandra() throws ConfigurationException, IOException, TTransportException, InterruptedException {
EmbeddedCassandraServerHelper.startEmbeddedCassandra(EmbeddedCassandraServerHelper.CASSANDRA_RNDPORT_YML_FILE);
EmbeddedCassandraServerHelper.startEmbeddedCassandra(EmbeddedCassandraServerHelper.CASSANDRA_RNDPORT_YML_FILE, 30000);
binaryPort = EmbeddedCassandraServerHelper.getNativeTransportPort();

cluster = Cluster.builder().addContactPoints(CASSANDRA_HOSTS).withPort(binaryPort).withCredentials(username, password).build();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
INSERT into role_graphs (provider, graphml)
VALUES ('SKY', 'some text; some more text');

-- it's role_graphs_sql table
CREATE TABLE role_graphs_sql(
provider text, -- provider name
graphml text, ---graph markup language
settings text, /** xml markup language **/
PRIMARY KEY (provider)-- PK
) WITH comment='test table role_graphs_sql' --
-- EOF
;

INSERT into role_graphs_sql (provider, graphml)
VALUES ('SKY', 'some ...
-- it''s comment
-- Created by yEd 3.12.2 /* <key for="graphml" id="d0" yfiles.type="resources"/> */ <test>''</test>
the end ')
;

/* /* Multi-line comment */
INSERT into role_graphs_sql (provider, graphml, settings)
VALUES ('EARTH', '', '<empty />');
*/

INSERT into role_graphs_sql (provider, graphml, settings)
VALUES ('EARTH', '',
' the end ');

0 comments on commit a1eb1f4

Please sign in to comment.