From 8fc1e1ca6f67cf0aa9b89c0463d773afd97b12af Mon Sep 17 00:00:00 2001 From: David Kunzmann Date: Tue, 26 Nov 2024 12:15:55 +0100 Subject: [PATCH] SONARPY-2072: Compute correct line location when the cells ends with an empty line --- .../plugins/python/IpynbNotebookParser.java | 17 ++++- .../python/IpynbNotebookParserTest.java | 74 ++++++++++++++----- .../plugins/python/notebook_extra_line.ipynb | 2 +- .../notebook_extra_line_compressed.ipynb | 1 + .../python/notebook_extra_line_explicit.ipynb | 46 ++++++++++++ 5 files changed, 120 insertions(+), 20 deletions(-) create mode 100644 sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebook_extra_line_compressed.ipynb create mode 100644 sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebook_extra_line_explicit.ipynb diff --git a/sonar-python-plugin/src/main/java/org/sonar/plugins/python/IpynbNotebookParser.java b/sonar-python-plugin/src/main/java/org/sonar/plugins/python/IpynbNotebookParser.java index 3b524682a7..22635f84e7 100644 --- a/sonar-python-plugin/src/main/java/org/sonar/plugins/python/IpynbNotebookParser.java +++ b/sonar-python-plugin/src/main/java/org/sonar/plugins/python/IpynbNotebookParser.java @@ -39,7 +39,6 @@ public class IpynbNotebookParser { private static final Set ACCEPTED_LANGUAGE = Set.of("python", "ipython"); - public static Optional parseNotebook(PythonInputFile inputFile) { try { return new IpynbNotebookParser(inputFile).parse(); @@ -190,6 +189,14 @@ private static NotebookParsingData parseSourceArray(int startLine, JsonParser jP } if (!lastSourceLine.endsWith("\n")) { cellData.appendToSource("\n"); + } else { + // if the last string of the array ends with a newline character we should add this new line to our representation + var newLineLocation = new IPythonLocation( + tokenLocation.getLineNr(), + tokenLocation.getColumnNr(), + List.of(new EscapeCharPositionInfo(tokenLocation.getColumnNr(), 1)), + false); + cellData.addLineToSource("\n", newLineLocation); } // Account for the last cell delimiter cellData.addDelimiterToSource(SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER + "\n", tokenLocation.getLineNr(), tokenLocation.getColumnNr()); @@ -212,8 +219,14 @@ private static NotebookParsingData parseSourceMultilineString(int startLine, Jso previousLen += line.length() + 2; previousExtraChars += currentCount; } + + if (sourceLine.endsWith("\n")) { + var column = tokenLocation.getColumnNr() + previousExtraChars + previousLen; + cellData.addLineToSource("\n", new IPythonLocation(tokenLocation.getLineNr(), column, List.of(new EscapeCharPositionInfo(column, 1)), true)); + previousLen += 2; + } // Account for the last cell delimiter - cellData.addDelimiterToSource(SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER + "\n", tokenLocation.getLineNr(), tokenLocation.getColumnNr()); + cellData.addDelimiterToSource(SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER + "\n", tokenLocation.getLineNr(), tokenLocation.getColumnNr() + previousExtraChars + previousLen); return cellData; } diff --git a/sonar-python-plugin/src/test/java/org/sonar/plugins/python/IpynbNotebookParserTest.java b/sonar-python-plugin/src/test/java/org/sonar/plugins/python/IpynbNotebookParserTest.java index f02fa951b4..0e13fcafc6 100644 --- a/sonar-python-plugin/src/test/java/org/sonar/plugins/python/IpynbNotebookParserTest.java +++ b/sonar-python-plugin/src/test/java/org/sonar/plugins/python/IpynbNotebookParserTest.java @@ -50,26 +50,26 @@ void testParseNotebook() throws IOException { assertThat(result.contents()).hasLineCount(27); assertThat(StringUtils.countMatches(result.contents(), IpynbNotebookParser.SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER)) .isEqualTo(7); - assertThat(result.locationMap()).extracting(map -> map.get(1)).isEqualTo(new IPythonLocation(17, 5)); + assertThat(result.locationMap()).extractingByKey(1).isEqualTo(new IPythonLocation(17, 5)); //" print \"not none\"\n" - assertThat(result.locationMap()).extracting(map -> map.get(3)).isEqualTo(new IPythonLocation(19, 5, + assertThat(result.locationMap()).extractingByKey(3).isEqualTo(new IPythonLocation(19, 5, mapToColumnMappingList(Map.of(10, 1, 19, 1)))); //"source": "#Some code\nprint(\"hello world\\n\")", - assertThat(result.locationMap()).extracting(map -> map.get(16)).isEqualTo(new IPythonLocation(64, 14, List.of(), true)); - assertThat(result.locationMap()).extracting(map -> map.get(17)).isEqualTo(new IPythonLocation(64, 26, mapToColumnMappingList(Map.of(6 + assertThat(result.locationMap()).extractingByKey(16).isEqualTo(new IPythonLocation(64, 14, List.of(), true)); + assertThat(result.locationMap()).extractingByKey(17).isEqualTo(new IPythonLocation(64, 26, mapToColumnMappingList(Map.of(6 , 1, 18, 1, 20, 1)), true)); //"source": "print(\"My\\ntext\")\nprint(\"Something else\\n\")" - assertThat(result.locationMap()).extracting(map -> map.get(22)).isEqualTo(new IPythonLocation(83, 14, mapToColumnMappingList(Map.of(6 + assertThat(result.locationMap()).extractingByKey(22).isEqualTo(new IPythonLocation(83, 14, mapToColumnMappingList(Map.of(6 , 1, 9, 1, 15, 1)), true)); - assertThat(result.locationMap()).extracting(map -> map.get(23)).isEqualTo(new IPythonLocation(83, 36, mapToColumnMappingList(Map.of(6 + assertThat(result.locationMap()).extractingByKey(23).isEqualTo(new IPythonLocation(83, 36, mapToColumnMappingList(Map.of(6 , 1, 21, 1, 23, 1)), true)); //"source": "a = \"A bunch of characters \\n \\f \\r \\ \"\nb = None" - assertThat(result.locationMap()).extracting(map -> map.get(25)) + assertThat(result.locationMap()).extractingByKey(25) .isEqualTo(new IPythonLocation(90, 14, mapToColumnMappingList(Map.of(4, 1, 27, 1, 30, 1, 33, 1, 36, 1, 39, 1)), true)); - assertThat(result.locationMap()).extracting(map -> map.get(26)).isEqualTo(new IPythonLocation(90, 62, List.of(), true)); - // last line with the cell delimiter which contains the EOF token - assertThat(result.locationMap()).extracting(map -> map.get(27)).isEqualTo(new IPythonLocation(90, 14, List.of())); + assertThat(result.locationMap()).extractingByKey(26).isEqualTo(new IPythonLocation(90, 62, List.of(), true)); + // last line with the cell delimiter which contains the EOF token the column of this token should be at the end of the previous line + assertThat(result.locationMap()).extractingByKey(27).isEqualTo(new IPythonLocation(90, 72, List.of(), false)); } @Test @@ -111,14 +111,15 @@ void testParseNotebookWithEmptyLines() throws IOException { var result = resultOptional.get(); - assertThat(result.locationMap().keySet()).hasSize(4); - assertThat(result.contents()).hasLineCount(4); + assertThat(result.locationMap().keySet()).hasSize(5); + assertThat(result.contents()).hasLineCount(5); assertThat(StringUtils.countMatches(result.contents(), IpynbNotebookParser.SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER)) .isEqualTo(1); - assertThat(result.locationMap()).extracting(map -> map.get(3)).isEqualTo(new IPythonLocation(11, 5)); + assertThat(result.locationMap()).extractingByKey(4).extracting(IPythonLocation::line).isEqualTo(11); + assertThat(result.locationMap()).extractingByKey(4).extracting(IPythonLocation::column).isEqualTo(5); // last line with the cell delimiter which contains the EOF token - assertThat(result.locationMap()).extracting(map -> map.get(4)).isEqualTo(new IPythonLocation(11, 5)); + assertThat(result.locationMap()).extractingByKey(5).isEqualTo(new IPythonLocation(11, 5)); } @Test @@ -149,7 +150,25 @@ void testParseNotebookWithNoLanguage() { } @Test - void testParseNotebookWithExtraLineEndInArray() throws IOException { + void testDifferentJsonRepresentationOfEmptyLine() throws IOException { + var inputFile = createInputFile(baseDir, "notebook_extra_line.ipynb", InputFile.Status.CHANGED, InputFile.Type.MAIN); + var inputFileExtraLineExplicit = createInputFile(baseDir, "notebook_extra_line_explicit.ipynb", InputFile.Status.CHANGED, InputFile.Type.MAIN); + var inputFileExtraLineExplicitSingleLine = createInputFile(baseDir, "notebook_extra_line_compressed.ipynb", InputFile.Status.CHANGED, InputFile.Type.MAIN); + + var notebook = IpynbNotebookParser.parseNotebook(inputFile); + var notebookExtraLineExplicit = IpynbNotebookParser.parseNotebook(inputFileExtraLineExplicit); + var notebookExtraLineExplicitSingleLine = IpynbNotebookParser.parseNotebook(inputFileExtraLineExplicitSingleLine); + + assertThat(notebook).isPresent(); + assertThat(notebookExtraLineExplicit).isPresent(); + assertThat(notebookExtraLineExplicitSingleLine).isPresent(); + + assertThat(notebook.get().contents()).isEqualTo(notebookExtraLineExplicit.get().contents()); + assertThat(notebook.get().contents()).isEqualTo(notebookExtraLineExplicitSingleLine.get().contents()); + } + + @Test + void testParseNotebookEndingWithEmptyLine() throws IOException { var inputFile = createInputFile(baseDir, "notebook_extra_line.ipynb", InputFile.Status.CHANGED, InputFile.Type.MAIN); var resultOptional = IpynbNotebookParser.parseNotebook(inputFile); @@ -157,8 +176,29 @@ void testParseNotebookWithExtraLineEndInArray() throws IOException { assertThat(resultOptional).isPresent(); var result = resultOptional.get(); - assertThat(result.locationMap()).hasSize(3); - assertThat(result.contents()).hasLineCount(3); + assertThat(result.locationMap()).hasSize(4); + assertThat(result.contents()).hasLineCount(4); + // The empty line + assertThat(result.locationMap()).extractingByKey(3).extracting(IPythonLocation::line).isEqualTo(19); + assertThat(result.locationMap()).extractingByKey(3).extracting(IPythonLocation::column).isEqualTo(5); + // The delimiter + assertThat(result.locationMap()).extractingByKey(4).extracting(IPythonLocation::line).isEqualTo(19); + assertThat(result.locationMap()).extractingByKey(4).extracting(IPythonLocation::column).isEqualTo(5); + + inputFile = createInputFile(baseDir, "notebook_extra_line_compressed.ipynb", InputFile.Status.CHANGED, InputFile.Type.MAIN); + resultOptional = IpynbNotebookParser.parseNotebook(inputFile); + + assertThat(resultOptional).isPresent(); + + result = resultOptional.get(); + assertThat(result.locationMap()).hasSize(4); + assertThat(result.contents()).hasLineCount(4); + // The empty line + assertThat(result.locationMap()).extractingByKey(3).extracting(IPythonLocation::line).isEqualTo(1); + assertThat(result.locationMap()).extractingByKey(3).extracting(IPythonLocation::column).isEqualTo(317); + // The delimiter is added after the empty line + assertThat(result.locationMap()).extractingByKey(4).extracting(IPythonLocation::line).isEqualTo(1); + assertThat(result.locationMap()).extractingByKey(4).extracting(IPythonLocation::column).isEqualTo(319); } @Test diff --git a/sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebook_extra_line.ipynb b/sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebook_extra_line.ipynb index 75b69cf888..280b355a29 100644 --- a/sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebook_extra_line.ipynb +++ b/sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebook_extra_line.ipynb @@ -16,7 +16,7 @@ ], "source": [ "#correct += (predicted == y).sum().item()\n", - "#print(f'Epoch {epoch}: {correct / total}')\n" + "print(f'Epoch {epoch}: {correct / total}')\n" ], "id": "6cbd92ef6319ca9a" } diff --git a/sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebook_extra_line_compressed.ipynb b/sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebook_extra_line_compressed.ipynb new file mode 100644 index 0000000000..00a5ebe62d --- /dev/null +++ b/sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebook_extra_line_compressed.ipynb @@ -0,0 +1 @@ +{ "cells": [{"cell_type": "code","execution_count": 5,"metadata": {},"outputs": [{"name": "stdout","output_type": "stream","text": ["Files already downloaded and verified\n","Files already downloaded and verified\n"]}],"source": "#correct += (predicted == y).sum().item()\nprint(f'Epoch {epoch}: {correct / total}')\n","id": "6cbd92ef6319ca9a" } ], "metadata": { "kernelspec": {"display_name": "Python 3","language": "python","name": "python3" }, "language_info": {"codemirror_mode": {"name": "ipython","version": 2},"file_extension": ".py","mimetype": "text/x-python","name": "python","nbconvert_exporter": "python","pygments_lexer": "ipython2","version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5} diff --git a/sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebook_extra_line_explicit.ipynb b/sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebook_extra_line_explicit.ipynb new file mode 100644 index 0000000000..81dd2174d6 --- /dev/null +++ b/sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebook_extra_line_explicit.ipynb @@ -0,0 +1,46 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Files already downloaded and verified\n", + "Files already downloaded and verified\n" + ] + } + ], + "source": [ + "#correct += (predicted == y).sum().item()\n", + "print(f'Epoch {epoch}: {correct / total}')\n", + "" + ], + "id": "6cbd92ef6319ca9a" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}