From c6b0cbcd95173b0df1aa3a929327e2e4813b41ee Mon Sep 17 00:00:00 2001 From: Michael Pollmeier Date: Fri, 10 Jan 2025 13:35:23 +0100 Subject: [PATCH] dot export: fix file format (#289) as discussed in joernio/joern#5158 I didn't get any feedback if this really solves their issue, but this is certainly better than before --- build.sbt | 8 ++- .../flatgraph/formats/dot/DotExporter.scala | 48 ++++++++++------ .../formats/dot/DotExporterTests.scala | 56 +++++++++++++++++++ .../flatgraph/formats/dot/DotTests.scala | 32 ----------- 4 files changed, 91 insertions(+), 53 deletions(-) create mode 100644 tests/src/test/scala/flatgraph/formats/dot/DotExporterTests.scala delete mode 100644 tests/src/test/scala/flatgraph/formats/dot/DotTests.scala diff --git a/build.sbt b/build.sbt index e56bbff5..ef6c3490 100644 --- a/build.sbt +++ b/build.sbt @@ -2,9 +2,10 @@ name := "flatgraph" ThisBuild / organization := "io.joern" ThisBuild / scalaVersion := scala3 -val slf4jVersion = "2.0.16" val scala3 = "3.5.2" val scala2_12 = "2.12.20" +val slf4jVersion = "2.0.16" +val commonsTextVersion = "1.13.0" /** Only the below listed projects are included in things like `sbt compile`. * We explicitly want to exclude `benchmarks` which requires qwiet.ai / shiftleft @@ -51,6 +52,7 @@ lazy val formats = project name := "flatgraph-formats", libraryDependencies ++= Seq( "com.github.tototoshi" %% "scala-csv" % "2.0.0", + "org.apache.commons" % "commons-text" % commonsTextVersion, "org.scala-lang.modules" %% "scala-xml" % "2.3.0", "io.spray" %% "spray-json" % "1.3.6", "com.github.scopt" %% "scopt" % "4.1.0", @@ -81,7 +83,7 @@ lazy val domainClassesGenerator_3 = project libraryDependencies ++= Seq( "org.slf4j" % "slf4j-simple" % slf4jVersion % Optional, "com.lihaoyi" %% "os-lib" % "0.9.1", - "org.apache.commons" % "commons-text" % "1.10.0", + "org.apache.commons" % "commons-text" % commonsTextVersion, "com.github.scopt" %% "scopt" % "4.1.0", ("org.scalameta" %% "scalafmt-dynamic" % "3.7.17").cross(CrossVersion.for3Use2_13), ), @@ -97,7 +99,7 @@ lazy val domainClassesGenerator_2_12 = project libraryDependencies ++= Seq( "org.slf4j"% "slf4j-simple" % slf4jVersion % Optional, "com.lihaoyi" %% "os-lib" % "0.9.1", - "org.apache.commons" % "commons-text" % "1.12.0", + "org.apache.commons" % "commons-text" % commonsTextVersion, "com.github.scopt" %% "scopt" % "4.1.0", "org.scalameta" %% "scalafmt-dynamic" % "3.7.17", ), diff --git a/formats/src/main/scala/flatgraph/formats/dot/DotExporter.scala b/formats/src/main/scala/flatgraph/formats/dot/DotExporter.scala index 07858e72..eece059f 100644 --- a/formats/src/main/scala/flatgraph/formats/dot/DotExporter.scala +++ b/formats/src/main/scala/flatgraph/formats/dot/DotExporter.scala @@ -1,10 +1,14 @@ package flatgraph.formats.dot import flatgraph.formats.{ExportResult, Exporter, iterableForList, resolveOutputFileSingle} -import flatgraph.{Accessors, Edge, GNode, Graph, Schema} +import flatgraph.{Accessors, Edge, GNode, Schema} import java.nio.file.{Files, Path} -import scala.jdk.CollectionConverters.MapHasAsScala +import org.apache.commons.text.StringEscapeUtils +import org.apache.commons.text.translate.LookupTranslator + +import java.util.Collections +import scala.jdk.CollectionConverters.MapHasAsJava import scala.util.Using /** Exports flatgraph to graphviz dot/gv file @@ -12,11 +16,25 @@ import scala.util.Using * Note: GraphML doesn't natively support list property types, so we fake it by encoding it as a `;` delimited string. If you import this * into a different database, you'll need to parse that separately. * + * Export rules for dot format as per https: //github.com/joernio/joern/issues/5158 1) If the attribute value contains special characters + * such as spaces,<,>,=, etc., it must be enclosed in double quotation marks. Otherwise, it will cause syntax errors. 2) Graphviz requires + * that the node ID must be a valid identifier. If the node ID is a pure number (such as 120259084301), it needs to be enclosed in double + * quotation marks, otherwise it will be mistaken for an integer constant. 3) The attribute value contains special characters such as(such + * as CODE=""), which need to be enclosed in quotation marks or escaped in some cases. 4) In Graphviz's. dot file, it is best to use + * semicolons for each node definition, edge definition, and attribute definition; ending. Your file is missing semicolons. + * * https://en.wikipedia.org/wiki/DOT_(graph_description_language) https://www.graphviz.org/doc/info/lang.html * http://magjac.com/graphviz-visual-editor/ https://www.slideshare.net/albazo/graphiz-using-the-dot-language */ object DotExporter extends Exporter { override def defaultFileExtension = "dot" + val EndOfLine = ';' + + private val lookupMap = Map( + """\""" -> """\\""", // \ -> \\ + "\"" -> """\"""" // " -> \" + ) + val translator = new LookupTranslator(Collections.unmodifiableMap(lookupMap.asJava)) override def runExport(schema: Schema, nodes: IterableOnce[GNode], edges: IterableOnce[Edge], outputFile: Path) = { val outFile = resolveOutputFileSingle(outputFile, s"export.$defaultFileExtension") @@ -34,31 +52,29 @@ object DotExporter extends Exporter { nodeCount += 1 val line = new StringBuffer() .append(" ") - .append(node.id) - .append(s"[label=${node.label} ") + .append(s""""${node.id}" """) + .append(s"""[label="${node.label}" """) .append( Accessors .getNodeProperties(node) .iterator .map { case (key, value) => - s"$key=${encodePropertyValue(value)}" + s"""$key="${encodePropertyValue(value)}"""" } .mkString(" ") ) .append("]") + .append(EndOfLine) writeLine(line.toString) } edges.iterator.foreach { edge => edgeCount += 1 + val propertyMaybe = Option(edge.property).map(property => s"""property="${encodePropertyValue(property)}"""").getOrElse("") val line = new StringBuffer() - .append(s" ${edge.src.id()} -> ${edge.dst.id()} ") - .append(s"[label=${edge.label} ") - - if (edge.property != null) - line.append(s"property=${encodePropertyValue(edge.property)}") - - line.append("]") + .append(s""" "${edge.src.id()}" -> "${edge.dst.id()}"""") + .append(s""" [label="${edge.label}" $propertyMaybe]""") + .append(EndOfLine) writeLine(line.toString) } @@ -71,13 +87,9 @@ object DotExporter extends Exporter { private def encodePropertyValue(value: Any): String = { value match { case value: String => - val escaped = value - .replace("""\""", """\\""") // escape escape chars - this should come first - .replace("\"", "\\\"") // escape double quotes, because we use them to enclose strings - s"\"$escaped\"" + StringEscapeUtils.builder(translator).escape(value).toString case list if iterableForList.isDefinedAt(list) => - val values = iterableForList(list).mkString(";") - s"\"$values\"" + iterableForList(list).map(encodePropertyValue).mkString(";") case value => value.toString } } diff --git a/tests/src/test/scala/flatgraph/formats/dot/DotExporterTests.scala b/tests/src/test/scala/flatgraph/formats/dot/DotExporterTests.scala new file mode 100644 index 00000000..7be8e411 --- /dev/null +++ b/tests/src/test/scala/flatgraph/formats/dot/DotExporterTests.scala @@ -0,0 +1,56 @@ +package flatgraph.formats.dot + +import better.files.* +import flatgraph.* +import org.scalatest.matchers.should.Matchers.* +import org.scalatest.wordspec.AnyWordSpec +import testdomains.generic.GenericDomain +import testdomains.generic.edges.ConnectedTo +import testdomains.generic.nodes.NewNodeA + +class DotExporterTests extends AnyWordSpec { + + "Exporter should export valid dot" in { + val graph = GenericDomain.empty.graph + val node1 = NewNodeA() + .stringMandatory("regular string") + .stringOptional(""" [escapeMe2] escape=Me3 escape"Me4 escape\Me5 """) + .stringList(Seq("one", "two")) + val node2 = NewNodeA().intMandatory(1).intOptional(2).intList(Seq(10, 11)) + + DiffGraphApplier.applyDiff( + graph, + GenericDomain.newDiffGraphBuilder + .addEdge(node1, node2, ConnectedTo.Label, "edge property") + ) + + File.usingTemporaryDirectory(getClass.getName) { exportRootDirectory => + val exportResult = DotExporter.runExport(graph, exportRootDirectory.pathAsString) + exportResult.nodeCount shouldBe 2 + exportResult.edgeCount shouldBe 1 + val Seq(exportedFile) = exportResult.files + + val result = better.files.File(exportedFile).contentAsString.trim + + /* Export rules for dot format as per https: //github.com/joernio/joern/issues/5158 : + * 1) If the attribute value contains special characters such as spaces,<,>,=, etc., it must be enclosed in double quotation marks. + * Otherwise, it will cause syntax errors. + * 2) Graphviz requires that the node ID must be a valid identifier. If the node ID is a pure number (such as 120259084301), + * it needs to be enclosed in double quotation marks, otherwise it will be mistaken for an integer constant. + * 3) The attribute value contains special characters such as(such as CODE=""), which need to be enclosed in quotation marks or escaped in some cases. + * 4) In Graphviz's. dot file, it is best to use semicolons for each node definition, edge definition, and attribute definition; ending. Your file is missing semicolons. + */ + + withClue(s"actual result was: `$result`") { + result.trim shouldBe + """digraph { + | "0" [label="node_a" int_mandatory="42" string_list="one;two" string_mandatory="regular string" string_optional=" [escapeMe2] escape=Me3 escape\"Me4 escape\\Me5 "]; + | "1" [label="node_a" int_list="10;11" int_mandatory="1" int_optional="2" string_mandatory=""]; + | "0" -> "1" [label="connected_to" property="edge property"]; + |} + |""".stripMargin.trim + } + } + } + +} diff --git a/tests/src/test/scala/flatgraph/formats/dot/DotTests.scala b/tests/src/test/scala/flatgraph/formats/dot/DotTests.scala deleted file mode 100644 index 32a9b4e7..00000000 --- a/tests/src/test/scala/flatgraph/formats/dot/DotTests.scala +++ /dev/null @@ -1,32 +0,0 @@ -package flatgraph.formats.dot - -import better.files.* -import flatgraph.* -import org.scalatest.matchers.should.Matchers.* -import org.scalatest.wordspec.AnyWordSpec - -class DotTests extends AnyWordSpec { - - "Exporter should export valid dot" in { - val graph = TestGraphs.createSimpleGraph().graph - - File.usingTemporaryDirectory(getClass.getName) { exportRootDirectory => - val exportResult = DotExporter.runExport(graph, exportRootDirectory.pathAsString) - exportResult.nodeCount shouldBe 2 - exportResult.edgeCount shouldBe 1 - val Seq(exportedFile) = exportResult.files - - val result = better.files.File(exportedFile).contentAsString.trim - withClue(s"actual result was: `$result`") { - result.trim shouldBe - """digraph { - | 0[label=node_a int_mandatory="42" string_list="node 1 c1;node 1 c2" string_mandatory="node 1 a" string_optional="node 1 b"] - | 1[label=node_a int_list="10;11;12" int_mandatory="1" int_optional="2" string_mandatory=""] - | 0 -> 1 [label=connected_to property="edge property"] - |} - |""".stripMargin.trim - } - } - } - -}