diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..3550a30 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..0801b90 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,205 @@ +# This file was automatically generated by sbt-github-actions using the +# githubWorkflowGenerate task. You should add and commit this file to +# your git repository. It goes without saying that you shouldn't edit +# this file by hand! Instead, if you wish to make changes, you should +# change your sbt build configuration to revise the workflow description +# to meet your needs, then regenerate this file. + +name: Continuous Integration + +on: + pull_request: + branches: ['**', '!update/**', '!pr/**'] + push: + branches: ['**', '!update/**', '!pr/**'] + tags: [v*] + +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + +concurrency: + group: ${{ github.workflow }} @ ${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + name: Build and Test + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + scala: [2.13, 3] + java: [temurin@11] + project: [rootJVM] + runs-on: ${{ matrix.os }} + timeout-minutes: 60 + steps: + - name: Install sbt + if: contains(runner.os, 'macos') + run: brew install sbt + + - name: Checkout current branch (full) + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Java (temurin@11) + id: setup-java-temurin-11 + if: matrix.java == 'temurin@11' + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: 11 + cache: sbt + + - name: sbt update + if: matrix.java == 'temurin@11' && steps.setup-java-temurin-11.outputs.cache-hit == 'false' + run: sbt +update + + - name: Check that workflows are up to date + run: sbt githubWorkflowCheck + + - run: yarn + + - name: Check headers and formatting + if: matrix.java == 'temurin@11' && matrix.os == 'ubuntu-latest' + run: sbt 'project ${{ matrix.project }}' '++ ${{ 
matrix.scala }}' headerCheckAll scalafmtCheckAll 'project /' scalafmtSbtCheck + + - name: Test + run: sbt 'project ${{ matrix.project }}' '++ ${{ matrix.scala }}' test + + - name: Check binary compatibility + if: matrix.java == 'temurin@11' && matrix.os == 'ubuntu-latest' + run: sbt 'project ${{ matrix.project }}' '++ ${{ matrix.scala }}' mimaReportBinaryIssues + + - name: Generate API documentation + if: matrix.java == 'temurin@11' && matrix.os == 'ubuntu-latest' + run: sbt 'project ${{ matrix.project }}' '++ ${{ matrix.scala }}' doc + + - name: Make target directories + if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main') + run: mkdir -p bindingsPython/.jvm/target core/.jvm/target sbtPlugin/.jvm/target project/target + + - name: Compress target directories + if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main') + run: tar cf targets.tar bindingsPython/.jvm/target core/.jvm/target sbtPlugin/.jvm/target project/target + + - name: Upload target directories + if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main') + uses: actions/upload-artifact@v4 + with: + name: target-${{ matrix.os }}-${{ matrix.java }}-${{ matrix.scala }}-${{ matrix.project }} + path: targets.tar + + publish: + name: Publish Artifacts + needs: [build] + if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main') + strategy: + matrix: + os: [ubuntu-latest] + java: [temurin@11] + runs-on: ${{ matrix.os }} + steps: + - name: Install sbt + if: contains(runner.os, 'macos') + run: brew install sbt + + - name: Checkout current branch (full) + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Java (temurin@11) + id: setup-java-temurin-11 + if: matrix.java == 'temurin@11' + uses: actions/setup-java@v4 + with: + distribution: temurin + 
java-version: 11 + cache: sbt + + - name: sbt update + if: matrix.java == 'temurin@11' && steps.setup-java-temurin-11.outputs.cache-hit == 'false' + run: sbt +update + + - name: Download target directories (2.13, rootJVM) + uses: actions/download-artifact@v4 + with: + name: target-${{ matrix.os }}-${{ matrix.java }}-2.13-rootJVM + + - name: Inflate target directories (2.13, rootJVM) + run: | + tar xf targets.tar + rm targets.tar + + - name: Download target directories (3, rootJVM) + uses: actions/download-artifact@v4 + with: + name: target-${{ matrix.os }}-${{ matrix.java }}-3-rootJVM + + - name: Inflate target directories (3, rootJVM) + run: | + tar xf targets.tar + rm targets.tar + + - name: Import signing key + if: env.PGP_SECRET != '' && env.PGP_PASSPHRASE == '' + env: + PGP_SECRET: ${{ secrets.PGP_SECRET }} + PGP_PASSPHRASE: ${{ secrets.PGP_PASSPHRASE }} + run: echo $PGP_SECRET | base64 -d -i - | gpg --import + + - name: Import signing key and strip passphrase + if: env.PGP_SECRET != '' && env.PGP_PASSPHRASE != '' + env: + PGP_SECRET: ${{ secrets.PGP_SECRET }} + PGP_PASSPHRASE: ${{ secrets.PGP_PASSPHRASE }} + run: | + echo "$PGP_SECRET" | base64 -d -i - > /tmp/signing-key.gpg + echo "$PGP_PASSPHRASE" | gpg --pinentry-mode loopback --passphrase-fd 0 --import /tmp/signing-key.gpg + (echo "$PGP_PASSPHRASE"; echo; echo) | gpg --command-fd 0 --pinentry-mode loopback --change-passphrase $(gpg --list-secret-keys --with-colons 2> /dev/null | grep '^sec:' | cut --delimiter ':' --fields 5 | tail -n 1) + + - name: Publish + env: + SONATYPE_USERNAME: ${{ secrets.SONATYPE_USERNAME }} + SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }} + SONATYPE_CREDENTIAL_HOST: ${{ secrets.SONATYPE_CREDENTIAL_HOST }} + run: sbt tlCiRelease + + dependency-submission: + name: Submit Dependencies + if: github.event_name != 'pull_request' + strategy: + matrix: + os: [ubuntu-latest] + java: [temurin@11] + runs-on: ${{ matrix.os }} + steps: + - name: Install sbt + if: contains(runner.os, 
'macos') + run: brew install sbt + + - name: Checkout current branch (full) + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Java (temurin@11) + id: setup-java-temurin-11 + if: matrix.java == 'temurin@11' + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: 11 + cache: sbt + + - name: sbt update + if: matrix.java == 'temurin@11' && steps.setup-java-temurin-11.outputs.cache-hit == 'false' + run: sbt +update + + - name: Submit Dependencies + uses: scalacenter/sbt-dependency-submission@v2 + with: + modules-ignore: rootjs_2.13 rootjs_3 rootjvm_2.13 rootjvm_3 rootnative_2.13 rootnative_3 tests_2.13 tests_3 + configs-ignore: test scala-tool scala-doc-tool test-internal diff --git a/.github/workflows/clean.yml b/.github/workflows/clean.yml new file mode 100644 index 0000000..547aaa4 --- /dev/null +++ b/.github/workflows/clean.yml @@ -0,0 +1,59 @@ +# This file was automatically generated by sbt-github-actions using the +# githubWorkflowGenerate task. You should add and commit this file to +# your git repository. It goes without saying that you shouldn't edit +# this file by hand! Instead, if you wish to make changes, you should +# change your sbt build configuration to revise the workflow description +# to meet your needs, then regenerate this file. + +name: Clean + +on: push + +jobs: + delete-artifacts: + name: Delete Artifacts + runs-on: ubuntu-latest + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - name: Delete artifacts + run: | + # Customize those three lines with your repository and credentials: + REPO=${GITHUB_API_URL}/repos/${{ github.repository }} + + # A shortcut to call GitHub API. + ghapi() { curl --silent --location --user _:$GITHUB_TOKEN "$@"; } + + # A temporary file which receives HTTP response headers. + TMPFILE=/tmp/tmp.$$ + + # An associative array, key: artifact name, value: number of artifacts of that name. 
+          declare -A ARTCOUNT
+
+          # Process all artifacts on this repository, loop on returned "pages".
+          URL=$REPO/actions/artifacts
+          while [[ -n "$URL" ]]; do
+
+            # Get current page, get response headers in a temporary file.
+            JSON=$(ghapi --dump-header $TMPFILE "$URL")
+
+            # Get URL of next page. Will be empty if we are at the last page.
+            URL=$(grep '^Link:' "$TMPFILE" | tr ',' '\n' | grep 'rel="next"' | head -1 | sed -e 's/.*<//' -e 's/>.*//')
+            rm -f $TMPFILE
+
+            # Number of artifacts on this page:
+            COUNT=$(( $(jq <<<$JSON -r '.artifacts | length') ))
+
+            # Loop on all artifacts on this page.
+            for ((i=0; $i < $COUNT; i++)); do
+
+              # Get name of artifact and count instances of this name.
+              name=$(jq <<<$JSON -r ".artifacts[$i].name?")
+              ARTCOUNT[$name]=$(( $(( ${ARTCOUNT[$name]} )) + 1))
+
+              id=$(jq <<<$JSON -r ".artifacts[$i].id?")
+              size=$(( $(jq <<<$JSON -r ".artifacts[$i].size_in_bytes?") ))
+              printf "Deleting '%s' #%d, %'d bytes\n" $name ${ARTCOUNT[$name]} $size
+              ghapi -X DELETE $REPO/actions/artifacts/$id
+            done
+          done
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4c26700
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,10 @@
+**/target
+.direnv/
+.scala-build/
+**/target
+.bsp/
+
+node_modules/
+result
+.vscode/
+**/.DS_Store
diff --git a/.sbtopts b/.sbtopts
new file mode 100644
index 0000000..81a0547
--- /dev/null
+++ b/.sbtopts
@@ -0,0 +1 @@
+-J-Xmx8G
diff --git a/.scalafmt.conf b/.scalafmt.conf
new file mode 100644
index 0000000..8df0289
--- /dev/null
+++ b/.scalafmt.conf
@@ -0,0 +1,32 @@
+runner.dialect = "scala213"
+version = 3.5.8
+maxColumn = 100
+align.preset = some
+
+newlines.beforeMultiline = unfold
+newlines.topLevelStatements = [before, after]
+newlines.topLevelStatementsMinBreaks = 2
+newlines.implicitParamListModifierForce = [before]
+continuationIndent.defnSite = 2
+continuationIndent.extendSite = 2
+optIn.breakChainOnFirstMethodDot = true
+includeCurlyBraceInSelectChains = true
+includeNoParensInSelectChains = true
+
+trailingCommas
= "multiple" + +rewrite.rules = [ + RedundantBraces, + RedundantParens, + ExpandImportSelectors, + PreferCurlyFors +] + +runner.optimizer.forceConfigStyleMinArgCount = 3 +danglingParentheses.defnSite = true +danglingParentheses.callSite = true +danglingParentheses.exclude = [ + "`trait`" +] +verticalMultiline.newlineAfterOpenParen = true +verticalMultiline.atDefnSite = true diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..893b410 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,13 @@ + Copyright 2022 treesitter4s contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..55479c7 --- /dev/null +++ b/README.md @@ -0,0 +1,45 @@ +# treesitter4s + +Tree-sitter wrapper for Scala (JVM). Uses JNA to wrap the native library. + +## Usage + +# Work in progress note + +This project is still in the oven and under active development. +Don't assume anything will work or that anything will stay as it is right now. + +Feel free to try it if that's okay with you ;) + +```scala +libraryDependencies ++= Seq( + // Pure Scala interface - cross-compiled for JVM & JS platforms + "org.polyvariant.treesitter4s" %% "core" % version, + // Bindings for the JVM artifact. Brings in JNA and the native library. + // You probably want to use this one. + "org.polyvariant.treesitter4s" %% "bindings" % version, + // Language support for a specific language. + // There's active work to split these out to separate artifacts. 
+ // "org.polyvariant.treesitter4s" %% "language-scala" % version, + // "org.polyvariant.treesitter4s" %% "language-python" % version, +) +``` + +## Goals + +- **immutable**, read-only, Scala-friendly API +- complete, 1-1 native/Java interface via [JNA](https://github.com/java-native-access/jna) +- binary convenience: no dealing with native libraries if you're on a supported system +- extensible language support + +## Supported systems + +Support can vary, but the following platforms are considered supported: + +- macOS x86_64 +- macOS aarch64 +- Linux x86_64 +- Linux aarch64 + +CI runs on x86_64 macOS/Linux machines. Development is currently done on an aarch64 Mac. +linux-aarch64 binaries are included thanks to the magic of [Nix](https://nixos.org/) and [Nixbuild](https://nixbuild.net/), but the library isn't being tested on that platform. diff --git a/bindingsPython/.jvm/src/main/scala/org/polyvariant/treesitter4s/bindings/scala/PythonLanguageBindings.java b/bindingsPython/.jvm/src/main/scala/org/polyvariant/treesitter4s/bindings/scala/PythonLanguageBindings.java new file mode 100644 index 0000000..f739b9d --- /dev/null +++ b/bindingsPython/.jvm/src/main/scala/org/polyvariant/treesitter4s/bindings/scala/PythonLanguageBindings.java @@ -0,0 +1,34 @@ +/* + * Copyright 2022 Polyvariant + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.polyvariant.treesitter4s.bindings.python; + +import com.sun.jna.Library; +import com.sun.jna.NativeLibrary; +import com.sun.jna.Native; +import org.polyvariant.treesitter4s.Language; + +public class PythonLanguageBindings { + + private static interface Bindings extends Library { + Language tree_sitter_python(); + } + + private static final Bindings LIBRARY = Language.loadLanguageLibrary("python", Bindings.class); + + public static final Language Python = LIBRARY.tree_sitter_python(); + +} diff --git a/build.sbt b/build.sbt new file mode 100644 index 0000000..cc8d279 --- /dev/null +++ b/build.sbt @@ -0,0 +1,122 @@ +import sjsonnew.BasicJsonProtocol + +import sjsonnew.JsonFormat + +import sbt.util.CacheImplicits._ + +ThisBuild / tlBaseVersion := "0.3" +ThisBuild / organization := "org.polyvariant.treesitter4s" +ThisBuild / organizationName := "Polyvariant" +ThisBuild / startYear := Some(2022) +ThisBuild / licenses := Seq(License.Apache2) +ThisBuild / developers := List(tlGitHubDev("kubukoz", "Jakub Kozłowski")) +ThisBuild / tlSonatypeUseLegacyHost := false +ThisBuild / githubWorkflowOSes := Seq("ubuntu-latest", "macos-latest") + +ThisBuild / tlJdkRelease := Some(11) + +ThisBuild / githubWorkflowBuild ~= (WorkflowStep.Run(commands = List("yarn")) +: _) + +def crossPlugin(x: sbt.librarymanagement.ModuleID) = compilerPlugin( + x.cross(CrossVersion.full) +) + +val compilerPlugins = List( + crossPlugin("org.polyvariant" % "better-tostring" % "0.3.17") +) + +val Scala212 = "2.12.18" +val Scala213 = "2.13.14" +val Scala3 = "3.3.3" + +ThisBuild / scalaVersion := Scala213 +ThisBuild / crossScalaVersions := Seq(Scala213, Scala3) + +ThisBuild / tlFatalWarnings := false + +val commonSettings = Seq( + libraryDependencies ++= compilerPlugins ++ Seq( + "com.disneystreaming" %%% "weaver-cats" % "0.8.4" % Test, + "com.disneystreaming" %%% "weaver-discipline" % "0.8.4" % Test, + "com.disneystreaming" %%% "weaver-scalacheck" % "0.8.4" % Test, + ) +) + +val 
commonJVMSettings = Seq( + Compile / doc / javacOptions -= "-Xlint:all", + Test / fork := true, + scalacOptions ++= { + if (scalaVersion.value.startsWith("2.13")) + Seq("-Wnonunit-statement") + else + Nil + }, +) + +lazy val core = crossProject(JVMPlatform) + .crossType(CrossType.Pure) + .settings( + commonSettings + ) + .enablePlugins(TreeSitter4sPlugin) + .jvmSettings( + commonJVMSettings, + libraryDependencies ++= Seq( + "net.java.dev.jna" % "jna" % "5.14.0" + ), + Compile / ts4sCompileCore := true, + ) + +lazy val bindingsPython = crossProject(JVMPlatform) + .crossType(CrossType.Pure) + .settings( + name := "language-python", + commonSettings, + Compile / ts4sGrammars += TreeSitterGrammar("python", "0.21.0"), + // example of another grammar in use + // Compile / ts4sGrammars += TreeSitterGrammar("rust", "0.21.2"), + ) + .enablePlugins(TreeSitter4sPlugin) + .dependsOn(core) + .jvmSettings(commonJVMSettings) + +lazy val tests = crossProject(JVMPlatform) + .crossType(CrossType.Pure) + .settings( + commonSettings + ) + .dependsOn(bindingsPython) + .jvmSettings(commonJVMSettings) + .enablePlugins(NoPublishPlugin) + +val sbtPlugin = crossProject(JVMPlatform) + .crossType(CrossType.Pure) + .settings( + scalaVersion := Scala212, + crossScalaVersions := Seq(Scala212), + name := "sbt-plugin", + ) + .enablePlugins(SbtPlugin) + .settings( + pluginCrossBuild / sbtVersion := { + scalaBinaryVersion.value match { + case "2.12" => "1.10.0" + } + }, + scriptedLaunchOpts := { + scriptedLaunchOpts.value ++ + Seq("-Xmx1024M", "-Dplugin.version=" + version.value) + }, + scriptedBufferLog := false, + libraryDependencies ++= Seq( + "com.lihaoyi" %% "requests" % "0.8.2", + "com.lihaoyi" %% "os-lib" % "0.10.0", + ), + ) + +lazy val root = tlCrossRootProject + .aggregate(core, bindingsPython, sbtPlugin, tests) + .settings( + Compile / doc / sources := Seq(), + sonatypeProfileName := "org.polyvariant", + ) diff --git a/core/.jvm/src/main/scala/org/polyvariant/treesitter4s/Language.java 
b/core/.jvm/src/main/scala/org/polyvariant/treesitter4s/Language.java new file mode 100644 index 0000000..c6e1e82 --- /dev/null +++ b/core/.jvm/src/main/scala/org/polyvariant/treesitter4s/Language.java @@ -0,0 +1,48 @@ +/* + * Copyright 2022 Polyvariant + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.polyvariant.treesitter4s; + +import com.sun.jna.PointerType; +import com.sun.jna.Pointer; +import com.sun.jna.Platform; +import com.sun.jna.Native; +import com.sun.jna.Library; +import java.io.InputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +public class Language extends PointerType { + public Language() { + super(); + } + + public Language(Pointer p) { + super(p); + } + + // utils + + public static C loadLanguageLibrary(String lang, Class clazz) { + try { + return Native.load("tree-sitter-" + lang, clazz); + } catch (UnsatisfiedLinkError e) { + throw new RuntimeException("Couldn't load library", e); + } + } + +} diff --git a/core/.jvm/src/main/scala/org/polyvariant/treesitter4s/TreeSitterPlatform.scala b/core/.jvm/src/main/scala/org/polyvariant/treesitter4s/TreeSitterPlatform.scala new file mode 100644 index 0000000..628523f --- /dev/null +++ b/core/.jvm/src/main/scala/org/polyvariant/treesitter4s/TreeSitterPlatform.scala @@ -0,0 +1,37 @@ +/* + * Copyright 2022 Polyvariant + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.polyvariant.treesitter4s
+
+import com.sun.jna.Native
+import org.polyvariant.treesitter4s.internal.Facade
+import org.polyvariant.treesitter4s.internal.TreeSitterLibrary
+
+/** Loads the native `tree-sitter` library through JNA and exposes [[make]] on top of it. */
+protected trait TreeSitterPlatform {
+
+  // Loaded while the implementing object initialises; a link failure is rethrown as a plain
+  // Exception that keeps the UnsatisfiedLinkError as its cause.
+  private val LIBRARY: TreeSitterLibrary = {
+    val bindingsClass = classOf[TreeSitterLibrary]
+
+    try Native.load("tree-sitter", bindingsClass)
+    catch {
+      case linkError: UnsatisfiedLinkError =>
+        throw new Exception("Couldn't load tree-sitter", linkError)
+    }
+  }
+
+  /** Creates a [[TreeSitter]] that parses with `language`, backed by the loaded library. */
+  def make(language: Language): TreeSitter = Facade.make(language, LIBRARY)
+
+}
diff --git a/core/.jvm/src/main/scala/org/polyvariant/treesitter4s/internal/Facade.scala b/core/.jvm/src/main/scala/org/polyvariant/treesitter4s/internal/Facade.scala
new file mode 100644
index 0000000..7487c8d
--- /dev/null
+++ b/core/.jvm/src/main/scala/org/polyvariant/treesitter4s/internal/Facade.scala
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2022 Polyvariant
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.polyvariant.treesitter4s.internal
+
+import org.polyvariant.treesitter4s
+import org.polyvariant.treesitter4s.Tree
+import org.polyvariant.treesitter4s.TreeSitter
+import org.polyvariant.treesitter4s.Language
+import java.nio.charset.StandardCharsets
+
+/** Bridges the raw JNA bindings in [[TreeSitterLibrary]] to the immutable `Tree`/`Node` model. */
+private[treesitter4s] object Facade {
+
+  /** Builds a [[TreeSitter]] whose `parse` runs the native parser with `language`.
+    *
+    * @param language
+    *   grammar handle handed to `ts_parser_set_language` on every parse
+    * @param ts
+    *   JNA proxy for the native tree-sitter library
+    */
+  def make(
+    language: Language,
+    ts: TreeSitterLibrary,
+  ): TreeSitter =
+    new TreeSitter {
+
+      private def mkParser() = ts.ts_parser_new()
+
+      // Every call creates its own native parser and tree, copies the tree into Scala values
+      // and deletes both native objects before returning, even when conversion throws.
+      def parse(
+        source: String
+      ): Tree = {
+
+        def mkTree(parserPointer: TreeSitterLibrary.Parser): TreeSitterLibrary.Tree = {
+          // Deliberately not `assert(...)`: Scala assertions are elidable, and eliding this one
+          // would also drop the native call that configures the parser. The error thrown is
+          // the same AssertionError an `assert` with this message would raise.
+          val languageAccepted = ts.ts_parser_set_language(parserPointer, language)
+          if (!languageAccepted)
+            throw new AssertionError("assertion failed: ts_parser_set_language")
+
+          // The length handed to the native side is the UTF-8 byte count, not the char count.
+          val sourceBytes = source.getBytes(StandardCharsets.UTF_8)
+          ts.ts_parser_parse_string(
+            parserPointer,
+            null /* old tree */,
+            sourceBytes,
+            sourceBytes.length.toLong,
+          )
+        }
+
+        val parserPointer = mkParser()
+
+        try {
+          val tree = mkTree(parserPointer)
+          try fromNative.tree(ts, tree, source)
+          finally ts.ts_tree_delete(tree)
+        } finally ts.ts_parser_delete(parserPointer)
+
+      }
+
+    }
+
+  private object fromNative {
+
+    /** Converts `node`, or yields `None` when `ts_node_is_null` reports a null node. */
+    def nodeNullCheck(
+      ts: TreeSitterLibrary,
+      node: TreeSitterLibrary.Node,
+      sourceFile: String,
+    ): Option[treesitter4s.Node] =
+      if (ts.ts_node_is_null(node))
+        None
+      else
+        Some(fromNative.node(ts, node, sourceFile))
+
+    /** Eagerly converts `underlying` and, recursively, every one of its children.
+      *
+      * @param sourceFile
+      *   the full text that was parsed; each node keeps a reference to it
+      */
+    def node(
+      ts: TreeSitterLibrary,
+      underlying: TreeSitterLibrary.Node,
+      sourceFile: String,
+    ): treesitter4s.Node = {
+      // toIntExact throws ArithmeticException rather than silently truncating the native value.
+      val startByte = Math.toIntExact(ts.ts_node_start_byte(underlying).longValue())
+      val endByte = Math.toIntExact(ts.ts_node_end_byte(underlying).longValue())
+
+      val children =
+        List.tabulate(Math.toIntExact(ts.ts_node_child_count(underlying))) { i =>
+          fromNative
+            .node(ts, ts.ts_node_child(underlying, i.toLong), sourceFile)
+        }
+
+      // Children for which the native call returns no field name (null) are skipped; `toMap`
+      // keeps only the last child when several share a name. Pairing each child with its index
+      // avoids indexing into the List once per child.
+      val fields =
+        children
+          .zipWithIndex
+          .flatMap { case (child, i) =>
+            Option(ts.ts_node_field_name_for_child(underlying, i.toLong))
+              .map(_ -> child)
+          }
+          .toMap
+
+      NodeImpl(
+        text =
ts.ts_node_string(underlying),
+        children = children,
+        fields = fields,
+        tpe = ts.ts_node_type(underlying),
+        startByte = startByte,
+        endByte = endByte,
+      )(sourceFile = sourceFile)
+    }
+
+    def tree(
+      ts: TreeSitterLibrary,
+      treePointer: TreeSitterLibrary.Tree,
+      sourceFile: String,
+    ): Tree = TreeImpl(
+      rootNode = fromNative.nodeNullCheck(
+        ts,
+        ts.ts_tree_root_node(treePointer),
+        sourceFile,
+      )
+    )
+
+  }
+
+}
+
+/** Immutable [[Tree]] holding the already-converted root node, if there is one. */
+private[treesitter4s] case class TreeImpl(
+  rootNode: Option[treesitter4s.Node]
+) extends Tree
+
+/** Immutable [[treesitter4s.Node]] snapshot.
+  *
+  * `sourceFile` sits in a second parameter list, so it takes no part in the generated
+  * `equals`/`hashCode`/`toString`.
+  */
+private[treesitter4s] case class NodeImpl(
+  text: String,
+  tpe: String,
+  children: List[treesitter4s.Node],
+  fields: Map[String, treesitter4s.Node],
+  startByte: Int,
+  endByte: Int,
+)(
+  private val sourceFile: String
+) extends treesitter4s.Node {
+
+  /** The slice of the parsed text covered by this node.
+    *
+    * `startByte`/`endByte` are offsets into the UTF-8 bytes that were handed to the parser, so
+    * the slice is taken on those bytes. Slicing the `String` directly would index UTF-16 chars
+    * and drift as soon as the text contains a non-ASCII character. Out-of-range offsets are
+    * clamped by `slice`, as they were before.
+    */
+  def source: String =
+    new String(
+      sourceFile.getBytes(StandardCharsets.UTF_8).slice(startByte, endByte),
+      StandardCharsets.UTF_8,
+    )
+
+}
diff --git a/core/.jvm/src/main/scala/org/polyvariant/treesitter4s/internal/TreeSitterLibrary.java b/core/.jvm/src/main/scala/org/polyvariant/treesitter4s/internal/TreeSitterLibrary.java
new file mode 100644
index 0000000..cb78150
--- /dev/null
+++ b/core/.jvm/src/main/scala/org/polyvariant/treesitter4s/internal/TreeSitterLibrary.java
@@ -0,0 +1,99 @@
+/*
+ * Copyright 2022 Polyvariant
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.polyvariant.treesitter4s.internal;
+
+import com.sun.jna.*;
+import com.sun.jna.Structure.FieldOrder;
+import org.polyvariant.treesitter4s.Language;
+
+// JNA interface over the native tree-sitter library: every method below is bound to the
+// native function of the same name.
+// todo: this could be a separate library (independent of Scala versions)
+public interface TreeSitterLibrary extends Library {
+
+  // todo: extensible for languages
+  // long tree_sitter_scala();
+
+  // Node value, passed and returned by value (Structure.ByValue). Nothing on the JVM side
+  // reads these fields; instances are only handed back to native calls.
+  // NOTE(review): confirm the field layout against the TSNode declaration in tree-sitter's
+  // api.h, in particular the element width of `context`.
+  @FieldOrder({ "context", "id", "tree" })
+  public static class Node extends Structure implements Structure.ByValue {
+    public long[] context = new long[4];
+    public Pointer id;
+    public Pointer tree;
+  }
+
+  // Typed pointer to a native parser; created by ts_parser_new, released by ts_parser_delete.
+  public static class Parser extends PointerType {
+    public Parser() {
+      super();
+    }
+
+    public Parser(Pointer p) {
+      super(p);
+    }
+  }
+
+  // Typed pointer to a native syntax tree; released by ts_tree_delete.
+  public static class Tree extends PointerType {
+    public Tree() {
+      super();
+    }
+
+    public Tree(Pointer p) {
+      super(p);
+    }
+  }
+
+  // NOTE(review): several counts, indices and lengths below are mapped to Java `long`.
+  // Presumably the C API declares them as 32-bit unsigned integers; confirm the mapping is
+  // ABI-safe on every supported platform.
+
+  // static
+
+  Parser ts_parser_new();
+
+  // parser
+
+  void ts_parser_delete(Parser parser);
+
+  // Returns whether the parser accepted the language.
+  boolean ts_parser_set_language(Parser parser, Language language);
+
+  // `string` holds the encoded source and `length` its size in bytes; `oldTree` may be null.
+  Tree ts_parser_parse_string(Parser parser, Pointer oldTree, byte[] string, long length);
+
+  // tree
+
+  // Callers treat a null result as "this child has no field name".
+  String ts_node_field_name_for_child(Node node, long index);
+
+  Node ts_tree_root_node(Tree tree);
+
+  long ts_language_version(Language language);
+
+  long ts_language_symbol_count(Language language);
+
+  void ts_tree_delete(Tree tree);
+
+  // This method is redundant, because each tree carries
+  // a Scala reference to its language already.
+  // Pointer language(Pointer tree);
+
+  // node
+
+  long ts_node_child_count(Node node);
+
+  String ts_node_type(Node node);
+
+  long ts_node_start_byte(Node node);
+
+  long ts_node_end_byte(Node node);
+
+  Node ts_node_child(Node node, long index);
+
+  boolean ts_node_is_null(Node node);
+
+  // NOTE(review): presumably the native side allocates this string and expects the caller to
+  // free it; a `String` return type leaves no handle to do so. Confirm against api.h.
+  String ts_node_string(Node node);
+}
diff --git a/core/src/main/scala/org/polyvariant/treesitter4s/TreeSitter.scala b/core/src/main/scala/org/polyvariant/treesitter4s/TreeSitter.scala
new file mode 100644
index 0000000..b0cafef
--- /dev/null
+++ b/core/src/main/scala/org/polyvariant/treesitter4s/TreeSitter.scala
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2022 Polyvariant
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.polyvariant.treesitter4s
+
+/** A parser bound to one language. */
+trait TreeSitter {
+
+  /** Parses `source` and returns the resulting syntax tree. */
+  def parse(source: String): Tree
+
+}
+
+/** Entry point; `make(language)` is inherited from [[TreeSitterPlatform]]. */
+object TreeSitter extends TreeSitterPlatform
+
+/** A parsed syntax tree. */
+trait Tree {
+  /** The root node, or `None` when the native library reports a null root. */
+  def rootNode: Option[Node]
+}
+
+/** One node of a syntax tree. */
+trait Node {
+  /** The part of the parsed text delimited by [[startByte]] and [[endByte]]. */
+  def source: String
+  /** String rendering of the node as returned by the native `ts_node_string` call. */
+  def text: String
+  /** The node's type name, as returned by the native `ts_node_type` call. */
+  def tpe: String
+  /** Child nodes, in the order the native library enumerates them. */
+  def children: List[Node]
+  /** Children that have a field name, keyed by that name. */
+  def fields: Map[String, Node]
+  /** Start offset reported by the native `ts_node_start_byte` call. */
+  def startByte: Int
+  /** End offset reported by the native `ts_node_end_byte` call. */
+  def endByte: Int
+}
diff --git a/core/src/test/scala/org/polyvariant/treesitter4s/TreeSitterTest.scala b/core/src/test/scala/org/polyvariant/treesitter4s/TreeSitterTest.scala
new file mode 100644
index 0000000..ed366e9
--- /dev/null
+++ b/core/src/test/scala/org/polyvariant/treesitter4s/TreeSitterTest.scala
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2022 Polyvariant
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.polyvariant.treesitter4s
+
+import weaver.*
+
+object TreeSitterTest extends FunSuite {
+  test("Tree Sitter loads") {
+    try {
+      println(TreeSitter)
+      success
+    } catch {
+      case e: ExceptionInInitializerError =>
+        e.printStackTrace()
+        failure("Couldn't load tree-sitter")
+    }
+
+  }
+}
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..57844a0
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,59 @@
+{
+  "nodes": {
+    "flake-utils": {
+      "inputs": {
+        "systems": "systems"
+      },
+      "locked": {
+        "lastModified": 1710146030,
+        "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1713254108,
+        "narHash": "sha256-0TZIsfDbHG5zibtlw6x0yOp3jkInIGaJ35B7Y4G8Pec=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "2fd19c8be2551a61c1ddc3d9f86d748f4db94f00",
+        "type": "github"
+      },
+      "original": {
+        "id": "nixpkgs",
+        "type": "indirect"
+      }
+    },
+    "root": {
+      "inputs": {
+        "flake-utils": "flake-utils",
+        "nixpkgs": "nixpkgs"
+      }
+    },
+    "systems": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..58712f6
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,14 @@
+{
+  inputs.flake-utils.url = "github:numtide/flake-utils";
+
+  outputs = { nixpkgs, flake-utils, ... }:
+    flake-utils.lib.eachDefaultSystem
+      (system:
+        let pkgs = import nixpkgs { inherit system; };
+        in
+        {
+          devShells.default = pkgs.mkShell {
+            packages = [ pkgs.sbt ];
+          };
+        });
+}
diff --git a/project/TreeSitter4sPlugin.scala b/project/TreeSitter4sPlugin.scala
new file mode 100644
index 0000000..6d38b9f
--- /dev/null
+++ b/project/TreeSitter4sPlugin.scala
@@ -0,0 +1,264 @@
+/*
+ * Copyright 2022 Polyvariant
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.polyvariant.treesitter4s.sbt
+
+import sbt._
+
+import sjsonnew.JsonFormat
+import sbt.Keys._
+import sjsonnew.BasicJsonProtocol
+
+import sbt.util.CacheImplicits._
+
+/**
+ * sbt plugin that downloads tree-sitter and grammar sources, builds them with `make` and
+ * adds the resulting native libraries to the managed resources of `Compile`.
+ *
+ * The plugin is not auto-triggered (`noTrigger`), so projects have to enable it explicitly.
+ */
+object TreeSitter4sPlugin extends AutoPlugin {
+
+  object autoImport {
+
+    val ts4sCompileCore = settingKey[Boolean](
+      "Whether to build the core tree-sitter library."
+    )
+
+    val ts4sTreeSitterVersion = settingKey[String]("Version of tree-sitter to compile.")
+
+    val ts4sGrammars = settingKey[Seq[TreeSitterGrammar]]("Grammars to compile binaries for.")
+
+    val ts4sBuildCore = taskKey[Seq[File]]("Build the core tree-sitter library.")
+    val ts4sBuildGrammars = taskKey[Seq[File]]("Build the tree-sitter grammars.")
+
+    /**
+     * A grammar to build. `language` selects the `tree-sitter-<language>` repository and
+     * `version` the `v<version>` archive to download from it.
+     */
+    case class TreeSitterGrammar(
+      language: String,
+      version: String,
+    )
+
+  }
+
+  import autoImport._
+
+  private object internals {
+
+    /**
+     * Downloads the `v<version>` source archive of `lib`, unpacks it into a fresh temporary
+     * directory and runs `make <library file name>` in the extracted sources.
+     *
+     * Needs `tar` and `make` on the PATH.
+     *
+     * @return path to the built binary (located inside the temporary directory)
+     */
+    def downloadAndBuild(lib: Library): os.Path = {
+      val name = lib.name
+      val version = lib.version
+      val binaryName = System.mapLibraryName(lib.name)
+
+      val downloadTo = os.Path(IO.createTemporaryDirectory)
+
+      println(s"Downloading $name $version to $downloadTo")
+
+      import sys.process._
+
+      requests
+        .get(s"${lib.repoUrl}/archive/v$version.tar.gz")
+        .readBytesThrough { bytes =>
+          // Build the command from an argument list: a plain command string is split on
+          // whitespace, which breaks when the temporary directory path contains spaces.
+          val untar = Process(List("tar", "-xzf", "-", "--directory", downloadTo.toString))
+
+          (untar #< bytes).!!
+        }
+
+      // Assumes the archive unpacks into a single "<name>-<version>" directory.
+      val extracted = downloadTo / s"$name-$version"
+
+      Process(
+        command = List("make", binaryName),
+        cwd = Some(extracted.toIO),
+      ).!!
+
+      extracted / binaryName
+    }
+
+    /**
+     * Wraps `f` so that its last result is stored in the `tag` sub-store of the task's cache
+     * and returned again for as long as the input does not change.
+     *
+     * Every cached computation needs its own `tag`: computations sharing one overwrite each
+     * other's stored input and output.
+     */
+    def simplyCached[Input: JsonFormat, Output: JsonFormat](
+      f: Input => Output
+    )(
+      s: TaskStreams,
+      tag: String,
+    ): Input => Output = {
+      val factory = s.cacheStoreFactory.sub(tag)
+
+      Tracked.inputChanged[Input, Output](
+        factory.make("input")
+      ) {
+        Function.untupled {
+          Tracked.lastOutput[(Boolean, Input), Output](
+            factory.make("output")
+          ) { case ((changed, input), lastResult) =>
+            lastResult match {
+              case Some(cached) if !changed => cached
+              case _                        => f(input)
+            }
+          }
+        }
+      }
+    }
+
+    // Coordinates of a native library: name, version and base URL of its repository.
+    case class Library(name: String, version: String, repoUrl: String)
+
+    /** Runs [[downloadAndBuild]] for `library`, caching the result under `tag`. */
+    def downloadAndBuildTask(
+      config: Configuration,
+      library: Library,
+      tag: String,
+    ): Def.Initialize[Task[os.Path]] = Def.task {
+
+      val s = (config / streams).value
+
+      implicit val jsonFormatOsPath: JsonFormat[os.Path] = BasicJsonProtocol
+        .projectFormat[os.Path, File](_.toIO, os.Path(_))
+
+      implicit val jsonFormatLibrary: JsonFormat[Library] = BasicJsonProtocol
+        .projectFormat[Library, (String, String, String)](
+          l => (l.name, l.version, l.repoUrl),
+          { case (name, version, repoUrl) => Library(name, version, repoUrl) },
+        )
+
+      val cached =
+        simplyCached(
+          downloadAndBuild
+        )(
+          s = s,
+          tag = tag,
+        )
+
+      cached(library)
+
+    }
+
+    /** Copies `from` into the directory `to`, replacing any existing copy; returns the copy. */
+    def copyLibrary(from: os.Path, to: os.Path): os.Path = {
+      val target = to / from.last
+
+      os.copy
+        .over(
+          from,
+          target,
+          createFolders = true,
+        )
+
+      target
+    }
+
+    /** Builds the core tree-sitter library and copies it into `config / resourceManaged`. */
+    def compileTreeSitter(
+      config: Configuration
+    ): Def.Initialize[Task[File]] = Def.taskDyn {
+      val output = os.Path((config / resourceManaged).value)
+      val version = (config / ts4sTreeSitterVersion).value
+
+      Def.task {
+        val extracted =
+          downloadAndBuildTask(
+            config = config,
+            library = Library(
+              name = "tree-sitter",
+              version = version,
+              repoUrl = "https://github.com/tree-sitter/tree-sitter",
+            ),
+            tag = "tree-sitter",
+          ).value
+
+        copyLibrary(extracted, output).toIO
+      }
+
+    }
+
+    /**
+     * Builds every grammar listed in `config / ts4sGrammars` and copies the resulting
+     * libraries into `config / resourceManaged`.
+     */
+    def compileGrammars(config: Configuration): Def.Initialize[Task[Seq[File]]] = Def.taskDyn {
+      val output = os.Path((config / resourceManaged).value)
+
+      val grammars = (config / ts4sGrammars).value.toList
+
+      val tasks = grammars.map { grammar =>
+        downloadAndBuildTask(
+          config = config,
+          library = Library(
+            name = s"tree-sitter-${grammar.language}",
+            version = grammar.version,
+            repoUrl = s"https://github.com/tree-sitter/tree-sitter-${grammar.language}",
+          ),
+          // One cache store per grammar: with a shared tag the grammars would overwrite each
+          // other's cached input/output and be rebuilt on every run.
+          tag = s"tree-sitter-${grammar.language}",
+        )
+      }
+
+      // I guess this is sbt's way of doing a traverse.
+      // Can we just get an actual map/traverse?
+      Def.task {
+        tasks
+          .joinWith(_.join)
+          .value
+          .map(copyLibrary(_, output).toIO)
+      }
+    }
+
+  }
+
+  override def trigger: PluginTrigger = noTrigger
+
+  import internals._
+
+  override def projectSettings: Seq[Setting[_]] = Seq(
+    // settings
+    Compile / ts4sGrammars := Nil,
+    Compile / ts4sCompileCore := false,
+    Compile / ts4sTreeSitterVersion := "0.22.6",
+
+    // tasks
+    Compile / ts4sBuildCore := {
+      if ((Compile / ts4sCompileCore).value)
+        compileTreeSitter(Compile).value :: Nil
+      else
+        Nil
+    },
+    Compile / ts4sBuildGrammars := compileGrammars(Compile).value,
+
+    // generators
+    Compile / resourceGenerators += (Compile / ts4sBuildCore).taskValue,
+    Compile / resourceGenerators += (Compile / ts4sBuildGrammars).taskValue,
+  )
+
+}
diff --git a/project/build.properties b/project/build.properties
new file mode 100644
index 0000000..081fdbb
--- /dev/null
+++ b/project/build.properties
@@ -0,0 +1 @@
+sbt.version=1.10.0
diff --git a/project/plugins.sbt b/project/plugins.sbt
new file mode 100644
index 0000000..be028ff
--- /dev/null
+++ b/project/plugins.sbt
@@ -0,0 +1,8 @@
+addSbtPlugin("org.typelevel" % "sbt-typelevel" % "0.7.1")
+addSbtPlugin("org.scala-js" % "sbt-scalajs" % "1.16.0")
+
+// TODO: keep in sync with sbtPlugin's deps.
+libraryDependencies ++= Seq(
+  "com.lihaoyi" %% "requests" % "0.8.2",
+  "com.lihaoyi" %% "os-lib" % "0.10.0",
+)
diff --git a/sbtPlugin/.jvm/src/main/scala/org/polyvariant/treesitter/sbt/TreeSitter4sPlugin.scala b/sbtPlugin/.jvm/src/main/scala/org/polyvariant/treesitter/sbt/TreeSitter4sPlugin.scala
new file mode 120000
index 0000000..3a4ba69
--- /dev/null
+++ b/sbtPlugin/.jvm/src/main/scala/org/polyvariant/treesitter/sbt/TreeSitter4sPlugin.scala
@@ -0,0 +1 @@
+../../../../../../../../../project/TreeSitter4sPlugin.scala
\ No newline at end of file
diff --git a/tests/src/main/scala/org/polyvariant/treesitter4s/tests/Demo.scala b/tests/src/main/scala/org/polyvariant/treesitter4s/tests/Demo.scala
new file mode 100644
index 0000000..ceacb87
--- /dev/null
+++ b/tests/src/main/scala/org/polyvariant/treesitter4s/tests/Demo.scala
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2022 Polyvariant
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.polyvariant.treesitter4s.tests
+
+import org.polyvariant.treesitter4s.bindings.python.PythonLanguageBindings
+import org.polyvariant.treesitter4s.TreeSitter
+
+/**
+ * Minimal runnable example: parses a snippet with the Python bindings and prints the
+ * root node's type, if the tree has a root node.
+ */
+object Demo {
+
+  def main(args: Array[String]): Unit = {
+    val ts = TreeSitter.make(PythonLanguageBindings.Python)
+
+    System.out.println(ts.parse("""def main = print("hello world")""").rootNode.map(_.tpe))
+  }
+
+}
diff --git a/tests/src/test/scala/org/polyvariant/treesitter4s/tests/BindingTests.scala b/tests/src/test/scala/org/polyvariant/treesitter4s/tests/BindingTests.scala
new file mode 100644
index 0000000..fca325f
--- /dev/null
+++ b/tests/src/test/scala/org/polyvariant/treesitter4s/tests/BindingTests.scala
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2022 Polyvariant
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.polyvariant.treesitter4s.tests
+
+import cats.implicits._
+import org.polyvariant.treesitter4s.Tree
+import weaver._
+import org.polyvariant.treesitter4s.TreeSitter
+import org.polyvariant.treesitter4s.bindings.python.PythonLanguageBindings
+
+/** Smoke tests for the tree-sitter bindings, run against the Python grammar. */
+object BindingTests extends FunSuite {
+  val tsPython = TreeSitter.make(PythonLanguageBindings.Python)
+
+  def parseExample(s: String): Tree = tsPython.parse(s)
+
+  test("root node child count") {
+    val tree = parseExample("def main = print('Hello')\n")
+    val rootNode = tree.rootNode
+
+    // eql takes (expected, found); this order keeps the failure output labelled correctly.
+    assert.eql(Some(2), rootNode.map(_.children.length))
+  }
+
+  // NOTE(review): the disabled tests below parse Scala source and expect Scala node types,
+  // but this suite uses the Python grammar; presumably they still need porting - confirm.
+
+  // test("root node child type") {
+  //   val tree = parseExample("class Hello {}")
+  //   val rootNode = tree.rootNode
+
+  //   assert.eql(rootNode.map(_.tpe), Some("compilation_unit"))
+  // }
+
+  // test("root node child by index (in range)") {
+  //   val tree = parseExample("class Hello {}")
+
+  //   val rootNode = tree.rootNode.getOrElse(sys.error("missing root node"))
+
+  //   assert.eql(rootNode.children.lift(0).isDefined, true)
+  // }
+
+  // test("root node child by index (out of range)") {
+  //   val tree = parseExample("class Hello {}")
+  //   val rootNode = tree.rootNode.getOrElse(sys.error("missing root node"))
+
+  //   assert.eql(rootNode.children.lift(-1).isDefined, false)
+  // }
+
+  // test("root node, range") {
+  //   val tree = parseExample("class Hello {}")
+  //   val rootNode = tree.rootNode.getOrElse(sys.error("missing root node"))
+
+  //   assert.eql(rootNode.startByte, 0) &&
+  //   assert.eql(rootNode.endByte, 14)
+  // }
+
+  // test("root node source") {
+  //   val tree = parseExample("class Hello {}")
+  //   val node = tree.rootNode.getOrElse(sys.error("missing root node"))
+
+  //   assert.eql(node.source, "class Hello {}")
+  // }
+
+  // test("node source") {
+  //   val tree = parseExample("class Hello {}")
+  //   val node = tree.rootNode.getOrElse(sys.error("missing root node")).children(0).children(1)
+
+  //   assert.eql(node.source, "Hello")
+  // }
+
+  // test("root node text") {
+  //   val tree = parseExample("class Hello {}")
+  //   val node = tree.rootNode.getOrElse(sys.error("missing root node"))
+
+  //   assert.eql(
+  //     node.text,
+  //     "(compilation_unit (class_definition name: (identifier) body: (template_body)))",
+  //   )
+  // }
+
+  // test("node text") {
+  //   val tree = parseExample("class Hello {}")
+  //   val node = tree.rootNode.getOrElse(sys.error("missing root node")).children(0).children(1)
+
+  //   assert.eql(node.text, "(identifier)")
+  // }
+
+  // test("node fields") {
+  //   val tree = parseExample("class Hello {}")
+  //   val node = tree.rootNode.getOrElse(sys.error("missing root node")).children.head
+
+  //   val fieldNames = node.fields.keys.toList
+  //   assert.eql(fieldNames, "name" :: "body" :: Nil) &&
+  //   assert.eql(
+  //     node.fields.fmap(n => (n.source, n.tpe)),
+  //     Map("name" -> (("Hello", "identifier")), "body" -> (("{}", "template_body"))),
+  //   )
+  // }
+
+}