Skip to content

Commit

Permalink
Add SqlVisitor and ASTWalker; use for query->columns
Browse files Browse the repository at this point in the history
  • Loading branch information
tsmacdonald committed Feb 15, 2024
1 parent 13d7f4a commit 9f01e14
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 56 deletions.
14 changes: 10 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,17 @@ clj -T:build jar

This will create a JAR in the `target` directory.

The build process is slightly complicated since Macaw mixes Clojure and Java
files. If you're working on Macaw itself and make changes to a Java file, you
must:
## Working with the Java files

1. Rebuild
To compile the Java files, run:

```
clj -T:build compile
```

If you're working on Macaw and make changes to a Java file, you must:

1. Recompile
2. Restart your Clojure REPL

for the changes to take effect.
2 changes: 1 addition & 1 deletion deps.edn
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{:paths
["src" "java" "resources"]
["src" "java" "resources" "target/classes"]

:deps
{com.github.jsqlparser/jsqlparser {:mvn/version "4.8"}} ; The actual SQL Parser to wrap!
Expand Down
33 changes: 32 additions & 1 deletion java/com/metabase/SqlVisitor.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,44 @@
import net.sf.jsqlparser.schema.Column;
import net.sf.jsqlparser.schema.Table;

/**
* Clojure is not good at working with Java Visitors. They require over<em>riding</em> various over<em>loaded</em>
* methods and, in the case of walking a tree (exactly what we want to do here) can be counted on to call `visit()`
* recursively.
*
* Clojure's two main ways of dealing with this are `reify`, which does not permit overloading, and `proxy`, which does.
* However, creating a proxy object creates a completely new object that does not inherit behavior defined in the parent
* class. Therefore, if you have code like this:
*
* <code>
(proxy
[TablesNamesFinder]
[]
(visit [visitable]
(if (instance? Column visitable)
(swap! columns conj (.getColumnName ^Column visitable))
(let [^StatementVisitor this this]
(proxy-super visit visitable)))))
</code>
* the call to `proxy-super` does <em>not</em> call `TablesNamesFinder.visit()` with the non-`Column` `visitable`; that
* definition has been lost.
*
* <hr>
*
* Therefore, this interface was created to provide a more convenient escape hatch for Clojure. It removes the
* overloading requirement for the conventional visitor pattern, instead providing differently-named methods for each
* type. This lets Clojure code use `reify` to implement each method with the necessary behavior. The recursive
* tree-walking is handled by a different class, which calls the methods defined here along the way. Think of them as
* hooks for Clojure-land that don't affect the main behavior of the visitor.
*/
public interface SqlVisitor {

/**
* Called for every `Column` encountered, presumably for side effects.
*/

public void visitColumn(Column column);

/**
* Called for every `Table` encountered, presumably for side effects.
*/
Expand Down
5 changes: 2 additions & 3 deletions java/com/metabase/macaw/ASTWalker.java
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,8 @@
import net.sf.jsqlparser.statement.upsert.Upsert;

/**
* Walks the AST, using JSqlParser's `visit()` methods. Each `visit()` method
* additionally calls a `visit____()` method (e.g., `visitColumn()`) that can be
* overridden by client classes.
* Walks the AST, using JSqlParser's `visit()` methods. Each `visit()` method additionally calls a `visit____()` method
* (e.g., `visitColumn()`) as defined in the [[SqlVisitor]] interface that can be overridden by client classes.
*/
public class ASTWalker implements SelectVisitor, FromItemVisitor, ExpressionVisitor,
SelectItemVisitor, StatementVisitor {
Expand Down
54 changes: 7 additions & 47 deletions src/macaw/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -10,61 +10,37 @@
(net.sf.jsqlparser.schema
Column)
(net.sf.jsqlparser.statement
Statement
StatementVisitor)
(net.sf.jsqlparser.statement.update
Update)
Statement)
(net.sf.jsqlparser.util
TablesNamesFinder)))

#_(set! *warn-on-reflection* true)

(defn query->tables
"Given a parsed query (i.e., a subclass of `Statement`) return a list of fully-qualified table names found within it.
"Given a parsed query (i.e., a [subclass of] `Statement`) return a list of fully-qualified table names found within it.
Note: 'fully-qualified' means 'as found in the query'; it doesn't extrapolate schema names from other data sources."
[^Statement parsed-query]
(let [table-finder (TablesNamesFinder.)]
(.getTableList table-finder parsed-query)))

(defprotocol ConjColumn
(conj-column! [x known-columns]))

(extend-protocol ConjColumn
Model
(conj-column! [_ _known-column-names]
(println "nothing to add")
nil)

Column
(conj-column! [^Column column known-column-names]
(println "CONJing!")
(swap! known-column-names conj (.getColumnName column))))


(defn query->columns
"TODO: implement!"
"Given a parsed query (i.e., a [subclass of] `Statement`) return a list of the column names found within it."
[^Statement parsed-query]
(let [column-names (atom [])
column-finder (reify
SqlVisitor
(visitColumn (_this ^Column column)
(^void visitColumn [_this ^Column column]
(swap! column-names conj (.getColumnName column)))
(visitTable (_this _table)))]
(visitTable [_this _table]))]
(.walk (ASTWalker. column-finder) parsed-query)
@column-names))


(defn parsed-query
  "Main entry point. Parses the SQL string `query` with JSqlParser's `CCJSqlParserUtil/parse` and returns the
  resulting `Statement`, which is the input expected by the other functions in this namespace."
  [^String query]
  (. CCJSqlParserUtil parse query))

(-> "select foo, bar from baz;"
parsed-query
query->columns)


(defn resolve-columns
"TODO: Make this use metadata we know about.
TODO: If nil is a column (from a select *) then no need for the rest of the entries
Expand All @@ -80,23 +56,7 @@
(defn lineage
"Returns a sequence of the columns used in / referenced by the query"
[query]
(let [parsed (parsed-query query)
tables (query->tables parsed)
(let [parsed (parsed-query query)
tables (query->tables parsed)
columns (query->columns parsed)]
(resolve-columns tables columns)))





(comment



@(u/prog1 (atom [])
(conj-column! 1 <>)
(conj-column! 2.0 <>)
(conj-column! (Integer. 8) <>))


)
7 changes: 7 additions & 0 deletions test/macaw/core_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
(is (= ["core_user"]
(tables "select * from (select distinct email from core_user) q;")))))

;; Test helper: parses a SQL string with `m/parsed-query`, extracts its column names with `m/query->columns`,
;; and collects them into a set so that assertions do not depend on ordering or duplicates.
(def columns (comp set m/query->columns m/parsed-query))

;; Checks that `query->columns` reports columns from both the select list (foo, bar) and the join condition
;; (id, quux_id); table-qualified references such as `quux.id` are expected back as bare column names.
(deftest ^:parallel query->columns-test
(testing "Simple queries"
(is (= #{"foo" "bar" "id" "quux_id"}
(columns "select foo, bar from baz inner join quux on quux.id = baz.quux_id")))))

(deftest ^:parallel resolve-columns-test
(let [cols ["name" "id" "email"]]
(is (= {"core_user" cols
Expand Down

0 comments on commit 9f01e14

Please sign in to comment.