Skip to content

Commit

Permalink
Support format preserving replacements
Browse files Browse the repository at this point in the history
  • Loading branch information
crisptrutski committed Feb 28, 2024
1 parent aba1c72 commit ac7522f
Show file tree
Hide file tree
Showing 5 changed files with 163 additions and 27 deletions.
12 changes: 10 additions & 2 deletions java/com/metabase/macaw/AstWalker.java
Original file line number Diff line number Diff line change
Expand Up @@ -253,13 +253,21 @@ public void invokeCallback(CallbackKey key, Object visitedItem) {
}

/**
* Main entry point. Walk the given `expression`, invoking the callbacks as appropriate.
* Fold the given `expression`, using the callbacks to update the accumulator as appropriate.
*/
public Acc walk(Expression expression) {
public Acc fold(Expression expression) {
expression.accept(this);
return acc;
}

/**
* Walk the given `expression`, invoking the callbacks for side effects as appropriate.
* The expression itself is handed back rather than any accumulated value, so calls can be chained.
*
* @param expression the expression that accepts this walker as its visitor
* @return the same `expression` instance that was passed in
*/
public Expression walk(Expression expression) {
expression.accept(this);
return expression;
}

@Override
public void visit(Select select) {
List<WithItem> withItemsList = select.getWithItemsList();
Expand Down
40 changes: 16 additions & 24 deletions src/macaw/core.clj
Original file line number Diff line number Diff line change
@@ -1,38 +1,25 @@
(ns macaw.core
(:require
[macaw.rewrite :as rewrite]
[macaw.walk :as mw])
(:import
(com.metabase.macaw
AstWalker
AstWalker$CallbackKey)
(net.sf.jsqlparser.parser
CCJSqlParserUtil)
(net.sf.jsqlparser.schema
Column
Table)
(net.sf.jsqlparser.statement
Statement)))
(net.sf.jsqlparser.parser CCJSqlParserUtil)
(net.sf.jsqlparser.schema Column Table)
(net.sf.jsqlparser.statement Statement)))

(set! *warn-on-reflection* true)

(def callback-keys
"keyword->key map for the AST-folding callbacks."
;; TODO: Move this to a Malli schema to simplify the indirection
{:column AstWalker$CallbackKey/COLUMN
:table AstWalker$CallbackKey/TABLE})

(defn- walk-query [parsed-query callbacks init-val]
(.walk (AstWalker. callbacks init-val) parsed-query))

(defn query->components
"Given a parsed query (i.e., a [subclass of] `Statement`) return a map with the `:tables` and `:columns` found within it.
(Specifically, it returns their fully-qualified names as strings, where 'fully-qualified' means 'as referred to in
the query'; this function doesn't do additional inference work to find out a table's schema.)"
[^Statement parsed-query]
(walk-query parsed-query
{(:column callback-keys) #(update %1 :columns conj (.getColumnName ^Column %2))
(:table callback-keys) #(update %1 :tables conj (.getName ^Table %2))}
{:columns #{}
:tables #{}}))
(mw/fold-query parsed-query
{:column #(update %1 :columns conj (.getColumnName ^Column %2))
:table #(update %1 :tables conj (.getName ^Table %2))}
{:columns #{}
:tables #{}}))

(defn parsed-query
"Main entry point: takes a string query and returns a `Statement` object that can be handled by the other functions."
Expand All @@ -57,3 +44,8 @@
(let [parsed (parsed-query query)
{:keys [columns tables]} (query->components parsed)]
(resolve-columns tables columns)))

(defn replace-names
  "Parse the SQL string `sql` and rewrite it with the table and column renames described by `renames`
  (a map with `:tables` and `:columns` entries, each mapping an existing name to its replacement).
  The parsing is done by `parsed-query`; the rewriting is delegated to `macaw.rewrite/replace-names`."
  [sql renames]
  (let [ast (parsed-query sql)]
    (rewrite/replace-names sql ast renames)))
73 changes: 73 additions & 0 deletions src/macaw/rewrite.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
(ns macaw.rewrite
(:require
[macaw.walk :as mw])
(:import
(net.sf.jsqlparser.parser ASTNodeAccess SimpleNode)
(net.sf.jsqlparser.schema Column Table)))

(set! *warn-on-reflection* true)

(defn- index-of-nth
  "Return the 0-based index at which the `n`th occurrence (counting from 1) of `needle` starts in `haystack`.
  Returns -1 when `n` is zero, or when `haystack` contains fewer than `n` occurrences. `n` must not be negative."
  [^String haystack ^String needle n]
  (assert (not (neg? n)))
  (if-not (zero? n)
    (loop [remaining n
           from      0]
      (let [hit (.indexOf haystack needle from)]
        ;; Stop on the occurrence we were asked for, or as soon as the search comes up empty.
        (if (or (= 1 remaining) (neg? hit))
          hit
          ;; Resume the search one character past the start of the current hit.
          (recur (dec remaining) (inc hit)))))
    -1))

(defn- ->idx
  "Translate a `line` / `col` position within `sql` into a single character index.
  The index of the newline that ends the previous line is looked up with `index-of-nth`; for line 1 that lookup
  yields -1, so the result is simply `col`. On later lines the result is `col` plus the number of characters up to
  and including that newline."
  ;; NOTE(review): presumably `line` and `col` are 1-based, as reported by the parser's tokens -- confirm.
  [^String sql line col]
  (let [preceding-newline (index-of-nth sql "\n" (dec line))]
    (+ col 1 preceding-newline)))

(defn- splice-token
  "Replace the text covered by `node` with `value` inside the partially rewritten SQL.

  - `seen` is a volatile holding the set of nodes that have already been spliced.
  - `before` is the original SQL; token line/column positions are resolved against it.
  - `[during offset]` is the accumulator: the SQL rewritten so far, and the net change in length caused by the
    replacements made so far.

  Returns an updated `[sql offset]` pair, or the accumulator unchanged when `node` is nil or was already seen."
  [seen before [^String during offset] ^SimpleNode node ^String value]
  (cond
    (nil? node)
    [during offset]

    ;; The AST visitor can hand us the same expression more than once; only splice each node a single time.
    (contains? @seen node)
    [during offset]

    :else
    (let [_            (vswap! seen conj node)
          start-token  (.jjtGetFirstToken node)
          end-token    (.jjtGetLastToken node)
          start-idx    (->idx before
                              (.-beginLine start-token)
                              (.-beginColumn start-token))
          end-idx      (->idx before
                              (.-endLine end-token)
                              (.-endColumn end-token))
          ;; Positions come from the original SQL, so shift them by the length change accumulated so far.
          ;; NOTE(review): a single running offset appears to assume nodes arrive in source order -- confirm.
          prefix       (.substring during 0 (+ offset (dec start-idx)))
          suffix       (.substring during (+ offset end-idx))
          replaced-len (inc (- end-idx start-idx))]
      ;; Possible optimization (not implemented here): rather than incrementally building strings, accumulate
      ;; range-replacement pairs and then reduce over a string builder.
      [(str prefix value suffix)
       (+ offset (- (.length value) replaced-len))])))

(defn- update-query
  "Produce the SQL string for `updated-ast` by splicing the current fully qualified name of every visited table
  and column into the original `sql`, leaving the rest of the text (comments, whitespace) as it was."
  [updated-ast sql]
  (let [;; Shared across both callbacks: the AST visitor may visit the same expression more than once.
        visited   (volatile! #{})
        splicer   (fn [render]
                    (fn [state item]
                      (let [ast-node (.getASTNode ^ASTNodeAccess item)
                            text     (render item)]
                        (splice-token visited sql state ast-node text))))
        callbacks {:table  (splicer (fn [^Table t] (.getFullyQualifiedName t)))
                   :column (splicer (fn [^Column c] (.getFullyQualifiedName c)))}]
    ;; The fold accumulates a [sql offset] pair; only the SQL is of interest to the caller.
    (-> (mw/fold-query updated-ast callbacks [sql 0])
        first)))

(defn replace-names
  "Apply table and column renames to `sql`, given `parsed-ast`, the AST parsed from that same (unmodified) SQL.
  The third argument is a map whose `:tables` and `:columns` entries each map an existing name to its replacement.
  The AST is mutated in place first, and the renamed identifiers are then spliced back into the original text."
  [sql parsed-ast {table-renames :tables, column-renames :columns}]
  (let [rename-table  (fn [^Table table]
                        (when-let [name' (get table-renames (.getName table))]
                          (.setName table name')))
        rename-column (fn [^Column column]
                        (when-let [name' (get column-renames (.getColumnName column))]
                          (.setColumnName column name')))
        renamed-ast   (mw/walk-query parsed-ast {:table  rename-table
                                                 :column rename-column})]
    (update-query renamed-ast sql)))
33 changes: 33 additions & 0 deletions src/macaw/walk.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
(ns macaw.walk
(:import
(com.metabase.macaw AstWalker AstWalker$CallbackKey)))

(set! *warn-on-reflection* true)

(def ->callback-key
  "Lookup table from the keyword used in Clojure callback maps to the `AstWalker$CallbackKey` constant that
  `AstWalker` expects as the key."
  ;; TODO: Move this to a Malli schema to simplify the indirection
  {:table  AstWalker$CallbackKey/TABLE
   :column AstWalker$CallbackKey/COLUMN})

(defn- preserve
  "Adapt the one-argument, side-effecting function `f` to the two-argument accumulator callback shape.
  The returned function calls `f` on the visited value, discards its result, and returns the accumulator as is."
  [f]
  (fn [state item]
    (let [_ (f item)]
      state)))

(defn- update-keys-vals
  "Build a new map from the entries of `m`, applying `key-f` to every key and `val-f` to every value."
  [m key-f val-f]
  (reduce (fn [out [k v]]
            (assoc out (key-f k) (val-f v)))
          {}
          m))

(defn walk-query
  "Traverse the AST of `parsed-query`, calling the matching one-argument function in `callbacks` (a map keyed by
  `:table` / `:column`) on each visited item purely for its side effects, such as mutating the AST.
  Returns whatever `AstWalker.walk` returns."
  [parsed-query callbacks]
  (let [lifted (update-keys-vals callbacks ->callback-key preserve)
        ;; The walker still requires an initial accumulator; its value is never used on this path.
        walker (AstWalker. lifted ::ignored)]
    (.walk walker parsed-query)))

(defn fold-query
  "Reduce over the AST of `parsed-query`. `callbacks` maps `:table` / `:column` to two-argument functions of the
  accumulator and the visited item, each returning the next accumulator; `init-val` is the starting accumulator.
  Returns whatever `AstWalker.fold` returns."
  [parsed-query callbacks init-val]
  (-> (AstWalker. (update-keys callbacks ->callback-key) init-val)
      (.fold parsed-query)))
32 changes: 31 additions & 1 deletion test/macaw/core_test.clj
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
(ns macaw.core-test
(:require
[clojure.test :refer :all]
[clojure.test :refer [deftest testing is]]
[macaw.core :as m]))

(def tables (comp :tables m/query->components m/parsed-query))
Expand Down Expand Up @@ -31,3 +31,33 @@
(is (= {"core_user" cols
"report_card" cols}
(m/resolve-columns ["core_user" "report_card"] cols)))))

;; Test helper: asserts that applying the `replacements` map to the SQL string `before`
;; with `m/replace-names` produces exactly the string `after`.
(defn test-replacement [before replacements after]
(is (= after (m/replace-names before replacements))))

(deftest ^:parallel replace-names-test
;; Simple case: one table and one column are renamed, both where the table is declared and where it
;; qualifies a column; the untouched names (`b`, `y`) must come through unchanged.
(test-replacement "select a.x, b.y from a, b;"
{:tables {"a" "aa"}
:columns {"x" "xx"}}
"select aa.xx, b.y from aa, b;")

;; Format preservation: the query spans several lines and contains comments and irregular spacing, all of
;; which must survive the rewrite. The rename maps also contain entries ("cruft", "hoi") that do not occur
;; in the query, and a column ("yoink") renamed to a name that is already used as an alias.
(test-replacement
"select *, boink
, yoink as oink
from /* /* lore */
core_user,
bore_user, /* more */ snore_user ;"

{:tables {"core_user" "floor_muser"
"bore_user" "user"
"snore_user" "vigilant_user"
"cruft" "tuft"}
:columns {"boink" "sturmunddrang"
"yoink" "oink"
"hoi" "polloi"}}

"select *, sturmunddrang
, oink as oink
from /* /* lore */
floor_muser,
user, /* more */ vigilant_user ;"))

0 comments on commit ac7522f

Please sign in to comment.