diff --git a/src/ldtab/rdf_list_handling.clj b/src/ldtab/rdf_list_handling.clj
new file mode 100644
index 0000000..c1c0a55
--- /dev/null
+++ b/src/ldtab/rdf_list_handling.clj
@@ -0,0 +1,75 @@
+(ns ldtab.rdf-list-handling
+  (:require [cheshire.core :as cs])
+  (:gen-class))
+
+(declare encode-rdf-list)
+
+(defn collect-list-elements
+  "Given a JSON map encoding an RDF list node and a vector acc,
+  return acc extended with the list's elements in list order.
+  Traversal ends at rdf:nil, and at any rdf:rest value that is not a map,
+  so that a malformed (unterminated) list cannot recur forever."
+  [json acc]
+  (let [element (first (get json "rdf:first"))
+        remainder (:object (first (get json "rdf:rest")))
+        elements (conj acc element)]
+    (if (map? remainder)
+      (recur remainder elements)
+      elements)))
+
+(defn is-rdf-list?
+  "Given a JSON value, return true if it is a map with datatype _JSONMAP
+  whose object is a map with the keys rdf:first and rdf:rest."
+  [json]
+  (and (map? json)
+       (= (:datatype json) "_JSONMAP")
+       (let [object (:object json)]
+         (and (map? object)
+              (contains? object "rdf:first")
+              (contains? object "rdf:rest")))))
+
+(defn is-top-level-rdf-list?
+  "Given a JSON value, return true if it is a thick triple
+  (a map with a subject and a predicate) whose object is an RDF list."
+  [json]
+  (and (map? json)
+       (contains? json :subject)
+       (contains? json :predicate)
+       (is-rdf-list? json)))
+
+(defn encode-rdf-list-object
+  "Given a JSON map encoding an RDF list (json is required to be an RDF list),
+  return a map holding the encoded list elements as a vector under :object
+  and _JSONLIST under :datatype."
+  [json]
+  {:object (mapv encode-rdf-list (collect-list-elements json []))
+   :datatype "_JSONLIST"})
+
+(defn map-on-hash-map-vals
+  "Given a hashmap m and a function f,
+  apply f to all values of m.
+  Example:
+  Given m = {:a 1, :b 2} and f = (fn [x] (inc x)),
+  then (map-on-hash-map-vals f m) = {:a 2, :b 3}"
+  [f m]
+  (zipmap (keys m) (map f (vals m))))
+
+(defn encode-top-level-rdf-list
+  "Given a thick triple whose object is an RDF list,
+  return the triple with the list encoded as a vector under :object
+  and _JSONLIST under :datatype."
+  [json]
+  (merge json (encode-rdf-list-object (:object json))))
+
+(defn encode-rdf-list
+  "Given a JSON value, return it with every RDF list
+  (nested rdf:first/rdf:rest maps) encoded as a _JSONLIST vector.
+  Maps and other collections are traversed recursively;
+  any other value is returned as is."
+  [m]
+  (cond
+    (is-top-level-rdf-list? m) (encode-top-level-rdf-list m)
+    (is-rdf-list? m) (encode-rdf-list-object (:object m))
+    (map? m) (map-on-hash-map-vals encode-rdf-list m)
+    (coll? m) (mapv encode-rdf-list m)
+    :else m))
diff --git a/src/ldtab/thick_rdf.clj b/src/ldtab/thick_rdf.clj
index f773a3e..7ce90a8 100644
--- a/src/ldtab/thick_rdf.clj
+++ b/src/ldtab/thick_rdf.clj
@@ -63,7 +63,7 @@
   (let [uri (curie-2-uri datatype prefix-2-base)]
     (.createTypedLiteral model literal uri)))

-(defn translate-json ^Resource
+(defn translate-json-map ^Resource
   [json prefix-2-base ^Model model]
   (let [bnode (.createResource model)]
     (doseq [k (keys json)]
@@ -74,11 +74,27 @@
               (translate-predicate-map x prefix-2-base model))))
     bnode))

+(defn translate-rdf-list
+  "Given a sequence of predicate maps (the object of a _JSONLIST value),
+  add the corresponding RDF list to the model and return its head:
+  rdf:nil for an empty sequence, a fresh blank node otherwise."
+  ^Resource
+  [json prefix-2-base ^Model model]
+  (if (empty? json)
+    (.createResource model (curie-2-uri "rdf:nil" prefix-2-base))
+    (let [bnode (.createResource model)
+          rdf-first (.createProperty model (curie-2-uri "rdf:first" prefix-2-base))
+          rdf-rest (.createProperty model (curie-2-uri "rdf:rest" prefix-2-base))]
+      (.add model bnode rdf-first (translate-predicate-map (first json) prefix-2-base model))
+      (.add model bnode rdf-rest (translate-rdf-list (rest json) prefix-2-base model))
+      bnode)))
+
 (defn translate-predicate-map ^Resource
   [predicateMap prefix-2-base ^Model model]
   (case (get predicateMap "datatype")
     "_IRI" (translate-iri (get predicateMap "object") prefix-2-base model)
-    "_JSON" (translate-json (get predicateMap "object") prefix-2-base model)
+    "_JSONMAP" (translate-json-map (get predicateMap "object") prefix-2-base model)
+    "_JSONLIST" (translate-rdf-list (get predicateMap "object") prefix-2-base model)
     (translate-literal (get predicateMap "object") (get predicateMap "datatype") prefix-2-base model)))

 (defn translate-annotation ^Resource
@@ -126,7 +142,9 @@
   (let [success (try
                   (cs/parse-string json)
                   (catch Exception e nil))
-        success (map? success)]
+        success (or (map? success)
+                    (coll? success)
+                    (seq? success))]
     (if success
       (cs/parse-string json)
       json)))
@@ -148,7 +166,7 @@
         subject-json (parse-json (:subject thick-triple))
         subject (if (string? subject-json)
                   (translate-iri subject-json prefix-2-base model)
-                  (translate-json subject-json prefix-2-base model))
+                  (translate-json-map subject-json prefix-2-base model))
         predicate (translate-property (:predicate thick-triple) prefix-2-base model)
         object (translate-predicate-map tt prefix-2-base model)
         annotation (parse-json (:annotation thick-triple))]
diff --git a/src/ldtab/thin2thick.clj b/src/ldtab/thin2thick.clj
index 7a6c15d..66a1a51 100644
--- a/src/ldtab/thin2thick.clj
+++ b/src/ldtab/thin2thick.clj
@@ -2,6 +2,7 @@
   (:require [clojure.set :as set]
             [clojure.string :as str]
             [ldtab.annotation-handling :as ann]
+            [ldtab.rdf-list-handling :as rdf-list]
             [ldtab.gci-handling :as gci]
             [cheshire.core :as cs])
   (:import [org.apache.jena.graph NodeFactory Triple Node])
@@ -119,7 +120,7 @@
 (defn get-datatype
   ([^Node node]
    (cond
-     (.isBlank node) "_JSON"
+     (.isBlank node) "_JSONMAP"
      (.isURI node) "_IRI"
      ;NB: Jena can't identify plain literals
      (.isLiteral node) (let [datatype (.getLiteralDatatypeURI node)
@@ -130,7 +131,7 @@
      :else "ERROR"))
   ([^Node node iri2prefix]
    (cond
-     (.isBlank node) "_JSON"
+     (.isBlank node) "_JSONMAP"
      (.isURI node) "_IRI"
      ;NB: Jena can't identify plain literals
      (.isLiteral node) (let [datatype (curify-with (.getLiteralDatatypeURI node) iri2prefix)
@@ -206,11 +207,30 @@
   "Given a JSON value, return a lexicographically ordered representation."
   [m]
   (cond
-    (map? m) (into (sorted-map) (map-on-hash-map-vals sort-json m)) ;sort by key
-    (coll? m) (vec (map cs/parse-string ;sort by string comparison
-                        (sort (map #(cs/generate-string (sort-json %))
-                                   m))))
-    :else m))
+    ; sort RDF lists
+    (and (map? m)
+         (contains? m :datatype)
+         (= (:datatype m) "_JSONLIST"))
+    (let [sorted-list {:datatype "_JSONLIST", :object (map sort-json (:object m))}]
+      (if (contains? m :subject) ; top-level RDF list
+        (into (sorted-map) (merge sorted-list
+                                  {:subject (sort-json (:subject m))
+                                   :predicate (:predicate m)
+                                   :graph (:graph m)
+                                   :assertion (:assertion m)
+                                   :retraction (:retraction m)
+                                   :annotation (:annotation m)}))
+        (into (sorted-map) sorted-list))); nested RDF list
+
+    (map? m)
+    (into (sorted-map) (map-on-hash-map-vals sort-json m)) ; sort by key
+
+    (coll? m)
+    (vec (map cs/parse-string ; sort by string comparison
+              (sort (map #(cs/generate-string (sort-json %)) m))))
+
+    :else
+    m))

 (defn map-subject-2-thin-triples
   "Given a set of thin triples,
@@ -263,7 +283,8 @@
                                   (= (:predicate %) "rdf:Statement"))
                            (ann/encode-raw-annotation-map (:object %))
                            %) gcis)
-        sorted (map sort-json annotations)
+        rdf-lists (map rdf-list/encode-rdf-list annotations)
+        sorted (map sort-json rdf-lists)
         hashed (map hash-existential-subject-blanknode sorted)
         normalised (map #(cs/parse-string (cs/generate-string %)) hashed)];TODO: stringify keys - this is a (probably an inefficient?) workaround
     normalised))
@@ -277,7 +298,8 @@
                                    (= (:predicate %) "rdf:Statement"))
                             (ann/encode-raw-annotation-map (:object %))
                             %) gcis)
-         sorted (map sort-json annotations)
+         rdf-lists (map rdf-list/encode-rdf-list annotations)
+         sorted (map sort-json rdf-lists)
          hashed (map hash-existential-subject-blanknode sorted)
          normalised (map #(cs/parse-string (cs/generate-string %)) hashed)];TODO: stringify keys - this is a (probably an inefficient?) workaround
      normalised)))