From 29b813394f2815b58c6c206d083f406f34228995 Mon Sep 17 00:00:00 2001 From: ckindermann Date: Wed, 27 Dec 2023 23:26:36 -0800 Subject: [PATCH 1/3] Change format for RDF lists --- src/ldtab/rdf_list_handling.clj | 47 +++++++++++++++++++++++++++++++++ src/ldtab/thin2thick.clj | 14 +++++++--- 2 files changed, 57 insertions(+), 4 deletions(-) create mode 100644 src/ldtab/rdf_list_handling.clj diff --git a/src/ldtab/rdf_list_handling.clj b/src/ldtab/rdf_list_handling.clj new file mode 100644 index 0000000..50f5730 --- /dev/null +++ b/src/ldtab/rdf_list_handling.clj @@ -0,0 +1,47 @@ +(ns ldtab.rdf-list-handling + (:require [cheshire.core :as cs]) + (:gen-class)) + +(declare encode-rdf-list) + +(defn collect-list-elements [json acc] + (let [element (first (get json "rdf:first")) + remainder (:object (first (get json "rdf:rest")))] + (if (= remainder "rdf:nil") + (conj acc element) + (recur remainder (conj acc element))))) + +(defn is-rdf-list? [json] + (if (map? json) + (let [object (:object json) + datatype (:datatype json)] + (and (map? object) + (contains? object "rdf:first") + (contains? object "rdf:rest") + (= datatype "_JSONMAP"))) + false)) + +;json is required to be an rdf list +(defn encode-rdf-list-object [json] + (let [elements (collect-list-elements json []) + encoded-elements (map #(encode-rdf-list %) elements)] + {:object (into [] encoded-elements) + :datatype "_JSONLIST"})) + +(defn map-on-hash-map-vals + "Given a hashmap m and a function f, + apply f to all values of m. + Example: + Given m = {:a 1, :b 2} and f = (fn [x] (inc x)), + then (map-on-hash-map-vals f m) = {:a 2, :b 3}" + [f m] + (zipmap (keys m) (map f (vals m)))) + +(defn encode-rdf-list + "Given a JSON value, return a lexicographically ordered representation." + [m] + (cond + (is-rdf-list? m) (encode-rdf-list-object (:object m)) + (map? m) (map-on-hash-map-vals encode-rdf-list m) + (coll? m) (into [] (map encode-rdf-list m)) + :else m)) diff --git a/src/ldtab/thin2thick.clj b/src/ldtab/thin2thick.clj index 7a6c15d..a6091fb 100644 --- a/src/ldtab/thin2thick.clj +++ b/src/ldtab/thin2thick.clj @@ -2,6 +2,7 @@ (:require [clojure.set :as set] [clojure.string :as str] [ldtab.annotation-handling :as ann] + [ldtab.rdf-list-handling :as rdf-list] [ldtab.gci-handling :as gci] [cheshire.core :as cs]) (:import [org.apache.jena.graph NodeFactory Triple Node]) @@ -119,7 +120,7 @@ (defn get-datatype ([^Node node] (cond - (.isBlank node) "_JSON" + (.isBlank node) "_JSONMAP" (.isURI node) "_IRI" ;NB: Jena can't identify plain literals (.isLiteral node) (let [datatype (.getLiteralDatatypeURI node) @@ -130,7 +131,7 @@ :else "ERROR")) ([^Node node iri2prefix] (cond - (.isBlank node) "_JSON" + (.isBlank node) "_JSONMAP" (.isURI node) "_IRI" ;NB: Jena can't identify plain literals (.isLiteral node) (let [datatype (curify-with (.getLiteralDatatypeURI node) iri2prefix) @@ -206,6 +207,9 @@ "Given a JSON value, return a lexicographically ordered representation." [m] (cond + (and (map? m) + (contains? m :datatype) + (= (:datatype m) "_JSONLIST")) (into (sorted-map) {:datatype "_JSONLIST", :object (map sort-json (:object m))}) (map? m) (into (sorted-map) (map-on-hash-map-vals sort-json m)) ;sort by key (coll? m) (vec (map cs/parse-string ;sort by string comparison (sort (map #(cs/generate-string (sort-json %)) @@ -263,7 +267,8 @@ (= (:predicate %) "rdf:Statement")) (ann/encode-raw-annotation-map (:object %)) %) gcis) - sorted (map sort-json annotations) + rdf-lists (map rdf-list/encode-rdf-list annotations) + sorted (map sort-json rdf-lists) hashed (map hash-existential-subject-blanknode sorted) normalised (map #(cs/parse-string (cs/generate-string %)) hashed)];TODO: stringify keys - this is a (probably an inefficient?) workaround normalised)) @@ -277,7 +282,8 @@ (= (:predicate %) "rdf:Statement")) (ann/encode-raw-annotation-map (:object %)) %) gcis) - sorted (map sort-json annotations) + rdf-lists (map rdf-list/encode-rdf-list annotations) + sorted (map sort-json rdf-lists) hashed (map hash-existential-subject-blanknode sorted) normalised (map #(cs/parse-string (cs/generate-string %)) hashed)];TODO: stringify keys - this is a (probably an inefficient?) workaround normalised))) From 40d7a33ed0cd2c6026be8298d30dbcf2802775fa Mon Sep 17 00:00:00 2001 From: ckindermann Date: Tue, 2 Jul 2024 17:41:06 -0700 Subject: [PATCH 2/3] Fix handling of unnested RDF lists --- src/ldtab/rdf_list_handling.clj | 21 ++++++++++++++++++++- src/ldtab/thin2thick.clj | 30 +++++++++++++++++++++++------- 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/src/ldtab/rdf_list_handling.clj b/src/ldtab/rdf_list_handling.clj index 50f5730..c1c0a55 100644 --- a/src/ldtab/rdf_list_handling.clj +++ b/src/ldtab/rdf_list_handling.clj @@ -11,6 +11,19 @@ (conj acc element) (recur remainder (conj acc element))))) +(defn is-top-level-rdf-list? [json] + (if (and (map? json) + (contains? json :subject) + (contains? json :predicate) + (contains? json :object)) + (let [object (:object json) + datatype (:datatype json)] + (and (map? object) + (contains? object "rdf:first") + (contains? object "rdf:rest") + (= datatype "_JSONMAP"))) + false)) + (defn is-rdf-list? [json] (if (map? json) (let [object (:object json) @@ -37,11 +50,17 @@ [f m] (zipmap (keys m) (map f (vals m)))) +(defn encode-top-level-rdf-list [json] + (let [object (:object json) + list-object (:object (encode-rdf-list-object object))] + (assoc json :object list-object :datatype "_JSONLIST"))) + (defn encode-rdf-list "Given a JSON value, return a lexicographically ordered representation." [m] (cond + (is-top-level-rdf-list? m) (encode-top-level-rdf-list m) (is-rdf-list? m) (encode-rdf-list-object (:object m)) - (map? m) (map-on-hash-map-vals encode-rdf-list m) + (map? m) (map-on-hash-map-vals encode-rdf-list m) (coll? m) (into [] (map encode-rdf-list m)) :else m)) diff --git a/src/ldtab/thin2thick.clj b/src/ldtab/thin2thick.clj index a6091fb..66a1a51 100644 --- a/src/ldtab/thin2thick.clj +++ b/src/ldtab/thin2thick.clj @@ -207,14 +207,30 @@ "Given a JSON value, return a lexicographically ordered representation." [m] (cond - (and (map? m) + ; sort RDF lists + (and (map? m) (contains? m :datatype) - (= (:datatype m) "_JSONLIST")) (into (sorted-map) {:datatype "_JSONLIST", :object (map sort-json (:object m))}) - (map? m) (into (sorted-map) (map-on-hash-map-vals sort-json m)) ;sort by key - (coll? m) (vec (map cs/parse-string ;sort by string comparison - (sort (map #(cs/generate-string (sort-json %)) - m)))) - :else m)) + (= (:datatype m) "_JSONLIST")) + (let [sorted-list {:datatype "_JSONLIST", :object (map sort-json (:object m))}] + (if (contains? m :subject) ; top-level RDF list + (into (sorted-map) (merge sorted-list + {:subject (sort-json (:subject m)) + :predicate (:predicate m) + :graph (:graph m) + :assertion (:assertion m) + :retraction (:retraction m) + :annotation (:annotation m)})) + (into (sorted-map) sorted-list))); nested RDF list + + (map? m) + (into (sorted-map) (map-on-hash-map-vals sort-json m)) ; sort by key + + (coll? m) + (vec (map cs/parse-string ; sort by string comparison + (sort (map #(cs/generate-string (sort-json %)) m)))) + + :else + m)) (defn map-subject-2-thin-triples "Given a set of thin triples, From b219472554eb231c644d70b7215caa09f006f743 Mon Sep 17 00:00:00 2001 From: ckindermann Date: Wed, 3 Jul 2024 18:13:39 -0700 Subject: [PATCH 3/3] Add export support for RDF lists --- src/ldtab/thick_rdf.clj | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/ldtab/thick_rdf.clj b/src/ldtab/thick_rdf.clj index f773a3e..7ce90a8 100644 --- a/src/ldtab/thick_rdf.clj +++ b/src/ldtab/thick_rdf.clj @@ -63,7 +63,7 @@ (let [uri (curie-2-uri datatype prefix-2-base)] (.createTypedLiteral model literal uri))) -(defn translate-json ^Resource +(defn translate-json-map ^Resource [json prefix-2-base ^Model model] (let [bnode (.createResource model)] (doseq [k (keys json)] @@ -74,11 +74,27 @@ (translate-predicate-map x prefix-2-base model)))) bnode)) +(defn translate-rdf-list ^Resource + [json prefix-2-base ^Model model] + (let [bnode (.createResource model) + rdf-first (.createProperty model (curie-2-uri "rdf:first" prefix-2-base)) + rdf-rest (.createProperty model (curie-2-uri "rdf:rest" prefix-2-base)) + rdf-nil (.createResource model (curie-2-uri "rdf:nil" prefix-2-base)) + rdf-type (.createProperty model (curie-2-uri "rdf:type" prefix-2-base)) + rdf-first-value (first json) + rdf-rest-value (rest json)] + (.add model bnode rdf-first (translate-predicate-map rdf-first-value prefix-2-base model)) + (if (and rdf-rest-value (not (empty? rdf-rest-value))) + (.add model bnode rdf-rest (translate-rdf-list rdf-rest-value prefix-2-base model)) + (.add model bnode rdf-rest rdf-nil)) + bnode)) + (defn translate-predicate-map ^Resource [predicateMap prefix-2-base ^Model model] (case (get predicateMap "datatype") "_IRI" (translate-iri (get predicateMap "object") prefix-2-base model) - "_JSON" (translate-json (get predicateMap "object") prefix-2-base model) + "_JSONMAP" (translate-json-map (get predicateMap "object") prefix-2-base model) + "_JSONLIST" (translate-rdf-list (get predicateMap "object") prefix-2-base model) (translate-literal (get predicateMap "object") (get predicateMap "datatype") prefix-2-base model))) (defn translate-annotation ^Resource @@ -126,7 +142,9 @@ (let [success (try (cs/parse-string json) (catch Exception e nil)) - success (map? success)] + success (or (map? success) + (coll? success) + (seq? success))] (if success (cs/parse-string json) json))) @@ -148,7 +166,7 @@ subject-json (parse-json (:subject thick-triple)) subject (if (string? subject-json) (translate-iri subject-json prefix-2-base model) - (translate-json subject-json prefix-2-base model)) + (translate-json-map subject-json prefix-2-base model)) predicate (translate-property (:predicate thick-triple) prefix-2-base model) object (translate-predicate-map tt prefix-2-base model) annotation (parse-json (:annotation thick-triple))]