Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change format for RDF lists #25

Merged
merged 4 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions src/ldtab/rdf_list_handling.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
(ns ldtab.rdf-list-handling
(:require [cheshire.core :as cs])
(:gen-class))

; Forward declaration: encode-rdf-list-object calls encode-rdf-list,
; which is only defined further down in this namespace.
(declare encode-rdf-list)

(defn collect-list-elements
  "Walk a nested rdf:first / rdf:rest structure and append its elements to acc.

  json is a map with the string keys \"rdf:first\" and \"rdf:rest\". Each value
  is a sequence of which only the first entry is used: the first
  \"rdf:first\" entry is the list element itself, and the :object of the
  first \"rdf:rest\" entry is the remainder of the list (either another such
  map or the string \"rdf:nil\").

  Returns acc with the elements conj'ed on in list order.

  A remainder that is not a map ends the walk. For a well-formed list that
  is the \"rdf:nil\" terminator; for a malformed one (e.g. a missing
  \"rdf:rest\") stopping here avoids recurring on a non-map forever, since
  every lookup on it would just yield nil again."
  [json acc]
  (let [element (first (get json "rdf:first"))
        remainder (:object (first (get json "rdf:rest")))
        acc (conj acc element)]
    (if (map? remainder)
      (recur remainder acc)
      acc)))

(defn is-top-level-rdf-list?
  "True when json is a map carrying :subject, :predicate and :object keys
  whose :datatype is \"_JSONMAP\" and whose :object is itself a map with both
  an \"rdf:first\" and an \"rdf:rest\" key. False for everything else."
  [json]
  (boolean
    (when (and (map? json)
               (every? #(contains? json %) [:subject :predicate :object]))
      (let [{list-head :object, dtype :datatype} json]
        (and (= dtype "_JSONMAP")
             ;; map? must come before contains? so non-maps are never probed
             (map? list-head)
             (contains? list-head "rdf:first")
             (contains? list-head "rdf:rest"))))))

(defn is-rdf-list?
  "True when json is a map whose :datatype is \"_JSONMAP\" and whose :object
  is a map containing both an \"rdf:first\" and an \"rdf:rest\" key. Unlike
  is-top-level-rdf-list?, no :subject / :predicate keys are required.
  False for everything else."
  [json]
  (and (map? json)
       (= "_JSONMAP" (:datatype json))
       (let [node (:object json)]
         (and (map? node)
              (contains? node "rdf:first")
              (contains? node "rdf:rest")))))

(defn encode-rdf-list-object
  "Flatten an RDF list into a vector-valued object map.

  json is required to be an RDF list node (a map with \"rdf:first\" and
  \"rdf:rest\"). Its elements are gathered with collect-list-elements, each
  one is passed through encode-rdf-list so nested lists are rewritten too,
  and the result is returned as {:object [...] :datatype \"_JSONLIST\"}."
  [json]
  (let [members (collect-list-elements json [])]
    {:datatype "_JSONLIST"
     :object (mapv encode-rdf-list members)}))

(defn map-on-hash-map-vals
  "Build a new map that has the same keys as m, with every value replaced
  by the result of calling f on it.
  Example:
  (map-on-hash-map-vals inc {:a 1, :b 2}) ;=> {:a 2, :b 3}"
  [f m]
  (into {}
        (map (fn [[k v]] [k (f v)]))
        m))

(defn encode-top-level-rdf-list
  "Rewrite a map whose :object is an RDF list: the :object is replaced by
  the vector produced by encode-rdf-list-object and :datatype is set to
  \"_JSONLIST\". All other keys of json are kept as they are."
  [json]
  (-> json
      (assoc :datatype "_JSONLIST")
      (assoc :object (:object (encode-rdf-list-object (:object json))))))

(defn encode-rdf-list
  "Recursively replace rdf:first / rdf:rest structures inside m by their
  \"_JSONLIST\" encoding.

  - a non-collection value is returned unchanged
  - a non-map collection is mapped element-wise into a vector
  - a map recognised by is-top-level-rdf-list? is rewritten in place via
    encode-top-level-rdf-list
  - a map recognised by is-rdf-list? is replaced by the result of
    encode-rdf-list-object on its :object
  - any other map has encode-rdf-list applied to each of its values"
  [m]
  (cond
    (not (coll? m)) m
    ;; both list predicates only ever accept maps, so other collections
    ;; can be dispatched before them
    (not (map? m)) (mapv encode-rdf-list m)
    (is-top-level-rdf-list? m) (encode-top-level-rdf-list m)
    (is-rdf-list? m) (encode-rdf-list-object (:object m))
    :else (map-on-hash-map-vals encode-rdf-list m)))
26 changes: 22 additions & 4 deletions src/ldtab/thick_rdf.clj
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
(let [uri (curie-2-uri datatype prefix-2-base)]
(.createTypedLiteral model literal uri)))

(defn translate-json ^Resource
(defn translate-json-map ^Resource
[json prefix-2-base ^Model model]
(let [bnode (.createResource model)]
(doseq [k (keys json)]
Expand All @@ -74,11 +74,27 @@
(translate-predicate-map x prefix-2-base model))))
bnode))

(defn translate-rdf-list
  "Translate a sequence of predicate maps into an rdf:first / rdf:rest chain
  in model and return the head of that chain.

  json          - sequential collection of predicate maps; each one is
                  translated with translate-predicate-map
  prefix-2-base - prefix information handed to curie-2-uri
  model         - the model the list nodes and statements are added to

  Every element gets a fresh anonymous resource with an rdf:first statement
  for the translated element and an rdf:rest statement pointing at the next
  node; the last node points at rdf:nil. An empty json yields the rdf:nil
  resource itself.

  The chain is built back to front with reduce, so the three RDF resources
  are created once per call and long lists do not consume call stack."
  ^Resource
  [json prefix-2-base ^Model model]
  (let [rdf-first (.createProperty model (curie-2-uri "rdf:first" prefix-2-base))
        rdf-rest (.createProperty model (curie-2-uri "rdf:rest" prefix-2-base))
        rdf-nil (.createResource model (curie-2-uri "rdf:nil" prefix-2-base))]
    (reduce (fn [tail element]
              (let [node (.createResource model)]
                (.add model node rdf-first (translate-predicate-map element prefix-2-base model))
                ;; tail is rdf:nil for the last element, otherwise the node built just before
                (.add model node rdf-rest tail)
                node))
            rdf-nil
            (reverse json))))

; Translate a predicate map by dispatching on its "datatype" entry; the
; "object" entry is the value handed to the selected translator:
;   "_IRI"      -> translate-iri
;   "_JSONMAP"  -> translate-json-map
;   "_JSONLIST" -> translate-rdf-list
;   otherwise   -> translate-literal, which also receives the datatype
; The result is whatever the selected translator returns (hinted as Resource).
(defn translate-predicate-map ^Resource
[predicateMap prefix-2-base ^Model model]
(case (get predicateMap "datatype")
"_IRI" (translate-iri (get predicateMap "object") prefix-2-base model)
; NOTE(review): this listing looks like a rendered diff. The "_JSON" branch
; below (calling translate-json) is presumably the pre-change line that the
; "_JSONMAP" branch replaces - confirm against the actual file.
"_JSON" (translate-json (get predicateMap "object") prefix-2-base model)
"_JSONMAP" (translate-json-map (get predicateMap "object") prefix-2-base model)
"_JSONLIST" (translate-rdf-list (get predicateMap "object") prefix-2-base model)
; no match: treat the value as a literal typed by the datatype entry
(translate-literal (get predicateMap "object") (get predicateMap "datatype") prefix-2-base model)))

(defn translate-annotation ^Resource
Expand Down Expand Up @@ -126,7 +142,9 @@
(let [success (try
(cs/parse-string json)
(catch Exception e nil))
success (map? success)]
success (or (map? success)
(coll? success)
(seq? success))]
(if success
(cs/parse-string json)
json)))
Expand All @@ -148,7 +166,7 @@
subject-json (parse-json (:subject thick-triple))
subject (if (string? subject-json)
(translate-iri subject-json prefix-2-base model)
(translate-json subject-json prefix-2-base model))
(translate-json-map subject-json prefix-2-base model))
predicate (translate-property (:predicate thick-triple) prefix-2-base model)
object (translate-predicate-map tt prefix-2-base model)
annotation (parse-json (:annotation thick-triple))]
Expand Down
40 changes: 31 additions & 9 deletions src/ldtab/thin2thick.clj
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
(:require [clojure.set :as set]
[clojure.string :as str]
[ldtab.annotation-handling :as ann]
[ldtab.rdf-list-handling :as rdf-list]
[ldtab.gci-handling :as gci]
[cheshire.core :as cs])
(:import [org.apache.jena.graph NodeFactory Triple Node])
Expand Down Expand Up @@ -119,7 +120,7 @@
(defn get-datatype
([^Node node]
(cond
(.isBlank node) "_JSON"
(.isBlank node) "_JSONMAP"
(.isURI node) "_IRI"
;NB: Jena can't identify plain literals
(.isLiteral node) (let [datatype (.getLiteralDatatypeURI node)
Expand All @@ -130,7 +131,7 @@
:else "ERROR"))
([^Node node iri2prefix]
(cond
(.isBlank node) "_JSON"
(.isBlank node) "_JSONMAP"
(.isURI node) "_IRI"
;NB: Jena can't identify plain literals
(.isLiteral node) (let [datatype (curify-with (.getLiteralDatatypeURI node) iri2prefix)
Expand Down Expand Up @@ -206,11 +207,30 @@
"Given a JSON value, return a lexicographically ordered representation."
[m]
(cond
(map? m) (into (sorted-map) (map-on-hash-map-vals sort-json m)) ;sort by key
(coll? m) (vec (map cs/parse-string ;sort by string comparison
(sort (map #(cs/generate-string (sort-json %))
m))))
:else m))
; sort RDF lists
(and (map? m)
(contains? m :datatype)
(= (:datatype m) "_JSONLIST"))
(let [sorted-list {:datatype "_JSONLIST", :object (map sort-json (:object m))}]
(if (contains? m :subject) ; top-level RDF list
(into (sorted-map) (merge sorted-list
{:subject (sort-json (:subject m))
:predicate (:predicate m)
:graph (:graph m)
:assertion (:assertion m)
:retraction (:retraction m)
:annotation (:annotation m)}))
(into (sorted-map) sorted-list))); nested RDF list

(map? m)
(into (sorted-map) (map-on-hash-map-vals sort-json m)) ; sort by key

(coll? m)
(vec (map cs/parse-string ; sort by string comparison
(sort (map #(cs/generate-string (sort-json %)) m))))

:else
m))

(defn map-subject-2-thin-triples
"Given a set of thin triples,
Expand Down Expand Up @@ -263,7 +283,8 @@
(= (:predicate %) "rdf:Statement"))
(ann/encode-raw-annotation-map (:object %))
%) gcis)
sorted (map sort-json annotations)
rdf-lists (map rdf-list/encode-rdf-list annotations)
sorted (map sort-json rdf-lists)
hashed (map hash-existential-subject-blanknode sorted)
normalised (map #(cs/parse-string (cs/generate-string %)) hashed)];TODO: stringify keys - this is a (probably an inefficient?) workaround
normalised))
Expand All @@ -277,7 +298,8 @@
(= (:predicate %) "rdf:Statement"))
(ann/encode-raw-annotation-map (:object %))
%) gcis)
sorted (map sort-json annotations)
rdf-lists (map rdf-list/encode-rdf-list annotations)
sorted (map sort-json rdf-lists)
hashed (map hash-existential-subject-blanknode sorted)
normalised (map #(cs/parse-string (cs/generate-string %)) hashed)];TODO: stringify keys - this is a (probably an inefficient?) workaround
normalised)))
Loading