Skip to content

Commit

Permalink
Merge pull request #6 from culturecreates/artsdata-planet-gtq/feature…
Browse files Browse the repository at this point in the history
…/issue-1

Added SPARQL transformations for gtq and unit tests
saumier authored Jan 4, 2025
2 parents fe611bc + 5705e70 commit bf5df4c
Showing 24 changed files with 513 additions and 6 deletions.
26 changes: 26 additions & 0 deletions sparql/collapse_duplicate_contact_pointblanknodes.sparql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Collapse the blank-node contact points of every named Organization into one.
#
# All schema:contactPoint links from a non-blank Organization to a blank node
# are removed, and a single link to one sampled blank node is (re)inserted.
# Only the link triples are touched; the triples describing the unlinked
# blank nodes themselves are not deleted by this update.
PREFIX schema: <http://schema.org/>

DELETE {
  ?org schema:contactPoint ?anyContact .
}
INSERT {
  ?org schema:contactPoint ?keptContact .
}
WHERE {
  # Pick one arbitrary blank-node contact point per organization to keep.
  {
    SELECT ?org (SAMPLE(?candidate) AS ?keptContact)
    WHERE {
      ?org schema:contactPoint ?candidate ;
           a schema:Organization .
      FILTER(isBlank(?candidate) && !isBlank(?org))
    }
    GROUP BY ?org
  }

  # Every blank-node contact point of that same organization gets unlinked.
  ?org schema:contactPoint ?anyContact ;
       a schema:Organization .
  FILTER(isBlank(?anyContact) && !isBlank(?org))
}
49 changes: 49 additions & 0 deletions sparql/copy_subevent_data_to_eventseries.sparql
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Populate an EventSeries that has no schema:name with data taken from its
# sub-events:
#   * startDate = earliest sub-event startDate
#   * endDate   = latest sub-event endDate (skipped when no sub-event has one)
#   * performer, organizer, location, description, image, name and url are
#     copied from one sampled sub-event, each only when that sub-event has it.
PREFIX schema: <http://schema.org/>

INSERT {
  ?s schema:startDate ?firstStartDate .
  ?s schema:endDate ?lastEndDate .
  ?s schema:performer ?performer .
  ?s schema:organizer ?organizer .
  ?s schema:location ?location .
  ?s schema:description ?description .
  ?s schema:image ?image .
  ?s schema:name ?name .
  ?s schema:url ?url .
}
WHERE {
  # The aggregate sub-select has to come first. OPTIONAL is a left join with
  # whatever precedes it, so placing the OPTIONALs ahead of this block would
  # bind ?sampleSubEvent to any resource carrying the property, and the join
  # with the sampled sub-event could then fail and drop the whole series.
  {
    SELECT ?s
           (MIN(?startDate) AS ?firstStartDate)
           (MAX(?endDate) AS ?lastEndDate)
           (SAMPLE(?subEvent) AS ?sampleSubEvent)
    WHERE {
      ?s a schema:EventSeries .
      ?s schema:subEvent ?subEvent .
      ?subEvent schema:startDate ?startDate .
      OPTIONAL {
        ?subEvent schema:endDate ?endDate .
      }
      # Only series that do not have a name yet are filled in.
      FILTER(NOT EXISTS { ?s schema:name ?name })
    }
    GROUP BY ?s
  }

  # Each property is optional on its own; an unbound variable simply means
  # the corresponding INSERT triple is not produced.
  OPTIONAL {
    ?sampleSubEvent schema:performer ?performer .
  }
  OPTIONAL {
    ?sampleSubEvent schema:organizer ?organizer .
  }
  OPTIONAL {
    ?sampleSubEvent schema:location ?location .
  }
  OPTIONAL {
    ?sampleSubEvent schema:description ?description .
  }
  OPTIONAL {
    ?sampleSubEvent schema:image ?image .
  }
  OPTIONAL {
    ?sampleSubEvent schema:name ?name .
  }
  OPTIONAL {
    ?sampleSubEvent schema:url ?url .
  }
}
17 changes: 17 additions & 0 deletions sparql/create_eventseries.sparql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Create an EventSeries for every page URL shared by more than one Event.
#
# The series IRI is the page URL with "#EventSeries" appended, and every
# Event that has that schema:url is attached to it through schema:subEvent.
PREFIX schema: <http://schema.org/>

INSERT {
  ?eventSeries a schema:EventSeries ;
               schema:subEvent ?event .
}
WHERE {
  ?event a schema:Event ;
         schema:url ?page .
  {
    SELECT ?page (COUNT(?event) AS ?count) ?eventSeries
    WHERE {
      ?event a schema:Event ;
             schema:url ?page .
      BIND(URI(CONCAT(STR(?page), "#EventSeries")) AS ?eventSeries)
    }
    GROUP BY ?page ?eventSeries
    # Test the aggregate itself: in the SPARQL 1.1 algebra HAVING is applied
    # before SELECT expressions are bound, so the ?count alias is not
    # guaranteed to be visible here.
    HAVING(COUNT(?event) > 1)
  }
}
10 changes: 10 additions & 0 deletions sparql/fix_isni.sparql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Add a schema:sameAs link to isni.org for every resource whose
# schema:identifier starts with "0000". Spaces inside the identifier are
# removed before it is appended to the ISNI base URL.
PREFIX schema: <http://schema.org/>

INSERT {
  ?agent schema:sameAs ?isniUri .
}
WHERE {
  ?agent schema:identifier ?rawId .
  FILTER(STRSTARTS(?rawId, "0000"))
  BIND(IRI(CONCAT("https://isni.org/isni/", REPLACE(?rawId, " ", ""))) AS ?isniUri)
}
15 changes: 15 additions & 0 deletions sparql/fix_offer_availability.sparql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Prefix bare schema:availability values on an Event's offers (for example
# "outOfStock") with the http://schema.org/ namespace.
#
# Values that already contain "http://schema.org/" are left alone.
# NOTE(review): the replacement is inserted as a plain string literal, not as
# an IRI - confirm that this is what downstream consumers expect.
PREFIX schema: <http://schema.org/>

DELETE {
  ?offer schema:availability ?availability .
}
INSERT {
  ?offer schema:availability ?new_availability .
}
WHERE {
  ?event a schema:Event ;
         schema:offers ?offer .
  ?offer schema:availability ?availability .

  # Only literals are rewritten. CONCAT raises an error on an IRI, which would
  # leave ?new_availability unbound: the DELETE would still fire while the
  # INSERT would be skipped, silently dropping the value.
  FILTER(isLiteral(?availability))
  FILTER(!CONTAINS(STR(?availability), "http://schema.org/"))

  # Strip an https://schema.org/ prefix so the namespace is not doubled
  # ("http://schema.org/https://schema.org/...").
  BIND(IF(STRSTARTS(STR(?availability), "https://schema.org/"),
          STRAFTER(STR(?availability), "https://schema.org/"),
          STR(?availability)) AS ?local_name)
  BIND(CONCAT("http://schema.org/", ?local_name) AS ?new_availability)
}
11 changes: 11 additions & 0 deletions sparql/fix_schemaorg_https_objects.sparql
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Rewrite every object that starts with "https://schema.org/" to the
# equivalent "http://schema.org/" IRI, whatever the subject and predicate.
DELETE {
  ?a ?b ?c .
}
INSERT {
  ?a ?b ?c2 .
}
WHERE {
  ?a ?b ?c .
  # STR() is required: STRSTARTS and STRAFTER accept string literals only, so
  # without it an IRI object raises a type error and is never matched.
  # Blank-node objects still fail the filter because STR() errors on them.
  FILTER(STRSTARTS(STR(?c), "https://schema.org/"))
  BIND(URI(CONCAT("http://", STRAFTER(STR(?c), "https://"))) AS ?c2)
}
12 changes: 12 additions & 0 deletions sparql/fix_wikidata_uri.sparql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Replace schema:sameAs links that point at a Wikidata wiki page
# (…www.wikidata.org/wiki/Q…) with the matching entity IRI
# (http://www.wikidata.org/entity/Q…).
PREFIX schema: <http://schema.org/>

DELETE {
  ?thing schema:sameAs ?wikiPage .
}
INSERT {
  ?thing schema:sameAs ?entityUri .
}
WHERE {
  ?thing schema:sameAs ?wikiPage .
  BIND(STR(?wikiPage) AS ?pageText)
  FILTER(CONTAINS(?pageText, "www.wikidata.org/wiki/Q"))
  # Everything after the "Q" of the wiki path is carried over unchanged.
  BIND(STRAFTER(?pageText, "www.wikidata.org/wiki/Q") AS ?qNumber)
  BIND(URI(CONCAT("http://www.wikidata.org/entity/Q", ?qNumber)) AS ?entityUri)
}
16 changes: 10 additions & 6 deletions src/lib/graph_fetcher.rb
Original file line number Diff line number Diff line change
@@ -18,12 +18,16 @@ def self.load(entity_urls: [], base_url: nil, headers: nil, headless: false)
end

sparql_paths = [
"./sparql/remove_objects.sparql",
"./sparql/fix_entity_type_capital.sparql",
"./sparql/fix_date_timezone.sparql",
"./sparql/fix_address_country_name.sparql",
"./sparql/fix_malformed_urls.sparql",
"./sparql/replace_blank_nodes.sparql",
"./sparql/remove_objects.sparql",
"./sparql/replace_blank_nodes.sparql",
"./sparql/fix_entity_type_capital.sparql",
"./sparql/fix_date_timezone.sparql",
"./sparql/fix_address_country_name.sparql",
"./sparql/fix_malformed_urls.sparql",
"./sparql/fix_schemaorg_https_objects.sparql",
"./sparql/fix_wikidata_uri.sparql",
"./sparql/fix_isni.sparql",
"./sparql/collapse_duplicate_contact_pointblanknodes.sparql"
]

base_url = entity_urls[0].split('/')[0..2].join('/')
18 changes: 18 additions & 0 deletions tests/collapse_duplicate_contact_pointblanknodes_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
require 'minitest/autorun'
require 'linkeddata'

# Exercises sparql/collapse_duplicate_contact_pointblanknodes.sparql against a
# JSON-LD fixture and checks how many contact points are left afterwards.
class CollapseDuplicateContactPointBlankNodesTest < Minitest::Test
  # Records the path of the SPARQL update under test.
  def setup
    @sparql_file = "./sparql/collapse_duplicate_contact_pointblanknodes.sparql"
  end

  # After the update exactly one schema:contactPoint object must remain.
  def test_collapse_duplicate_contact_pointblanknodes
    update = SPARQL.parse(File.read(@sparql_file), update: true)
    graph = RDF::Graph.load("./tests/fixtures/test_collapse_duplicate_contact_pointblanknodes.jsonld")
    graph.query(update)

    contact_point = RDF::URI("http://schema.org/contactPoint")
    remaining = graph.query([nil, contact_point, nil]).objects

    assert_equal(1, remaining.count)
  end
end
21 changes: 21 additions & 0 deletions tests/copy_subevent_data_to_eventseries_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
require 'minitest/autorun'
require 'linkeddata'

# Exercises sparql/copy_subevent_data_to_eventseries.sparql against a JSON-LD
# fixture and checks the dates copied onto the event series.
class CopySubeventDataToEventSeriesTest < Minitest::Test
  # Records the path of the SPARQL update under test.
  def setup
    @sparql_file = "./sparql/copy_subevent_data_to_eventseries.sparql"
  end

  # The series must end up with the expected start and end date values.
  def test_copy_subevent_data_to_eventseries
    update = SPARQL.parse(File.read(@sparql_file), update: true)
    graph = RDF::Graph.load("./tests/fixtures/test_copy_subevent_data_to_eventseries.jsonld")
    graph.query(update)

    start_date = graph.query(
      [RDF::URI("http://example.com/event-series/1"), RDF::URI("http://schema.org/startDate"), nil]
    ).objects.first.value
    end_date = graph.query(
      [RDF::URI("http://example.com/event-series/1"), RDF::URI("http://schema.org/endDate"), nil]
    ).objects.first.value

    assert_equal(["2024-12-01T18:00:00", "2024-12-02T20:00:00"], [start_date, end_date])
  end
end
18 changes: 18 additions & 0 deletions tests/create_eventseries_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
require 'minitest/autorun'
require 'linkeddata'

# Exercises sparql/create_eventseries.sparql against a JSON-LD fixture and
# checks how many schema:subEvent links exist afterwards.
class CreateEventSeriesTest < Minitest::Test
  # Records the path of the SPARQL update under test.
  def setup
    @sparql_file = "./sparql/create_eventseries.sparql"
  end

  # Four schema:subEvent objects are expected once the update has run.
  def test_create_eventseries
    update = SPARQL.parse(File.read(@sparql_file), update: true)
    graph = RDF::Graph.load("./tests/fixtures/test_create-eventseries.jsonld")
    graph.query(update)

    sub_event = RDF::URI("http://schema.org/subEvent")
    linked_events = graph.query([nil, sub_event, nil]).objects

    assert_equal(4, linked_events.count)
  end
end
18 changes: 18 additions & 0 deletions tests/fix_isni_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
require 'minitest/autorun'
require 'linkeddata'

# Exercises sparql/fix_isni.sparql against a JSON-LD fixture and checks the
# schema:sameAs link that is generated for the person.
class FixIsniTest < Minitest::Test
  # Records the path of the SPARQL update under test.
  def setup
    @sparql_file = "./sparql/fix_isni.sparql"
  end

  # The person's first sameAs value must be the expected ISNI URL.
  def test_fix_isni
    update = SPARQL.parse(File.read(@sparql_file), update: true)
    graph = RDF::Graph.load("./tests/fixtures/test_fix_isni.jsonld")
    graph.query(update)

    person = RDF::URI("https://example.org/person/123")
    same_as = RDF::URI("http://schema.org/sameAs")
    first_link = graph.query([person, same_as, nil]).objects.first

    assert_equal("https://isni.org/isni/00001234567", first_link.value)
  end
end
18 changes: 18 additions & 0 deletions tests/fix_offer_availability_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
require 'minitest/autorun'
require 'linkeddata'

# Exercises sparql/fix_offer_availability.sparql against a JSON-LD fixture and
# checks the rewritten schema:availability value.
class FixOfferAvailabilityTest < Minitest::Test
  # Records the path of the SPARQL update under test.
  def setup
    @sparql_file = "./sparql/fix_offer_availability.sparql"
  end

  # The first availability value found must carry the schema.org prefix.
  def test_fix_offer_availability
    update = SPARQL.parse(File.read(@sparql_file), update: true)
    graph = RDF::Graph.load("./tests/fixtures/test_fix_offer_availability.jsonld")
    graph.query(update)

    availability = RDF::URI("http://schema.org/availability")
    first_value = graph.query([nil, availability, nil]).objects.first

    assert_equal("http://schema.org/outOfStock", first_value.value)
  end
end
18 changes: 18 additions & 0 deletions tests/fix_schemaorg_https_objects_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
require 'minitest/autorun'
require 'linkeddata'

# Exercises sparql/fix_schemaorg_https_objects.sparql against a JSON-LD fixture
# and checks the object that ends up on resource1.
class FixSchemaOrgHttpsObjectsTest < Minitest::Test
  # Records the path of the SPARQL update under test.
  def setup
    @sparql_file = "./sparql/fix_schemaorg_https_objects.sparql"
  end

  # The first someProperty object of resource1 must be the http:// IRI.
  def test_fix_schemaorg_https_objects
    update = SPARQL.parse(File.read(@sparql_file), update: true)
    graph = RDF::Graph.load("./tests/fixtures/test_fix_schemaorg_https_objects.jsonld")
    graph.query(update)

    resource = RDF::URI("http://example.org/resource1")
    property = RDF::URI("http://schema.org/someProperty")
    first_object = graph.query([resource, property, nil]).objects.first

    assert_equal(RDF::URI("http://schema.org/Example1"), first_object)
  end
end
18 changes: 18 additions & 0 deletions tests/fix_wikidata_uri_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
require 'minitest/autorun'
require 'linkeddata'

# Exercises sparql/fix_wikidata_uri.sparql against a JSON-LD fixture and checks
# the schema:sameAs link that ends up on the entity.
class FixWikidataUriTest < Minitest::Test
  # Records the path of the SPARQL update under test.
  def setup
    @sparql_file = "./sparql/fix_wikidata_uri.sparql"
  end

  # The entity's first sameAs object must be the Wikidata entity IRI.
  def test_fix_wikidata_uri
    update = SPARQL.parse(File.read(@sparql_file), update: true)
    graph = RDF::Graph.load("./tests/fixtures/test_fix_wikidata_uri.jsonld")
    graph.query(update)

    entity = RDF::URI("http://example.com/entity/123")
    same_as = RDF::URI("http://schema.org/sameAs")
    first_link = graph.query([entity, same_as, nil]).objects.first

    assert_equal(RDF::URI("http://www.wikidata.org/entity/Q42"), first_link)
  end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"@context": "http://schema.org/",
"@type": "Organization",
"@id": "http://www.example.com/org1",
"name": "Sample Organization",
"contactPoint": [
{
"@type": "ContactPoint",
"telephone": "+1-800-555-1234"
},
{
"@type": "ContactPoint",
"telephone": "+1-800-555-5678"
},
{
"@type": "ContactPoint",
"telephone": "+1-800-555-9876"
}
]
}

58 changes: 58 additions & 0 deletions tests/fixtures/test_copy_subevent_data_to_eventseries copy.jsonld
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"@context": "http://schema.org/",
"@id": "http://example.com/event-series/1",
"@type": "EventSeries",
"subEvent": [
{
"@id": "http://example.com/event/1",
"@type": "Event",
"startDate": "2024-12-01T18:00:00",
"endDate": "2024-12-01T20:00:00",
"performer": {
"@id": "http://example.com/performer/1",
"@type": "Person",
"name": "Artist A"
},
"organizer": {
"@id": "http://example.com/organizer/1",
"@type": "Organization",
"name": "Organizer X"
},
"location": {
"@id": "http://example.com/location/1",
"@type": "Place",
"name": "Venue A"
},
"description": "A wonderful event",
"image": "http://example.com/images/event1.jpg",
"name": "Event 1",
"url": "http://example.com/events/event1"
},
{
"@id": "http://example.com/event/2",
"@type": "Event",
"startDate": "2024-12-02T18:00:00",
"endDate": "2024-12-02T20:00:00",
"performer": {
"@id": "http://example.com/performer/2",
"@type": "Person",
"name": "Artist B"
},
"organizer": {
"@id": "http://example.com/organizer/1",
"@type": "Organization",
"name": "Organizer X"
},
"location": {
"@id": "http://example.com/location/2",
"@type": "Place",
"name": "Venue B"
},
"description": "Another wonderful event",
"image": "http://example.com/images/event2.jpg",
"name": "Event 2",
"url": "http://example.com/events/event2"
}
]
}

Loading

0 comments on commit bf5df4c

Please sign in to comment.