diff --git a/README.md b/README.md
index 3aba58b..03aebb0 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,8 @@ artsdata-push:
version:
reportCallbackUrl:
shacl:
+ fetch-urls-headlessly:
+ offset:
```
@@ -51,10 +53,27 @@ artsdata-push:
| `version` | Version of the artifact. Usually a date (e.g., 2020-10-23). Use unreserved characters. (If not provided, version will be set as the current date).
| `reportCallbackUrl ` | URL to send back the data validation report asynchronously using POST "Content-Type: application/json".
| `shacl` | URL to the SHACL file to perform validations.
-
+| `fetch-urls-headlessly` | Fetch the URLs of entities using a headless browser (defaults to false).
+| `offset` | Offset for pagination strategy (defaults to 1).
## Potential Issues
Remember to use only unreserved characters ([0-9a-zA-Z-._]) for input variables where mentioned.
+
+# Release Instructions
+
+When preparing a release for the artsdata-pipeline-action, please follow these versioning guidelines:
+
+## Patch release (e.g., 2.0.7 → 2.0.8):
+
+For small feature additions or bug fixes.
+
+## Minor release (e.g., 2.0.7 → 2.1.0):
+
+For larger changes or significant improvements that could impact compatibility.
+
+## Major release (e.g., 2.0.7 → 3.0.0):
+
+For major overhauls or breaking changes. If there's a drastic change in functionality or usage, increment the first number of the version and reset the others to zero (e.g., 2.0.7 → 3.0.0).
\ No newline at end of file
diff --git a/src/lib/graph_fetcher.rb b/src/lib/graph_fetcher.rb
index 10de379..f20ed3d 100644
--- a/src/lib/graph_fetcher.rb
+++ b/src/lib/graph_fetcher.rb
@@ -10,7 +10,7 @@ def self.load(entity_urls: [], base_url: nil, headers: nil, headless: false)
@entity_urls = entity_urls
@base_url = base_url
@headers = headers ||= {"User-Agent" => "artsdata-crawler"}
- @graph = if headless
+ @graph = if headless == "true"
headless_browser = HeadlessBrowser.new(headers)
headless_browser.fetch_json_ld_objects(entity_urls)
else
diff --git a/src/lib/rdf_processor.rb b/src/lib/rdf_processor.rb
index f892195..687bc9c 100644
--- a/src/lib/rdf_processor.rb
+++ b/src/lib/rdf_processor.rb
@@ -7,17 +7,18 @@ def self.process_rdf(entity_urls, base_url, headers)
add_url_sparql_file = File.read('./sparql/add_derived_from.sparql')
entity_urls.each do |entity_url|
- puts "Processing #{entity_url} in non-headless mode"
- entity_url = entity_url.gsub(' ', '+')
- options = { rdfstar: true, headers: headers }
- loaded_graph = RDF::Graph.load(entity_url, **options)
- sparql_file_with_url = add_url_sparql_file.gsub("subject_url", entity_url)
- loaded_graph.query(SPARQL.parse(sparql_file_with_url, update: true))
- graph << loaded_graph
- graph
+ begin
+ puts "Processing #{entity_url} in non-headless mode"
+ entity_url = entity_url.gsub(' ', '+')
+ options = { headers: headers }
+ loaded_graph = RDF::Graph.load(entity_url, **options)
+ sparql_file_with_url = add_url_sparql_file.gsub("subject_url", entity_url)
+ loaded_graph.query(SPARQL.parse(sparql_file_with_url, update: true))
+ graph << loaded_graph
+ rescue StandardError => e
+ puts "Error loading RDF from #{entity_url}: #{e.message}"
+ end
end
graph
- rescue StandardError => e
- puts "Error loading RDF from #{entity_url}: #{e.message}"
end
-end
+end
\ No newline at end of file