Skip to content

Commit

Permalink
Add find_distinct_datastreams.
Browse files Browse the repository at this point in the history
  • Loading branch information
markpbaggett committed Jan 4, 2020
1 parent c9b88da commit 995d9bb
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 1 deletion.
20 changes: 19 additions & 1 deletion src/moldybread.nim
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,17 @@ when isMainModule:
##
## moldybread -o validate_checksums -n test -y /full/path/to/my/yaml/file.yml
##
## Find Unique Datastreams
## =======================
##
## You can get a list of unique datastreams from objects across a result set.
##
## Example command:
##
## .. code-block:: sh
##
## moldybread -o find_distinct_datastreams -n test -y /full/path/to/my/yaml/file.yml
##
const banner = """
__ __ _ _ ____ _
| \/ | ___ | | __| |_ _ | __ ) _ __ ___ __ _ __| |
Expand All @@ -250,7 +261,7 @@ when isMainModule:
"""
var p = newParser(fmt"Moldy Bread: See https://markpbaggett.github.io/moldybread/moldybread.html for documentation and examples on how to use this package.{'\n'}{'\n'}"):
help(banner)
option("-o", "--operation", help="Specify operation", choices = @["harvest_datastream", "harvest_datastream_no_pages", "update_metadata", "update_metadata_and_delete_old_versions", "download_foxml", "version_datastream", "change_object_state", "purge_old_versions", "find_objs_missing_dsid", "get_datastream_history", "get_datastream_at_date", "validate_checksums"])
option("-o", "--operation", help="Specify operation", choices = @["harvest_datastream", "harvest_datastream_no_pages", "update_metadata", "update_metadata_and_delete_old_versions", "download_foxml", "version_datastream", "change_object_state", "purge_old_versions", "find_objs_missing_dsid", "get_datastream_history", "get_datastream_at_date", "validate_checksums", "find_distinct_datastreams"])
option("-d", "--dsid", help="Specify datastream id.", default="")
option("-n", "--namespaceorpid", help="Populate results based on namespace or PID.", default="")
option("-dc", "--dcsearch", help="Populate results based on dc field and strings. See docs for formatting info.", default="")
Expand Down Expand Up @@ -381,6 +392,13 @@ when isMainModule:
echo fmt"{'\n'}{len(test.successes)} objects had valid checksums for their {opts.dsid} datastream. {len(test.errors)} objects had invalid checksums on their {opts.dsid} datastream."
if len(test.errors) > 0:
echo test.errors
of "find_distinct_datastreams":
if opts.namespaceorpid == "" and opts.dcsearch == "" and opts.terms == "":
echo "Must specify how you want to populated results: -p for Pid or Namespace, -dc for dc fields and strings, or -t for keyword terms."
else:
fedora_connection.results = fedora_connection.populate_results()
let result = fedora_connection.find_distinct_datastreams()
echo fmt"{'\n'}{'\n'}There are {len(result)} unique datastreams across this result set: {'\n'}{result}"
of "update_metadata":
if opts.path != "":
yaml_settings.directory_path = opts.path
Expand Down
22 changes: 22 additions & 0 deletions src/moldybreadpkg/fedora.nim
Original file line number Diff line number Diff line change
Expand Up @@ -709,3 +709,25 @@ method validate_checksums*(this: FedoraRequest): Message {. base .} =
bar.increment()
bar.finish()
Message(errors: errors, successes: successes, attempts: attempts)

method find_distinct_datastreams*(this: FedoraRequest): seq[string] {. base .} =
  ## Builds a duplicate-free list of the datastreams found on the objects in
  ## the current result set.
  ##
  ## Each datastream is listed once, in the order it is first encountered
  ## while walking the report returned by `get_datastreams`. A progress bar is
  ## advanced once per report entry while the filtering runs.
  ##
  ## Example:
  ##
  ## .. code-block:: nim
  ##
  ##    let fedora_connection = initFedoraRequest(output_directory="/home/mark/nim_projects/moldybread/experiment", pid_part="test")
  ##    fedora_connection.results = fedora_connection.populate_results()
  ##    echo fedora_connection.find_distinct_datastreams()
  ##
  var progress = newProgressBar()
  let report = this.get_datastreams()
  echo "\n\nFiltering unique datastreams from result set.\n"
  progress.start()
  for entry in report:
    # Slot 1 of each report entry is iterated as that entry's datastreams
    # (presumably slot 0 identifies the object -- confirm against
    # get_datastreams).
    for dsid in entry[1]:
      if not result.contains(dsid):
        result.add(dsid)
    progress.increment()
  progress.finish()

0 comments on commit 995d9bb

Please sign in to comment.