Skip to content

Commit a93f8da

Browse files
authored
Towards finalized Blobentry (#1187)
1 parent cc4511e commit a93f8da

18 files changed

+260
-286
lines changed

Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
1616
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
1717
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1818
ManifoldsBase = "3362f125-f0bb-47a3-aa74-596ffd7ef2fb"
19+
NanoDates = "46f1a544-deae-4307-8689-c12aa3c955c6"
1920
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
2021
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
2122
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
@@ -56,6 +57,7 @@ JSON = "1.0.0"
5657
LieGroups = "0.1"
5758
LinearAlgebra = "1.10"
5859
ManifoldsBase = "1, 2"
60+
NanoDates = "1.0.3"
5961
OrderedCollections = "1.4"
6062
Pkg = "1.4, 1.5"
6163
ProgressMeter = "1"
Lines changed: 83 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,124 @@
1-
21
##==============================================================================
32
## Blobentry
43
##==============================================================================
5-
#TODO think origin and buildSourceString should be deprecated, description can be used instead
6-
#TODO hash - maybe use both crc32c for fast error check and sha256 for strong integrity check
7-
# stored seperately as crc and sha or as a tuple `hash::Tuple{Symbol, String}` where Symbol is :crc32c or :sha256
8-
# or an enum with suppored hash types
94
"""
105
$(TYPEDEF)
116
127
A `Blobentry` is a small amount of structured data that holds reference information to find an actual blob. Many `Blobentry`s
138
can exist on different graph nodes spanning Agents and Factor Graphs which can all reference the same `Blob`.
149
1510
Notes:
16-
- `blobId`s should be unique within a blobstore and are immutable.
11+
- `blobid`s should be unique within a blobstore and are immutable.
1712
"""
18-
Base.@kwdef struct Blobentry
19-
""" Remotely assigned and globally unique identifier for the `Blobentry` itself (not the `.blobId`). """
20-
id::Union{UUID, Nothing} = nothing
21-
""" Machine friendly and globally unique identifier of the 'Blob', usually assigned from a common point in the system. This can be used to guarantee unique retrieval of the large data blob. """
22-
blobId::UUID = uuid4()
13+
StructUtils.@kwarg struct Blobentry
2314
""" Human friendly label of the `Blob` and also used as unique identifier per node on which a `Blobentry` is added. E.g. do "LEFTCAM_1", "LEFTCAM_2", ... of you need to repeat a label on the same variable. """
2415
label::Symbol
25-
""" A hint about where the `Blob` itself might be stored. Remember that a Blob may be duplicated over multiple blobstores. """
16+
""" The label of the `Blobstore` in which the `Blob` is stored. Default is `:default`."""
2617
blobstore::Symbol = :default
27-
""" A hash value to ensure data consistency which must correspond to the stored hash upon retrieval. Use `bytes2hex(sha256(blob))`. [Legacy: some usage functions allow the check to be skipped if needed.] """
28-
hash::String = ""# Probably https://docs.julialang.org/en/v1/stdlib/SHA
29-
""" Context from which a Blobentry=>Blob was first created. E.g. agent|graph|varlabel. """
18+
""" Machine friendly and unique within a `Blobstore` identifier of the 'Blob'."""
19+
blobid::UUID = uuid4() # was blobId
20+
""" (Optional) crc32c hash value to ensure data consistency which must correspond to the stored hash upon retrieval."""
21+
crchash::Union{UInt32, Nothing} =
22+
nothing & (
23+
json=(
24+
lower = h->isnothing(h) ? nothing : string(h, base = 16),
25+
lift = s->isnothing(s) ? nothing : parse(UInt32, s; base = 16),
26+
)
27+
)
28+
""" (Optional) sha256 hash value to ensure data consistency which must correspond to the stored hash upon retrieval."""
29+
shahash::Union{Vector{UInt8}, Nothing} =
30+
nothing & (
31+
json=(
32+
lower = h->isnothing(h) ? nothing : bytes2hex(h),
33+
lift = s->isnothing(s) ? nothing : hex2bytes(s),
34+
)
35+
)
36+
""" Source system or application where the blob was created (e.g., webapp, sdk, robot)"""
3037
origin::String = ""
31-
""" number of bytes in blob as a string"""
32-
size::String = "-1"
38+
"""Number of bytes in blob serialized as a string"""
39+
size::Int64 = -1 & (json=(lower = string, lift = x->parse(Int64, x)))
3340
""" Additional information that can help a different user of the Blob. """
3441
description::String = ""
35-
""" MIME description describing the format of binary data in the `Blob`, e.g. 'image/png' or 'application/json; _type=CameraModel'. """
36-
mimeType::String = "application/octet-stream"
37-
""" Additional storage for functional metadata used in some scenarios, e.g. to support advanced features such as `parsejson(base64decode(entry.metadata))['time_sync']`. """
38-
metadata::String = "e30="
39-
""" When the Blob itself was first created. """
40-
timestamp::ZonedDateTime = now(localzone())
41-
""" When the Blobentry was created. """
42-
createdTimestamp::Union{ZonedDateTime, Nothing} = nothing
43-
""" Use carefully, but necessary to support advanced usage such as time synchronization over Blob data. """
44-
lastUpdatedTimestamp::Union{ZonedDateTime, Nothing} = nothing
42+
""" MIME description describing the format of binary data in the `Blob`, e.g. 'image/png' or 'application/json'. """
43+
mimetype::String = "application/octet-stream" #FIXME ::MIME = MIME("application/octet-stream")
44+
""" Storage for a couple of bytes directly in the graph. Use with caution and keep it small and simple."""
45+
metadata::JSONText = JSONText("{}")
46+
""" When the Blob itself was first created. Serialized as an ISO 8601 string."""
47+
timestamp::NanoDate = ndnow(UTC) & (json = (lower = timestamp,),)
4548
""" Type version of this Blobentry."""
46-
_version::VersionNumber = _getDFGVersion()
49+
version::VersionNumber = version(Blobentry)
4750
end
51+
version(::Type{Blobentry}) = v"0.1.0"
52+
version(node) = node.version
4853

4954
function Blobentry(label::Symbol, blobstore = :default; kwargs...)
5055
return Blobentry(; label, blobstore, kwargs...)
5156
end
5257
# construction helper from existing Blobentry for user overriding via kwargs
5358
"""
    Blobentry(entry::Blobentry; kwargs...)

Build a new `Blobentry` from an existing `entry`, overriding any field passed as a
keyword. Every keyword defaults to the corresponding field of `entry`.
"""
function Blobentry(
    entry::Blobentry;
    blobid::UUID = entry.blobid,
    label::Symbol = entry.label,
    blobstore::Symbol = entry.blobstore,
    crchash = entry.crchash,
    shahash = entry.shahash,
    size::Int64 = entry.size,
    origin::String = entry.origin,
    description::String = entry.description,
    mimetype::String = entry.mimetype,
    metadata::JSONText = entry.metadata,
    # The `timestamp` field is a `NanoDate`; the previous `::ZonedDateTime` annotation
    # rejected the default `entry.timestamp`. Left untyped so the keyword constructor
    # decides what it accepts.
    timestamp = entry.timestamp,
    version = entry.version,
)
    return Blobentry(;
        label,
        blobstore,
        blobid,
        crchash,
        shahash,
        origin,
        size,
        description,
        mimetype,
        metadata,
        timestamp,
        version,
    )
end
88+
89+
#TODO deprecated in v0.29
90+
"""
    getproperty(x::Blobentry, f::Symbol)

Deprecation shim for renamed and removed `Blobentry` fields.

- `:id`, `:createdTimestamp`, `:lastUpdatedTimestamp` and `:hash` raise an error.
- `:blobId`, `:mimeType` and `:_version` log a warning and return the renamed field.
- Any other name is forwarded to `getfield`.
"""
function Base.getproperty(x::Blobentry, f::Symbol)
    # Tuple membership: this runs on every field access, so avoid allocating an array.
    if f in (:id, :createdTimestamp, :lastUpdatedTimestamp)
        error("Blobentry field $f has been deprecated")
    elseif f == :hash
        error("Blobentry field :hash has been deprecated; use :crchash or :shahash instead")
    elseif f == :blobId
        @warn "Blobentry field :blobId has been renamed to :blobid"
        return getfield(x, :blobid)
    elseif f == :mimeType
        @warn "Blobentry field :mimeType has been renamed to :mimetype"
        return getfield(x, :mimetype)
    elseif f == :_version
        @warn "Blobentry field :_version has been renamed to :version"
        return getfield(x, :version)
    else
        return getfield(x, f)
    end
end
108+
109+
"""
    setproperty!(x::Blobentry, f::Symbol, val)

Deprecation shim mirroring `getproperty`: removed field names raise an error, renamed
field names log a warning and are redirected to the new name, and everything else is
forwarded to `setfield!`.
"""
function Base.setproperty!(x::Blobentry, f::Symbol, val)
    # Removed fields: fail up front.
    if f in (:id, :createdTimestamp, :lastUpdatedTimestamp, :hash)
        error("Blobentry field $f has been deprecated")
    end

    # Renamed fields: warn, then write through to the new name.
    if f == :blobId
        @warn "Blobentry field :blobId has been renamed to :blobid"
        return setfield!(x, :blobid, val)
    end
    if f == :mimeType
        @warn "Blobentry field :mimeType has been renamed to :mimetype"
        return setfield!(x, :mimetype, val)
    end
    if f == :_version
        @warn "Blobentry field :_version has been renamed to :version"
        return setfield!(x, :version, val)
    end

    return setfield!(x, f, val)
end

src/DataBlobs/entities/BlobStores.jl

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,25 +7,25 @@ Abstract supertype for all blobstore implementations.
77
88
Subtypes of `AbstractBlobstore{T}` must implement the required interface for blob storage and retrieval, such as:
99
10-
- `add!(store, blobId, blob)`: Add a new blob to the store.
11-
- `get(store, blobId)`: Retrieve a blob by its ID.
10+
- `add!(store, blobid, blob)`: Add a new blob to the store.
11+
- `get(store, blobid)`: Retrieve a blob by its ID.
1212
- `list(store)`: List all blob IDs in the store.
1313
1414
The parameter `T` represents the type of blobs stored (e.g., `Vector{UInt8}` or a custom `Blob` type).
1515
1616
See concrete implementations for details.
1717
1818
Design Notes
19-
- `blobId` is not considered unique across blobstores with different labels only within a single blobstore.
20-
- We cannot guarantee that `blobId` is unique across different blobstores with the same label and this is up to the end user.
19+
- `blobid` is not considered unique across blobstores with different labels, only within a single blobstore.
20+
- We cannot guarantee that `blobid` is unique across different blobstores with the same label and this is up to the end user.
2121
- Within a single blobstore `addBlob!` will fail if there is a UUID collision.
22-
- TODO: We should consider using uuid7 for `blobId`s (requires jl v1.12).
22+
- TODO: We should consider using uuid7 for `blobid`s (requires jl v1.12).
2323
- `Blobstore`s are identified by a `label::Symbol`, which allows for multiple blobstores to coexist in the same system.
2424
25-
TODO: If we want to make the `blobId`=>Blob pair immutable:
25+
TODO: If we want to make the `blobid`=>Blob pair immutable:
2626
- We can use the tombstone pattern to mark a blob as deleted. See FolderStore in PR#TODO.
2727
28-
Design goal: all `Blobstore`s with the same `label` can contain the same `blobId`=>`Blob` pair and the blobs should be identical since they are immutable.
28+
Design goal: all `Blobstore`s with the same `label` can contain the same `blobid`=>`Blob` pair and the blobs should be identical since they are immutable.
2929
3030
"""
3131
abstract type AbstractBlobstore{T} end

src/DataBlobs/services/BlobEntry.jl

Lines changed: 34 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
##==============================================================================
22
## Blobentry - common
33
##==============================================================================
4-
#TODO think origin and buildSourceString should be deprecated, description can be used instead
4+
#TODO think buildSourceString should be deprecated.
55
"""
66
$(SIGNATURES)
77
Function to generate source string - agentLabel|graphLabel|varLabel
@@ -17,32 +17,43 @@ end
1717
# label
1818
# id
1919

20-
getHash(entry::Blobentry) = hex2bytes(entry.hash)
2120
getTimestamp(entry::Blobentry) = entry.timestamp
2221

23-
function assertHash(de::Blobentry, db; hashfunction::Function = sha256)
24-
getHash(de) === nothing && @warn "Missing hash?" && return true
25-
if hashfunction(db) == getHash(de)
26-
return true #or nothing?
27-
else
28-
error("Stored hash and data blob hash do not match")
22+
"""
23+
checkHash(entry::Blobentry, blob) -> Union{Bool,Nothing}
24+
25+
Checks the integrity of a blob against the hashes (crc32c, sha256) stored in the given `Blobentry`.
26+
27+
- Returns `true` if all present hashes (`crchash`, `shahash`) match the computed values from `blob`.
28+
- Returns `false` if any present hash does not match.
29+
- Returns `nothing` if no hashes are stored in the `Blobentry` to check against.
30+
"""
31+
function checkHash(entry::Blobentry, blob)
    # A hash counts as stored only when its field is not `nothing`. `shahash` is a
    # `Union{Vector{UInt8}, Nothing}`, so the earlier `!= ""` comparison was always
    # true and made entries without a sha256 fail the check.
    hascrc = !isnothing(entry.crchash)
    hassha = !isnothing(entry.shahash)

    # No hash stored: there is nothing to verify against.
    (hascrc || hassha) || return nothing

    # Every stored hash must match the value computed from `blob`.
    hascrc && crc32c(blob) != entry.crchash && return false
    hassha && sha256(blob) != entry.shahash && return false
    return true
end
3143

32-
function Base.show(io::IO, ::MIME"text/plain", entry::Blobentry)
33-
println(io, "Blobentry {")
34-
println(io, " id: ", entry.id)
35-
println(io, " blobId: ", entry.blobId)
36-
println(io, " label: ", entry.label)
37-
println(io, " blobstore: ", entry.blobstore)
38-
println(io, " hash: ", entry.hash)
39-
println(io, " origin: ", entry.origin)
40-
println(io, " description: ", entry.description)
41-
println(io, " mimeType: ", entry.mimeType)
42-
println(io, " timestamp ", entry.timestamp)
43-
println(io, " _version: ", entry._version)
44-
return println(io, "}")
45-
end
44+
# function Base.show(io::IO, ::MIME"text/plain", entry::Blobentry)
45+
# println(io, "Blobentry {")
46+
# println(io, " id: ", entry.id)
47+
# println(io, " blobid: ", entry.blobid)
48+
# println(io, " label: ", entry.label)
49+
# println(io, " blobstore: ", entry.blobstore)
50+
# println(io, " origin: ", entry.origin)
51+
# println(io, " description: ", entry.description)
52+
# println(io, " mimetype: ", entry.mimetype)
53+
# println(io, " timestamp ", entry.timestamp)
54+
# println(io, " version: ", entry.version)
55+
# return println(io, "}")
56+
# end
4657

4758
##==============================================================================
4859
## Blobentry - CRUD
@@ -238,7 +249,7 @@ function getBlobentries(
238249
)
239250
entries = getBlobentries(v)
240251
filterDFG!(entries, labelFilter, getLabel)
241-
filterDFG!(entries, blobIdFilter, x -> string(x.blobId))
252+
filterDFG!(entries, blobIdFilter, x -> string(x.blobid))
242253
return entries
243254
end
244255

src/DataBlobs/services/BlobPacking.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ function unpackBlob(::Type{format"JSON"}, blob::Vector{UInt8})
5252
return String(copy(blob))
5353
end
5454

55-
unpackBlob(entry::Blobentry, blob::Vector{UInt8}) = unpackBlob(entry.mimeType, blob)
55+
unpackBlob(entry::Blobentry, blob::Vector{UInt8}) = unpackBlob(entry.mimetype, blob)
5656
unpackBlob(eb::Pair{<:Blobentry, Vector{UInt8}}) = unpackBlob(eb[1], eb[2])
5757

5858
# 2/ FileIO

0 commit comments

Comments
 (0)