-
Notifications
You must be signed in to change notification settings - Fork 15
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
rename _convert to maybe_encode #64
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -9,7 +9,7 @@ function loadfile(T, file::File) | |||||
end | ||||||
|
||||||
function loadfile(T, file::TextFile) | ||||||
replace(read(file.filename, String), "\r"=>"") # ignore CRLF/LF difference | ||||||
_ignore_crlf(read(file.filename, String)) | ||||||
end | ||||||
|
||||||
function loadfile(::Type{<:Number}, file::File{format"TXT"}) | ||||||
|
@@ -24,7 +24,7 @@ function savefile(file::TextFile, content) | |||||
write(file.filename, string(content)) | ||||||
end | ||||||
|
||||||
function query_extended(filename) | ||||||
function query_extended(filename::AbstractString) | ||||||
file, ext = splitext(filename) | ||||||
# TODO: make this less hacky | ||||||
if uppercase(ext) == ".SHA256" | ||||||
|
@@ -38,20 +38,30 @@ function query_extended(filename) | |||||
res | ||||||
end | ||||||
|
||||||
# Some target formats are not supported by FileIO and thus require an encoding/compression process | ||||||
# before saving. For other formats, we should trust IO backends and make as few changes as possible. | ||||||
# Otherwise, the reference becomes unfaithful. The encoding process helps make the actual data match | ||||||
# the reference data, which is loaded from the reference file via IO backends. | ||||||
# | ||||||
# TODO: split `maybe_encode` into `maybe_preprocess` and `maybe_encode` | ||||||
""" | ||||||
_convert(T::Type{<:DataFormat}, x; kw...) -> out | ||||||
maybe_encode(T::Type{<:DataFormat}, x; kw...) -> out | ||||||
|
||||||
Convert `x` to a validate content for file data format `T`. | ||||||
If needed, encode `x` into valid content that matches format `T`. | ||||||
|
||||||
If there is no known method to encode `x`, then `x` is returned unchanged, without a warning. | ||||||
""" | ||||||
_convert(::Type{<:DataFormat}, x; kw...) = x | ||||||
maybe_encode(::Type{<:DataFormat}, x; kw...) = x | ||||||
maybe_encode(::Type{<:DataFormat}, x::AbstractString; kw...) = _ignore_crlf(x) | ||||||
maybe_encode(::Type{<:DataFormat}, x::AbstractArray{<:AbstractString}; kw...) = _join(x) | ||||||
|
||||||
# plain TXT | ||||||
_convert(::Type{DataFormat{:TXT}}, x; kw...) = replace(string(x), "\r"=>"") # ignore CRLF/LF difference | ||||||
_convert(::Type{DataFormat{:TXT}}, x::Number; kw...) = x | ||||||
function _convert(::Type{DataFormat{:TXT}}, x::AbstractArray{<:AbstractString}; kw...) | ||||||
return join(x, '\n') | ||||||
end | ||||||
function _convert( | ||||||
maybe_encode(::Type{DataFormat{:TXT}}, x; kw...) = _ignore_crlf(string(x)) | ||||||
maybe_encode(::Type{DataFormat{:TXT}}, x::AbstractArray{<:AbstractString}; kw...) = _join(x) # ambiguity patch | ||||||
maybe_encode(::Type{DataFormat{:TXT}}, x::AbstractString; kw...) = _ignore_crlf(x) # ambiguity patch | ||||||
maybe_encode(::Type{DataFormat{:TXT}}, x::Number; kw...) = x # TODO: Change this to string(x) ? | ||||||
|
||||||
function maybe_encode( | ||||||
::Type{DataFormat{:TXT}}, img::AbstractArray{<:Colorant}; | ||||||
size = (20,40), kw...) | ||||||
|
||||||
|
@@ -65,11 +75,19 @@ function _convert( | |||||
end | ||||||
|
||||||
# SHA256 | ||||||
_convert(::Type{DataFormat{:SHA256}}, x; kw...) = bytes2hex(sha256(string(x))) | ||||||
function _convert(::Type{DataFormat{:SHA256}}, img::AbstractArray{<:Colorant}; kw...) | ||||||
maybe_encode(::Type{DataFormat{:SHA256}}, x; kw...) = _sha256(string(x)) | ||||||
maybe_encode(::Type{DataFormat{:SHA256}}, x::AbstractString) = _sha256(_ignore_crlf(x)) | ||||||
maybe_encode(::Type{DataFormat{:SHA256}}, x::AbstractArray{<:AbstractString}) = _sha256(_join(x)) | ||||||
function maybe_encode(::Type{DataFormat{:SHA256}}, img::AbstractArray{<:Colorant}; kw...) | ||||||
# encode image into SHA256 | ||||||
size_str = bytes2hex(sha256(reinterpret(UInt8,[map(Int64,size(img))...]))) | ||||||
img_str = bytes2hex(sha256(reinterpret(UInt8,vec(rawview(channelview(img)))))) | ||||||
size_str = _sha256(reinterpret(UInt8,[map(Int64,size(img))...])) | ||||||
img_str = _sha256(reinterpret(UInt8,vec(rawview(channelview(img))))) | ||||||
|
||||||
return size_str * img_str | ||||||
end | ||||||
|
||||||
|
||||||
# Helpers | ||||||
_join(x::AbstractArray{<:AbstractString}) = mapreduce(_ignore_crlf, (x,y)->x*"\n"*y, x) | ||||||
johnnychen94 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
_sha256(x) = bytes2hex(sha256(x)) | ||||||
_ignore_crlf(x::AbstractString) = replace(x, "\r"=>"") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we just say we are ignoring
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I assume you're suggesting Carriage Return ( I renamed to |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
refdir = joinpath(refroot, "fileio") | ||
|
||
@testset "query" begin | ||
check_types = [ | ||
# text types | ||
("textfile_with_no_extension", format"TXT"), | ||
("textfile.txt", format"TXT"), | ||
("textfile.unknown", format"TXT"), | ||
("textfile.sha256", format"SHA256"), | ||
|
||
# image types | ||
("imagefile.jpg", format"JPEG"), | ||
("imagefile.jpeg", format"JPEG"), | ||
("imagefile.png", format"PNG"), | ||
("imagefile.tif", format"TIFF"), | ||
("imagefile.tiff", format"TIFF"), | ||
|
||
# dataframe types | ||
("dataframe_file.csv", format"CSV") | ||
] | ||
for (file, fmt) in check_types | ||
@test ReferenceTests.query_extended(file) == File{fmt}(file) | ||
@test ReferenceTests.query_extended(abspath(file)) == File{fmt}(abspath(file)) | ||
end | ||
end | ||
|
||
@testset "maybe_encode" begin | ||
@testset "string" begin | ||
str1 = "Hello world" | ||
str1_sha256 = "64ec88ca00b268e5ba1a35678a1b5316d212f4f366b2477232534a8aeca37f3c" | ||
str2 = "Hello\n world" | ||
str2_sha256 = "60b65ab310480818c4289227f2ec68f1714743db8571b4cb190e100c0085be3d" # bytes2hex(SHA.sha256(str2)) | ||
str2_crlf = "Hello\n\r world" | ||
str3 = "Hello\nworld" | ||
str3_sha256 = "46e0ea795802f17d0b340983ca7d7068c94d7d9172ee4daea37a1ab1168649ec" # bytes2hex(SHA.sha256(str3)) | ||
str3_arr1 = ["Hello", "world"] | ||
str3_arr2 = ["Hello" "world"] | ||
str4 = "Hello\n world1\nHello\n world2" | ||
str4_sha256 = "c7dc8b82c3a6fed4afa0c8790a0586b73df0e4f35524efe6810e5d78b6b6a611" # bytes2hex(SHA.sha256(str4)) | ||
str4_arr = ["Hello\n\r world1", "Hello\n world2"] | ||
|
||
# string as plain text | ||
for fmt in (format"TXT", format"UNKNOWN") | ||
# encoding should preserve whitespace | ||
@test str1 == ReferenceTests.maybe_encode(fmt, str1) | ||
@test str2 == ReferenceTests.maybe_encode(fmt, str2) | ||
# but ignore CRLF/LF differences | ||
@test str2 == ReferenceTests.maybe_encode(fmt, str2_crlf) | ||
# string arrays are treated as multi-line strings, even for UNKNOWN format | ||
@test str3 == ReferenceTests.maybe_encode(fmt, str3) | ||
@test str3 == ReferenceTests.maybe_encode(fmt, str3_arr1) | ||
@test str3 == ReferenceTests.maybe_encode(fmt, str3_arr2) | ||
# string arrays should ignore CRLF/LF differences, too | ||
@test str4 == ReferenceTests.maybe_encode(fmt, str4_arr) | ||
end | ||
|
||
# string as SHA256 should also ignore CRLF/LF differences | ||
fmt = format"SHA256" | ||
@test str1_sha256 == ReferenceTests.maybe_encode(fmt, str1) | ||
@test str2_sha256 == ReferenceTests.maybe_encode(fmt, str2) | ||
# CRLF/LF differences are ignored before hashing | ||
@test str2_sha256 == ReferenceTests.maybe_encode(fmt, str2_crlf) | ||
# string arrays are hashed as multi-line strings | ||
@test str3_sha256 == ReferenceTests.maybe_encode(fmt, str3) | ||
@test str3_sha256 == ReferenceTests.maybe_encode(fmt, str3_arr1) | ||
@test str3_sha256 == ReferenceTests.maybe_encode(fmt, str3_arr2) | ||
# string arrays should ignore CRLF/LF differences, too | ||
@test str4_sha256 == ReferenceTests.maybe_encode(fmt, str4_arr) | ||
end | ||
|
||
@testset "numbers" begin | ||
for num in (0x01, 1, 1.0f0, 1.0) | ||
for fmt in (format"TXT", format"UNKNOWN") | ||
@test num === ReferenceTests.maybe_encode(fmt, num) | ||
end | ||
fmt = format"SHA256" | ||
@test ReferenceTests.maybe_encode(fmt, num) == ReferenceTests.maybe_encode(fmt, string(num)) | ||
end | ||
|
||
|
||
for (fmt, a, ref) in [ | ||
# if target is TXT, convert it to string | ||
(format"TXT", [1, 2], "[1, 2]"), | ||
(format"TXT", [1,2], "[1, 2]"), | ||
(format"TXT", [1;2], "[1, 2]"), | ||
(format"TXT", [1 2], "[1 2]"), | ||
(format"TXT", [1 2; 3 4], "[1 2; 3 4]"), | ||
# if target is Unknown, make no change | ||
(format"UNKNOWN", [1, 2], [1, 2]), | ||
(format"UNKNOWN", [1,2], [1, 2]), | ||
(format"UNKNOWN", [1;2], [1, 2]), | ||
(format"UNKNOWN", [1 2], [1 2]), | ||
(format"UNKNOWN", [1 2; 3 4], [1 2; 3 4]), | ||
] | ||
@test ref == ReferenceTests.maybe_encode(fmt, a) | ||
end | ||
|
||
for a in [[1, 2], [1 2], [1 2; 3 4]] | ||
fmt = format"SHA256" | ||
@test ReferenceTests.maybe_encode(fmt, a) == ReferenceTests.maybe_encode(fmt, string(a)) | ||
end | ||
|
||
end | ||
|
||
@testset "image" begin | ||
gray_1d = Gray{N0f8}.(0.0:0.1:0.9) | ||
rgb_1d = RGB.(gray_1d) | ||
gray_2d = Gray{N0f8}.(reshape(0.0:0.1:0.9, 2, 5)) | ||
rgb_2d = RGB.(gray_2d) | ||
gray_3d = Gray{N0f8}.(reshape(0.0:0.02:0.95, 2, 4, 6)) | ||
rgb_3d = RGB.(gray_3d) | ||
|
||
# any common image types | ||
for img in (gray_1d, gray_2d, gray_3d, rgb_1d, rgb_2d, rgb_3d) | ||
for fmt in (format"JPEG", format"PNG", format"TIFF", format"UNKNOWN") | ||
@test img === ReferenceTests.maybe_encode(fmt, img) | ||
end | ||
end | ||
|
||
# image as text file | ||
fmt = format"TXT" | ||
# TODO: support n-D image encoding | ||
# @test_reference joinpath(refdir, "gray_1d_as_txt.txt") ReferenceTests.maybe_encode(fmt, gray_1d) | ||
# @test_reference joinpath(refdir, "rgb_1d_as_txt.txt") ReferenceTests.maybe_encode(fmt, rgb_1d) | ||
@test_reference joinpath(refdir, "gray_2d_as_txt.txt") ReferenceTests.maybe_encode(fmt, gray_2d) | ||
@test_reference joinpath(refdir, "rgb_2d_as_txt.txt") ReferenceTests.maybe_encode(fmt, rgb_2d) | ||
# @test_reference joinpath(refdir, "gray_3d_as_txt.txt") ReferenceTests.maybe_encode(fmt, gray_3d) | ||
# @test_reference joinpath(refdir, "rgb_3d_as_txt.txt") ReferenceTests.maybe_encode(fmt, rgb_3d) | ||
|
||
# image as SHA256 | ||
fmt = format"SHA256" | ||
for (file, img) in [ | ||
("gray_1d", gray_1d), | ||
("gray_2d", gray_2d), | ||
("gray_3d", gray_3d), | ||
("rgb_1d", rgb_1d), | ||
("rgb_2d", rgb_2d), | ||
("rgb_3d", rgb_3d) | ||
] | ||
reffile = joinpath(refdir, "$(file)_as_sha256.txt") | ||
@test_reference reffile ReferenceTests.maybe_encode(fmt, img) | ||
end | ||
end | ||
|
||
# dataframe | ||
@testset "dataframe" begin | ||
df = DataFrame(v1=[1,2,3], v2=["a","b","c"]) | ||
|
||
@test string(df) == ReferenceTests.maybe_encode(format"TXT", df) | ||
for fmt in (format"CSV", format"UNKNOWN") | ||
@test df === ReferenceTests.maybe_encode(fmt, df) | ||
end | ||
|
||
fmt = format"SHA256" | ||
@test_reference joinpath(refdir, "dataframe_as_sha256.txt") ReferenceTests.maybe_encode(fmt, df) | ||
|
||
end | ||
end | ||
|
||
# TODO: savefile & loadfile |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
2cf7c4edcafc27a5eb1b74fb0af704edc0d9bbef91a1b55d3b7350fa4b54cd18 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
a111f275cc2e7588000001d300a31e76336d15b9d314cd1a1d8f3d3556975eed10ef43c7fcace84c4d0d54b8e92c0c9be2d14a6bf3dd7647254a3cc0c4a04297 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
26cfbb315c316a0b15516434f90284e5011dcb58503fe39eb036bf669bd8233d10ef43c7fcace84c4d0d54b8e92c0c9be2d14a6bf3dd7647254a3cc0c4a04297 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
[0m[38;5;232;48;5;234m▀[38;5;237;48;5;239m▀[38;5;241;48;5;244m▀[38;5;246;48;5;248m▀[38;5;250;48;5;253m▀[0m |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
72307e420b5460c03a1c167060ed336407c26ea74aabf8fab76dd8e9dbe8cbe4baf0f53196e8d5270c0b0b2da82bbbb4676edbb0ebf84ec0dcbd8c0bf4d9af68 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
a111f275cc2e7588000001d300a31e76336d15b9d314cd1a1d8f3d3556975eedebd6b0ad29dd5402ce5745bb5b48d4c59b7f8da0cdf8d2f287befd9094f6ac89 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
26cfbb315c316a0b15516434f90284e5011dcb58503fe39eb036bf669bd8233debd6b0ad29dd5402ce5745bb5b48d4c59b7f8da0cdf8d2f287befd9094f6ac89 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
[0m[38;5;232;48;5;234m▀[38;5;237;48;5;239m▀[38;5;241;48;5;244m▀[38;5;246;48;5;248m▀[38;5;250;48;5;253m▀[0m |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
72307e420b5460c03a1c167060ed336407c26ea74aabf8fab76dd8e9dbe8cbe45465bcbf50acdbe5600207e3266eedef6548bc4d244e55d7a1af0f1af09e019f |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This function expects only string types because it calls `splitext`, so I added this as a safety net to make sure nothing strange is passed here.