Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] improve test quality #61

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 37 additions & 15 deletions src/fileio.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ function loadfile(T, file::File)
end

function loadfile(T, file::TextFile)
replace(read(file.filename, String), "\r"=>"") # ignore CRLF/LF difference
_ignore_CR(read(file.filename, String))
end

function loadfile(::Type{<:Number}, file::File{format"TXT"})
Expand All @@ -24,7 +24,7 @@ function savefile(file::TextFile, content)
write(file.filename, string(content))
end

function query_extended(filename)
function query_extended(filename::AbstractString)
file, ext = splitext(filename)
# TODO: make this less hacky
if uppercase(ext) == ".SHA256"
Expand All @@ -38,20 +38,28 @@ function query_extended(filename)
res
end

# Some target formats are not supported by FileIO and thus require an encoding/compression step
# before saving. For other formats, we should trust the IO backends and make as few changes as
# possible; otherwise, the reference becomes unfaithful. The encoding step helps make the actual
# data match the reference data, which is loaded from the reference file via the IO backends.
#
# TODO: split `maybe_encode` to `maybe_preprocess` and `maybe_encode`
"""
_convert(T::Type{<:DataFormat}, x; kw...) -> out
maybe_encode(T::Type{<:DataFormat}, x; kw...) -> out

Convert `x` to a validate content for file data format `T`.
If needed, encode `x` into valid content that matches format `T`.

If there is no known method to encode `x`, then `x` is returned unchanged, without warning.
"""
_convert(::Type{<:DataFormat}, x; kw...) = x
maybe_encode(::Type{<:DataFormat}, x; kw...) = x

# plain TXT
_convert(::Type{DataFormat{:TXT}}, x; kw...) = replace(string(x), "\r"=>"") # ignore CRLF/LF difference
_convert(::Type{DataFormat{:TXT}}, x::Number; kw...) = x
function _convert(::Type{DataFormat{:TXT}}, x::AbstractArray{<:AbstractString}; kw...)
return join(x, '\n')
end
function _convert(
# Plain-text targets: anything without a more specific method is stringified first, and
# CRLF line endings are rewritten to LF so references compare equal across platforms.
function maybe_encode(::Type{DataFormat{:TXT}}, x; kw...)
    return _ignore_CR(string(x))
end

# Strings only need their line endings normalized.
function maybe_encode(::Type{DataFormat{:TXT}}, x::AbstractString; kw...)
    return _ignore_CR(x)
end

# Arrays of strings are joined into one multi-line string (see `_join`).
function maybe_encode(::Type{DataFormat{:TXT}}, x::AbstractArray{<:AbstractString}; kw...)
    return _join(x)
end

# Numbers are passed through unchanged.
# TODO: Change this to string(x) ?
maybe_encode(::Type{DataFormat{:TXT}}, x::Number; kw...) = x

function maybe_encode(
::Type{DataFormat{:TXT}}, img::AbstractArray{<:Colorant};
size = (20,40), kw...)

Expand All @@ -65,11 +73,25 @@ function _convert(
end

# SHA256
_convert(::Type{DataFormat{:SHA256}}, x; kw...) = bytes2hex(sha256(string(x)))
function _convert(::Type{DataFormat{:SHA256}}, img::AbstractArray{<:Colorant}; kw...)
# SHA256 targets store the hex digest of the content instead of the content itself.
#
# Every method must accept (and may ignore) `kw...`: `test_reference` forwards its keyword
# arguments to `maybe_encode`, and because keyword arguments do not take part in dispatch,
# a more specific method without `kw...` would throw a `MethodError` as soon as any keyword
# is passed along.

# Fallback: hash the `string` representation of `x`.
maybe_encode(::Type{DataFormat{:SHA256}}, x; kw...) = _sha256(string(x))

# Strings: rewrite CRLF to LF before hashing so the digest does not depend on line endings.
maybe_encode(::Type{DataFormat{:SHA256}}, x::AbstractString; kw...) = _sha256(_ignore_CR(x))

# Arrays of strings: hash the joined multi-line string (see `_join`).
function maybe_encode(::Type{DataFormat{:SHA256}}, x::AbstractArray{<:AbstractString}; kw...)
    return _sha256(_join(x))
end
function maybe_encode(::Type{DataFormat{:SHA256}}, img::AbstractArray{<:Colorant}; kw...)
# encode image into SHA256
size_str = bytes2hex(sha256(reinterpret(UInt8,[map(Int64,size(img))...])))
img_str = bytes2hex(sha256(reinterpret(UInt8,vec(rawview(channelview(img))))))
size_str = _sha256(reinterpret(UInt8,[map(Int64,size(img))...]))
img_str = _sha256(reinterpret(UInt8,vec(rawview(channelview(img)))))

return size_str * img_str
end

# Helpers
# Join the elements of a string array with "\n" and normalize the line endings of the result.
function _join(x::AbstractArray{<:AbstractString})
    joined = join(x, "\n")
    return _ignore_CR(joined)
end

# Hex-encoded SHA-256 digest of `x`.
_sha256(x) = x |> sha256 |> bytes2hex

"""
    _ignore_CR(x::AbstractString)

Normalize line endings by rewriting every CRLF (`\\r\\n`) sequence in `x` as LF (`\\n`).

CRLF format is widely used by Windows while LF format is mainly used by Linux, so
normalizing makes content produced on either platform compare equal. A `\\r` that is not
directly followed by `\\n` is kept as is.
"""
function _ignore_CR(x::AbstractString)
    return replace(x, "\r\n" => "\n") # issue #39
end
2 changes: 1 addition & 1 deletion src/test_reference.jl
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ function test_reference(
rendermode = default_rendermode(F, raw_actual)
end

actual = _convert(F, raw_actual; kw...)
actual = maybe_encode(F, raw_actual; kw...)
# preprocessing when reference file doesn't exist
if !isfile(path)
@info("Reference file for \"$filename\" does not exist. It will be created")
Expand Down
Empty file added test/equality_metrics.jl
Empty file.
165 changes: 165 additions & 0 deletions test/fileio.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# Directory holding the reference files used by the `@test_reference` checks in this file.
# NOTE(review): `refroot` is not defined here; presumably the including test script sets it.
refdir = joinpath(refroot, "fileio")

@testset "query" begin
    # (file name, format that `query_extended` is expected to report)
    expected_formats = [
        # text types
        ("textfile_with_no_extension", format"TXT"),
        ("textfile.txt", format"TXT"),
        ("textfile.unknown", format"TXT"),
        ("textfile.sha256", format"SHA256"),

        # image types
        ("imagefile.jpg", format"JPEG"),
        ("imagefile.jpeg", format"JPEG"),
        ("imagefile.png", format"PNG"),
        ("imagefile.tif", format"TIFF"),
        ("imagefile.tiff", format"TIFF"),

        # dataframe types
        ("dataframe_file.csv", format"CSV")
    ]

    # the detected format must be the same for relative and absolute paths
    for (name, expected) in expected_formats
        fullpath = abspath(name)
        @test ReferenceTests.query_extended(name) == File{expected}(name)
        @test ReferenceTests.query_extended(fullpath) == File{expected}(fullpath)
    end
end

@testset "maybe_encode" begin
@testset "string" begin
    # Inputs, together with the SHA256 hex digest of their LF-normalized form.
    oneline = "Hello world"
    oneline_sha = "64ec88ca00b268e5ba1a35678a1b5316d212f4f366b2477232534a8aeca37f3c"
    twoline = "Hello\n world"
    twoline_sha = "60b65ab310480818c4289227f2ec68f1714743db8571b4cb190e100c0085be3d" # bytes2hex(SHA.sha256(twoline))
    twoline_crlf = "Hello\r\n world"
    words = "Hello\nworld"
    words_sha = "46e0ea795802f17d0b340983ca7d7068c94d7d9172ee4daea37a1ab1168649ec" # bytes2hex(SHA.sha256(words))
    words_vec = ["Hello", "world"]
    words_row = ["Hello" "world"]
    multi = "Hello\n world1\nHello\n world2"
    multi_sha = "c7dc8b82c3a6fed4afa0c8790a0586b73df0e4f35524efe6810e5d78b6b6a611" # bytes2hex(SHA.sha256(multi))
    multi_vec = ["Hello\r\n world1", "Hello\n world2"]

    # (input, expected TXT encoding, expected SHA256 encoding)
    cases = (
        # whitespace is respected
        (oneline, oneline, oneline_sha),
        (twoline, twoline, twoline_sha),
        # CRLF/LF differences are ignored
        (twoline_crlf, twoline, twoline_sha),
        # string arrays are treated as multi-line strings
        (words, words, words_sha),
        (words_vec, words, words_sha),
        (words_row, words, words_sha),
        # string arrays ignore CRLF/LF differences, too
        (multi_vec, multi, multi_sha),
    )

    # string as plain text
    for (input, expected, _) in cases
        @test expected == ReferenceTests.maybe_encode(format"TXT", input)
    end

    # string as SHA256: the digest is taken over the normalized text
    for (input, _, digest) in cases
        @test digest == ReferenceTests.maybe_encode(format"SHA256", input)
    end

    # formats without a string encoding return the very same object
    for input in (oneline, twoline, twoline_crlf, words, words_vec, words_row)
        @test input === ReferenceTests.maybe_encode(format"PNG", input)
    end
end

@testset "numbers" begin
    sha = format"SHA256"

    # scalars: TXT and unknown formats return the number itself, while SHA256 hashes
    # its string representation
    for value in (0x01, 1, 1.0f0, 1.0)
        for passthrough in (format"TXT", format"UNKNOWN")
            @test value === ReferenceTests.maybe_encode(passthrough, value)
        end
        hashed = ReferenceTests.maybe_encode(sha, value)
        @test hashed == ReferenceTests.maybe_encode(sha, string(value))
    end

    # (target format, input array, expected encoding)
    array_cases = [
        # if target is TXT, convert it to string
        (format"TXT", [1, 2], "[1, 2]"),
        (format"TXT", [1,2], "[1, 2]"),
        (format"TXT", [1;2], "[1, 2]"),
        (format"TXT", [1 2], "[1 2]"),
        (format"TXT", [1 2; 3 4], "[1 2; 3 4]"),
        # if target is Unknown, make no change
        (format"UNKNOWN", [1, 2], [1, 2]),
        (format"UNKNOWN", [1,2], [1, 2]),
        (format"UNKNOWN", [1;2], [1, 2]),
        (format"UNKNOWN", [1 2], [1 2]),
        (format"UNKNOWN", [1 2; 3 4], [1 2; 3 4]),
    ]
    for (target, input, expected) in array_cases
        @test expected == ReferenceTests.maybe_encode(target, input)
    end

    # numeric arrays as SHA256: same digest as their string representation
    for input in [[1, 2], [1 2], [1 2; 3 4]]
        hashed = ReferenceTests.maybe_encode(sha, input)
        @test hashed == ReferenceTests.maybe_encode(sha, string(input))
    end
end

@testset "image" begin
    # 1-D, 2-D and 3-D test images, each as grayscale and as RGB
    gray_1d = Gray{N0f8}.(0.0:0.1:0.9)
    gray_2d = Gray{N0f8}.(reshape(0.0:0.1:0.9, 2, 5))
    gray_3d = Gray{N0f8}.(reshape(0.0:0.02:0.95, 2, 4, 6))
    rgb_1d = RGB.(gray_1d)
    rgb_2d = RGB.(gray_2d)
    rgb_3d = RGB.(gray_3d)

    # (reference file stem, image)
    named_images = (
        ("gray_1d", gray_1d),
        ("gray_2d", gray_2d),
        ("gray_3d", gray_3d),
        ("rgb_1d", rgb_1d),
        ("rgb_2d", rgb_2d),
        ("rgb_3d", rgb_3d),
    )

    # common image formats (and unknown ones) must return the very same image object
    for (_, img) in named_images
        for fmt in (format"JPEG", format"PNG", format"TIFF", format"UNKNOWN")
            @test img === ReferenceTests.maybe_encode(fmt, img)
        end
    end

    # image as text file
    fmt = format"TXT"
    # TODO: support n-D image encoding
    # @test_reference joinpath(refdir, "gray_1d_as_txt.txt") ReferenceTests.maybe_encode(fmt, gray_1d)
    # @test_reference joinpath(refdir, "rgb_1d_as_txt.txt") ReferenceTests.maybe_encode(fmt, rgb_1d)
    @test_reference joinpath(refdir, "gray_2d_as_txt.txt") ReferenceTests.maybe_encode(fmt, gray_2d)
    @test_reference joinpath(refdir, "rgb_2d_as_txt.txt") ReferenceTests.maybe_encode(fmt, rgb_2d)
    # @test_reference joinpath(refdir, "gray_3d_as_txt.txt") ReferenceTests.maybe_encode(fmt, gray_3d)
    # @test_reference joinpath(refdir, "rgb_3d_as_txt.txt") ReferenceTests.maybe_encode(fmt, rgb_3d)

    # image as SHA256: one reference file per image
    fmt = format"SHA256"
    for (stem, img) in named_images
        sha_ref = joinpath(refdir, "$(stem)_as_sha256.txt")
        @test_reference sha_ref ReferenceTests.maybe_encode(fmt, img)
    end
end

# dataframe
@testset "dataframe" begin
    table = DataFrame(v1=[1,2,3], v2=["a","b","c"])

    # as TXT, a data frame is encoded as its string representation
    @test string(table) == ReferenceTests.maybe_encode(format"TXT", table)

    # CSV and unknown formats must return the very same object
    for passthrough in (format"CSV", format"UNKNOWN")
        @test table === ReferenceTests.maybe_encode(passthrough, table)
    end

    # as SHA256, compare against the stored reference digest
    sha_ref = joinpath(refdir, "dataframe_as_sha256.txt")
    @test_reference sha_ref ReferenceTests.maybe_encode(format"SHA256", table)
end
end

# TODO: savefile & loadfile
File renamed without changes.
1 change: 1 addition & 0 deletions test/references/fileio/dataframe_as_sha256.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2cf7c4edcafc27a5eb1b74fb0af704edc0d9bbef91a1b55d3b7350fa4b54cd18
1 change: 1 addition & 0 deletions test/references/fileio/gray_1d_as_sha256.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
a111f275cc2e7588000001d300a31e76336d15b9d314cd1a1d8f3d3556975eed10ef43c7fcace84c4d0d54b8e92c0c9be2d14a6bf3dd7647254a3cc0c4a04297
1 change: 1 addition & 0 deletions test/references/fileio/gray_2d_as_sha256.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
26cfbb315c316a0b15516434f90284e5011dcb58503fe39eb036bf669bd8233d10ef43c7fcace84c4d0d54b8e92c0c9be2d14a6bf3dd7647254a3cc0c4a04297
1 change: 1 addition & 0 deletions test/references/fileio/gray_2d_as_txt.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
▀▀▀▀▀
1 change: 1 addition & 0 deletions test/references/fileio/gray_3d_as_sha256.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
72307e420b5460c03a1c167060ed336407c26ea74aabf8fab76dd8e9dbe8cbe4baf0f53196e8d5270c0b0b2da82bbbb4676edbb0ebf84ec0dcbd8c0bf4d9af68
1 change: 1 addition & 0 deletions test/references/fileio/rgb_1d_as_sha256.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
a111f275cc2e7588000001d300a31e76336d15b9d314cd1a1d8f3d3556975eedebd6b0ad29dd5402ce5745bb5b48d4c59b7f8da0cdf8d2f287befd9094f6ac89
1 change: 1 addition & 0 deletions test/references/fileio/rgb_2d_as_sha256.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
26cfbb315c316a0b15516434f90284e5011dcb58503fe39eb036bf669bd8233debd6b0ad29dd5402ce5745bb5b48d4c59b7f8da0cdf8d2f287befd9094f6ac89
1 change: 1 addition & 0 deletions test/references/fileio/rgb_2d_as_txt.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
▀▀▀▀▀
1 change: 1 addition & 0 deletions test/references/fileio/rgb_3d_as_sha256.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
72307e420b5460c03a1c167060ed336407c26ea74aabf8fab76dd8e9dbe8cbe45465bcbf50acdbe5600207e3266eedef6548bc4d244e55d7a1af0f1af09e019f
File renamed without changes
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Empty file added test/render.jl
Empty file.
Loading