#=
Patch metadata preserved from the original text. The text was a `git format-patch`
e-mail whose line breaks had been collapsed; line breaks below are reconstructed.

From 96c45c458342cec42cc28a5f1a50381c70969427 Mon Sep 17 00:00:00 2001
From: Ianna Osborne
Date: Fri, 14 Jun 2024 19:23:55 +0200
Subject: [PATCH] feat: form from type functions (#106)

* form from type functions
* add check on iterable type
* add test
* correct numpy type
* fix offsets type
* add test
* add module
* move function out
* add more functions
* try to fix Symbol issue
* add more tests
* cleanup and add more tests
* add tree form test function
* add tests
* fix FieldError
* use FieldError for nightly only
* fix typo
* one more
* last one
---
 src/AwkwardArray.jl             |   1 +
 src/form_utils.jl               | 232 ++++++++++++++++++++++++++++++++
 test/runtests.jl                |  23 +++-
 test/test_106_form_from_type.jl |  93 +++++++++++++
 4 files changed, 346 insertions(+), 3 deletions(-)
 create mode 100644 src/form_utils.jl
 create mode 100644 test/test_106_form_from_type.jl

diff --git a/src/AwkwardArray.jl b/src/AwkwardArray.jl
index 3d3cbd5..d08dfbf 100644
--- a/src/AwkwardArray.jl
+++ b/src/AwkwardArray.jl
@@ -9,6 +9,7 @@ import Tables

 include("./all_implementations.jl")
 include("./tables.jl")
+include("./form_utils.jl")

 include("./AwkwardPythonCallExt.jl")
 using .AwkwardPythonCallExt: convert

diff --git a/src/form_utils.jl b/src/form_utils.jl
new file mode 100644
index 0000000..abb2fe0
--- /dev/null
+++ b/src/form_utils.jl
@@ -0,0 +1,232 @@
=#

# ---------------------------------------------------------------------------
# src/form_utils.jl
# ---------------------------------------------------------------------------

# Dictionary mapping Julia types to NumPy primitive names.
const julia_to_numpy = Dict(
    Int8 => "int8",
    UInt8 => "uint8",
    Int16 => "int16",
    UInt16 => "uint16",
    Int32 => "int32",
    UInt32 => "uint32",
    Int64 => "int64",
    UInt64 => "uint64",
    Float16 => "float16",
    Float32 => "float32",
    Float64 => "float64",
    Bool => "bool",
    Complex{Float32} => "complex64",
    Complex{Float64} => "complex128",
    String => "str",
)

# `parameters` fragment shared by the string-like list forms. The leading space is
# part of the emitted text: it follows the ", " that closes the content, so the
# output contains two consecutive spaces at that position.
const _STRING_PARAMETERS = " \"parameters\": { \"__array__\": \"string\" }, "

"""
    julia_to_numpy_type(julia_type::Type) -> String

Return the NumPy primitive name registered for `julia_type` in `julia_to_numpy`,
or `"unknown"` when the type has no entry.
"""
function julia_to_numpy_type(julia_type::Type)
    return get(julia_to_numpy, julia_type, "unknown")
end

"""
    _generate_form_key!(form_key_id_ref::Base.RefValue{<:Integer}) -> String

Return `"node<id>"` for the id currently stored in `form_key_id_ref` and then
increment the stored id by one.
"""
function _generate_form_key!(form_key_id_ref::Base.RefValue{<:Integer})
    form_key_id = form_key_id_ref[]
    form_key_id_ref[] += 1
    return "node$form_key_id"
end

"""
    json_numpy_form(parameters::String, form_key::String) -> String

Assemble the JSON text of a `NumpyArray` form.

`parameters` is spliced in right after the opening quote of the `"primitive"` value,
so it must start with the primitive name and supply the closing quote, any extra
members, and the trailing `", "` itself (for example `"int32\\", "`).
"""
function json_numpy_form(parameters::String, form_key::String)
    return "{\"class\": \"NumpyArray\", \"primitive\": \"" * parameters *
           "\"form_key\": \"" * form_key * "\"}"
end

# Helper placeholder: every list form currently uses 64-bit offsets.
function type_to_numpy_like(::Type{T}) where {T}
    return "int64" # Placeholder implementation
end

# Build a `NumpyArray` form for a numeric type found in `julia_to_numpy`.
# Types without an entry (e.g. `Int128`, `BigFloat`, abstract `Integer`) raise the
# same "not supported yet" error as the generic fallback instead of emitting
# `"primitive": "unknown"`. The check runs before a form key is consumed.
function _numpy_primitive_form(::Type{T}, form_key_id_ref) where {T}
    primitive = julia_to_numpy_type(T)
    primitive == "unknown" && error("Type '$T' is not supported yet.")
    form_key = _generate_form_key!(form_key_id_ref)
    return json_numpy_form(primitive * "\", ", form_key)
end

# Build a `ListOffsetArray` form. The list takes its form key first and the content
# is numbered afterwards, so keys are assigned parent-before-child.
function _list_offset_form(
    content_type, offsets::AbstractString, parameters::AbstractString, form_key_id_ref
)
    form_key = _generate_form_key!(form_key_id_ref)
    content = type_to_form(content_type, form_key_id_ref)
    return "{\"class\": \"ListOffsetArray\", \"offsets\": \"" * offsets * "\", " *
           "\"content\": " * content * ", " * parameters *
           "\"form_key\": \"" * form_key * "\"}"
end

"""
    type_to_form(::Type{T}, form_key_id::Integer=0) -> String
    type_to_form(::Type{T}, form_key_id_ref::Base.RefValue{<:Integer}) -> String

Return the JSON text of the form describing Julia type `T`.

- `Integer` (including `Bool`) and `AbstractFloat` subtypes listed in
  `julia_to_numpy` give a `NumpyArray` form.
- `Char` gives a `uint8` `NumpyArray` tagged `"__array__": "char"`.
- `String` gives a `ListOffsetArray` of `Char` tagged `"__array__": "string"`.
- `Vector{T}` and other `AbstractVector` subtypes give a `ListOffsetArray` whose
  content is the form of the element type. Non-`Vector` vectors of `Char` are
  additionally tagged `"__array__": "string"`.

Form keys are `"node<id>"`. With an integer `form_key_id` numbering starts there and
the caller's value is untouched. With a `Ref`, one id is consumed per generated node
and the reference is advanced, so successive calls keep numbering.

# Throws
- `ErrorException` ("Type 'T' is not supported yet.") for any other type, and for
  numeric types that have no entry in `julia_to_numpy`.
"""
function type_to_form(::Type{T}, form_key_id::Integer=0) where {T}
    # Accept any `Integer`: a bare `0` literal is an `Int`, which is `Int32` on
    # 32-bit builds, so an `::Int64` annotation would reject its own default there.
    return type_to_form(T, Ref{Int}(form_key_id))
end

# `Bool <: Integer`, so this method also produces the "bool" primitive.
function type_to_form(
    ::Type{T}, form_key_id_ref::Base.RefValue{<:Integer}
) where {T<:Integer}
    return _numpy_primitive_form(T, form_key_id_ref)
end

function type_to_form(
    ::Type{T}, form_key_id_ref::Base.RefValue{<:Integer}
) where {T<:AbstractFloat}
    return _numpy_primitive_form(T, form_key_id_ref)
end

function type_to_form(::Type{Char}, form_key_id_ref::Base.RefValue{<:Integer})
    form_key = _generate_form_key!(form_key_id_ref)
    parameters = "uint8\", \"parameters\": { \"__array__\": \"char\" }, "
    return json_numpy_form(parameters, form_key)
end

function type_to_form(::Type{String}, form_key_id_ref::Base.RefValue{<:Integer})
    return _list_offset_form(
        eltype(String), type_to_numpy_like(String), _STRING_PARAMETERS, form_key_id_ref
    )
end

# Plain `Vector{T}`: fixed "int64" offsets and no string tagging, even for `Char`.
function type_to_form(
    ::Type{Vector{T}}, form_key_id_ref::Base.RefValue{<:Integer}
) where {T}
    return _list_offset_form(T, "int64", "", form_key_id_ref)
end

# Any other iterable vector type (views, ranges, ...).
function type_to_form(
    ::Type{T}, form_key_id_ref::Base.RefValue{<:Integer}
) where {T<:AbstractVector}
    value_type = eltype(T)
    parameters = value_type == Char ? _STRING_PARAMETERS : ""
    return _list_offset_form(
        value_type, type_to_numpy_like(T), parameters, form_key_id_ref
    )
end

# Fallback for unsupported types. The integer entry point forwards here as well, so
# one-, two-argument and `Ref` calls all report the same error.
function type_to_form(::Type{T}, ::Base.RefValue{<:Integer}) where {T}
    return error("Type '$T' is not supported yet.")
end

"""
    tree_branches_type(tree, form_key_id::Integer=0) -> String

Return the JSON text of a `RecordArray` form with one field per property of `tree`.

For every name in `propertynames(tree)` the content is the form of
`eltype(getproperty(tree, name))`. Form keys are assigned to the contents first, in
property order, starting at `form_key_id`; the record itself takes the next key.

NOTE(review): field names are written without surrounding quotes
(`"fields": [a, b]`), which is not valid JSON. This is kept because existing callers
and tests compare against this exact text; confirm before changing.
"""
function tree_branches_type(tree, form_key_id::Integer=0)
    id_ref = Ref{Int}(form_key_id)
    names = propertynames(tree)

    fields = join(names, ", ")
    contents = join(
        (type_to_form(eltype(getproperty(tree, name)), id_ref) for name in names),
        ", ",
    )

    # Taken after the contents so the record's key follows all of its children.
    record_key = _generate_form_key!(id_ref)

    return "{\"class\": \"RecordArray\", \"fields\": [" * fields *
           "], \"contents\": [" * contents *
           "], \"parameters\": {}, \"form_key\": \"" * record_key * "\"}"
end

#=
Remaining patch content preserved from the original text. These hunks show only
parts of `test/runtests.jl`; line breaks, blank context lines and indentation are
reconstructed and may differ from the real file.

diff --git a/test/runtests.jl b/test/runtests.jl
index d6d2b25..2f3a7f3 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -313,7 +313,13 @@ end
         layout = AwkwardArray.ListOffsetArray([1, 2, 5], content_layout)
         @test layout[:a] == [[2], [3, 4, 5]]
-        @test_throws ErrorException getindex(layout, :invalid)
+
+        if VERSION >= v"1.12.0-DEV"
+            @test_throws FieldError getindex(layout, :invalid)
+        else
+            @test_throws ErrorException getindex(layout, :invalid)
+        end
+
         @test_throws AssertionError getindex(layout[:a], :invalid)
     end

@@ -490,7 +496,12 @@ end
         layout = AwkwardArray.ListArray([1, 2, 5], [2, 5, 5], content_layout)
         @test layout[:a] == [[2], [3, 4, 5], []]
-        @test_throws ErrorException getindex(layout, :invalid)
+        if VERSION >= v"1.12.0-DEV"
+            @test_throws FieldError getindex(layout, :invalid)
+        else
+            @test_throws ErrorException getindex(layout, :invalid)
+        end
+
         @test_throws AssertionError getindex(layout[:a], :invalid)
     end
 end
@@ -709,7 +720,12 @@ end
         layout = AwkwardArray.RegularArray(content_layout, 2)
         @test layout[:a] == [[1, 2], [3, 4]]
-        @test_throws ErrorException getindex(layout, :invalid)
+        if VERSION >= v"1.12.0-DEV"
+            @test_throws FieldError getindex(layout, :invalid)
+        else
+            @test_throws ErrorException getindex(layout, :invalid)
+        end
+
         @test_throws AssertionError getindex(layout[:a], :invalid)
     end
 end
@@ -3721,3 +3737,4 @@ end # @testset "AwkwardArray.jl"
 end # @testset "Tables.jl"

 include("./runpytests.jl")
+include("./test_106_form_from_type.jl")

diff --git a/test/test_106_form_from_type.jl b/test/test_106_form_from_type.jl
new file mode
=#
#=
Patch metadata preserved from the original text (continuation of the diff header
for this file; line breaks reconstructed):

100644
index 0000000..a552d80
--- /dev/null
+++ b/test/test_106_form_from_type.jl
@@ -0,0 +1,93 @@
=#

# ---------------------------------------------------------------------------
# test/test_106_form_from_type.jl
# ---------------------------------------------------------------------------

# Builders for the exact text `AwkwardArray.type_to_form` is expected to return.
# They keep the assertions readable; every character, including spacing, matters
# because the tests compare strings.

# Expected `NumpyArray` form for `primitive` with form key "node<key>".
function expected_numpy_form(primitive, key)
    return """{"class": "NumpyArray", "primitive": "$primitive", "form_key": "node$key"}"""
end

# Expected form of a `Char`: a `uint8` `NumpyArray` tagged as "char".
function expected_char_form(key)
    return "{\"class\": \"NumpyArray\", \"primitive\": \"uint8\", " *
           "\"parameters\": { \"__array__\": \"char\" }, \"form_key\": \"node$key\"}"
end

# Expected `ListOffsetArray` form wrapping the already-built `content` form.
function expected_list_form(content, key)
    return "{\"class\": \"ListOffsetArray\", \"offsets\": \"int64\", " *
           "\"content\": " * content * ", \"form_key\": \"node$key\"}"
end

# Expected form of a `String`: a list (key `key`) of chars (key `key + 1`).
# The generator joins the content with ", " and its parameters text starts with a
# space, so there are TWO spaces between the content's "}," and "parameters".
function expected_string_form(key)
    return "{\"class\": \"ListOffsetArray\", \"offsets\": \"int64\", " *
           "\"content\": " * expected_char_form(key + 1) * ",  " *
           "\"parameters\": { \"__array__\": \"string\" }, \"form_key\": \"node$key\"}"
end

# Expected form of a vector of vectors: keys `key`, `key + 1`, `key + 2` from the
# outermost list down to the primitive.
function expected_nested_list_form(primitive, key)
    inner = expected_list_form(expected_numpy_form(primitive, key + 2), key + 1)
    return expected_list_form(inner, key)
end

@testset "Form from type" begin
    @test AwkwardArray.type_to_form(Bool, 1) == expected_numpy_form("bool", 1)
    @test AwkwardArray.type_to_form(Int, 1) == expected_numpy_form("int64", 1)
    @test AwkwardArray.type_to_form(Int32, 1) == expected_numpy_form("int32", 1)
    @test AwkwardArray.type_to_form(Int32, 0) == expected_numpy_form("int32", 0)
    @test AwkwardArray.type_to_form(Int64, 1) == expected_numpy_form("int64", 1)
    @test AwkwardArray.type_to_form(Char, 1) == expected_char_form(1)
    @test AwkwardArray.type_to_form(String, 1) == expected_string_form(1)

    for (T, primitive) in (
        (Int, "int64"),
        (Int32, "int32"),
        (Int64, "int64"),
        (Float32, "float32"),
        (Float64, "float64"),
    )
        @test AwkwardArray.type_to_form(Vector{T}, 1) ==
            expected_list_form(expected_numpy_form(primitive, 2), 1)
        @test AwkwardArray.type_to_form(Vector{Vector{T}}, 0) ==
            expected_nested_list_form(primitive, 0)
    end

    @test AwkwardArray.type_to_form(SubArray{Int32, 1, nothing}, 1) ==
        expected_list_form(expected_numpy_form("int32", 2), 1)
end

@testset "Form from type with an id reference" begin
    # The reference is shared by all calls below, so every generated node consumes
    # the next id and the order of the assertions matters.
    id = 1
    id_ref = Ref(id)

    @test AwkwardArray.type_to_form(Bool, id_ref) == expected_numpy_form("bool", 1)
    @test AwkwardArray.type_to_form(Int, id_ref) == expected_numpy_form("int64", 2)
    @test AwkwardArray.type_to_form(Int32, id_ref) == expected_numpy_form("int32", 3)
    @test AwkwardArray.type_to_form(Int32, id_ref) == expected_numpy_form("int32", 4)
    @test AwkwardArray.type_to_form(Int64, id_ref) == expected_numpy_form("int64", 5)
    @test AwkwardArray.type_to_form(Char, id_ref) == expected_char_form(6)
    @test AwkwardArray.type_to_form(String, id_ref) == expected_string_form(7)

    # Each list consumes two ids: the list itself, then its content.
    @test AwkwardArray.type_to_form(Vector{Int}, id_ref) ==
        expected_list_form(expected_numpy_form("int64", 10), 9)
    @test AwkwardArray.type_to_form(Vector{Int32}, id_ref) ==
        expected_list_form(expected_numpy_form("int32", 12), 11)
    @test AwkwardArray.type_to_form(Vector{Int64}, id_ref) ==
        expected_list_form(expected_numpy_form("int64", 14), 13)
    @test AwkwardArray.type_to_form(Vector{Float32}, id_ref) ==
        expected_list_form(expected_numpy_form("float32", 16), 15)
    @test AwkwardArray.type_to_form(Vector{Float64}, id_ref) ==
        expected_list_form(expected_numpy_form("float64", 18), 17)

    # Each nested list consumes three ids.
    @test AwkwardArray.type_to_form(Vector{Vector{Int}}, id_ref) ==
        expected_nested_list_form("int64", 19)
    @test AwkwardArray.type_to_form(Vector{Vector{Int32}}, id_ref) ==
        expected_nested_list_form("int32", 22)
    @test AwkwardArray.type_to_form(Vector{Vector{Int64}}, id_ref) ==
        expected_nested_list_form("int64", 25)
    @test AwkwardArray.type_to_form(Vector{Vector{Float32}}, id_ref) ==
        expected_nested_list_form("float32", 28)
    @test AwkwardArray.type_to_form(Vector{Vector{Float64}}, id_ref) ==
        expected_nested_list_form("float64", 31)

    @test AwkwardArray.type_to_form(SubArray{Int32, 1, nothing}, id_ref) ==
        expected_list_form(expected_numpy_form("int32", 35), 34)
end

@testset "type_to_form error tests" begin
    for (T, name) in ((Any, "Any"), (Nothing, "Nothing"))
        # Capture the exception so both its type and its message can be checked;
        # `nothing` means the call unexpectedly succeeded.
        err = try
            AwkwardArray.type_to_form(T, 1)
            nothing
        catch e
            e
        end
        @test err isa ErrorException
        @test err isa ErrorException &&
            occursin("Type '$name' is not supported yet.", err.msg)
    end
end

@testset "Key generate" begin
    begin
        id = 1
        id_ref = Ref(id)
        node_key = AwkwardArray._generate_form_key!(id_ref)
        @test node_key == "node1"
        # The stored id is advanced by exactly one per generated key.
        @test id_ref[] == 2
    end
end

@testset "tree_branches_type tests" begin
    mutable struct TestTree
        field1::Vector{Int}
        field2::Vector{Int}
    end

    tree = TestTree([1, 2, 3], [4, 5, 6])
    # Contents are numbered first (node0, node1); the record takes the next key.
    # Field names appear unquoted in the text produced by `tree_branches_type`.
    expected_form = """{"class": "RecordArray", "fields": [field1, field2], "contents": [""" *
        expected_numpy_form("int64", 0) * ", " * expected_numpy_form("int64", 1) *
        """], "parameters": {}, "form_key": "node2"}"""
    @test AwkwardArray.tree_branches_type(tree) == expected_form
end