Skip to content

Commit

Permalink
feat: form from type functions (#106)
Browse files Browse the repository at this point in the history
* form from type functions

* add check on iterable type

* add test

* correct numpy type

* fix offsets type

* add test

* add module

* move function out

* add more functions

* try to fix Symbol issue

* add more tests

* cleanup and add more tests

* add tree form test function

* add tests

* fix FieldError

* use FieldError for nightly only

* fix typo

* one more

* last one
  • Loading branch information
ianna authored Jun 14, 2024
1 parent 3993678 commit 96c45c4
Show file tree
Hide file tree
Showing 4 changed files with 346 additions and 3 deletions.
1 change: 1 addition & 0 deletions src/AwkwardArray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import Tables

include("./all_implementations.jl")
include("./tables.jl")
include("./form_utils.jl")

include("./AwkwardPythonCallExt.jl")
using .AwkwardPythonCallExt: convert
Expand Down
232 changes: 232 additions & 0 deletions src/form_utils.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@

# Lookup table from Julia element types to the NumPy dtype names used in the
# "primitive" entry of a form. Types that are absent from this table are
# reported as "unknown" by `julia_to_numpy_type`.
const julia_to_numpy = Dict{DataType,String}(
    # signed / unsigned integers
    Int8 => "int8",
    UInt8 => "uint8",
    Int16 => "int16",
    UInt16 => "uint16",
    Int32 => "int32",
    UInt32 => "uint32",
    Int64 => "int64",
    UInt64 => "uint64",
    # floating point
    Float16 => "float16",
    Float32 => "float32",
    Float64 => "float64",
    # boolean
    Bool => "bool",
    # complex (ComplexF32 / ComplexF64 are Base aliases of Complex{Float32/64})
    ComplexF32 => "complex64",
    ComplexF64 => "complex128",
    # text
    String => "str",
)

# Return the NumPy dtype name registered for `julia_type` in `julia_to_numpy`,
# or the sentinel string "unknown" when the type has no entry.
function julia_to_numpy_type(julia_type::Type)
    return get(julia_to_numpy, julia_type) do
        "unknown"
    end
end

# Return the form key "node<k>" for the counter's current value `k`, then
# advance the counter held in `form_key_id_ref` by one.
function _generate_form_key!(form_key_id_ref::Base.RefValue{Int64})
    current = form_key_id_ref[]
    form_key_id_ref[] = current + 1
    return string("node", current)
end

# Assemble the JSON text of a NumpyArray form.
#
# `parameters` is spliced directly after the opening quote of the "primitive"
# value, so the caller must supply the primitive name together with its
# closing quote and the trailing ", " separator (plus any extra entries).
# `form_key` is inserted verbatim as the "form_key" value.
function json_numpy_form(parameters::String, form_key::String)
    return string(
        "{\"class\": \"NumpyArray\", \"primitive\": \"",
        parameters,
        "\"form_key\": \"",
        form_key,
        "\"}",
    )
end

# Forms for primitive (NumPy-backed) element types.
#
# `Bool <: Integer`, so the `Integer` methods below also serve `Bool`; no
# separate `Bool` methods are needed.

# Return the text that `json_numpy_form` splices into the "primitive" entry
# for `T`: the NumPy dtype name followed by the closing quote and separator.
#
# Throws an `ErrorException` when `julia_to_numpy_type` reports "unknown"
# (e.g. `Int128` or `BigFloat`, which have no NumPy mapping), so that an
# unsupported primitive is rejected instead of producing a form whose
# primitive is the "unknown" sentinel.
function _numpy_primitive_entry(::Type{T}) where {T}
    primitive = julia_to_numpy_type(T)
    if primitive == "unknown"
        error("Type '$T' is not supported yet.")
    end
    return primitive * "\", "
end

# NumpyArray form for an integer (or `Bool`) type, keyed "node<form_key_id>".
function type_to_form(::Type{T}, form_key_id::Int64=0) where {T <: Integer}
    return json_numpy_form(_numpy_primitive_entry(T), "node$(form_key_id)")
end

# Same as above, but the key is drawn from (and advances) the shared counter.
function type_to_form(::Type{T}, form_key_id_ref::Base.RefValue{Int64}) where {T <: Integer}
    # Resolve the primitive first so an unsupported type does not consume a key.
    parameters = _numpy_primitive_entry(T)
    return json_numpy_form(parameters, _generate_form_key!(form_key_id_ref))
end

# NumpyArray form for a floating-point type, keyed "node<form_key_id>".
function type_to_form(::Type{T}, form_key_id::Int64=0) where {T <: AbstractFloat}
    return json_numpy_form(_numpy_primitive_entry(T), "node$(form_key_id)")
end

# Same as above, but the key is drawn from (and advances) the shared counter.
function type_to_form(::Type{T}, form_key_id_ref::Base.RefValue{Int64}) where {T <: AbstractFloat}
    # Resolve the primitive first so an unsupported type does not consume a key.
    parameters = _numpy_primitive_entry(T)
    return json_numpy_form(parameters, _generate_form_key!(form_key_id_ref))
end

# NumpyArray form for `Char`: a uint8 primitive tagged with the "char" array
# parameter and keyed "node<form_key_id>".
function type_to_form(::Type{T}, form_key_id::Int64=0) where {T <: Char}
    char_entry = "uint8\", \"parameters\": { \"__array__\": \"char\" }, "
    node_key = "node$(form_key_id)"
    return json_numpy_form(char_entry, node_key)
end

# NumpyArray form for `Char` (uint8 primitive tagged "char"); the key is drawn
# from, and advances, the shared counter in `form_key_id_ref`.
function type_to_form(::Type{T}, form_key_id_ref::Base.RefValue{Int64}) where {T <: Char}
    node_key = _generate_form_key!(form_key_id_ref)
    char_entry = "uint8\", \"parameters\": { \"__array__\": \"char\" }, "
    return json_numpy_form(char_entry, node_key)
end

# ListOffsetArray form for `String`, tagged with the "string" array parameter.
# The list node is keyed "node<form_key_id>"; its content is the form of
# `eltype(T)` keyed with the next id.
function type_to_form(::Type{T}, form_key_id::Int64=0) where {T <: String}
    list_key = "node$(form_key_id)"
    # The content node takes the id that follows the list node.
    content_form = type_to_form(eltype(T), form_key_id + 1)
    string_tag = " \"parameters\": { \"__array__\": \"string\" }, "

    return string(
        "{\"class\": \"ListOffsetArray\", \"offsets\": \"",
        type_to_numpy_like(T),
        "\", ",
        "\"content\": ",
        content_form,
        ", ",
        string_tag,
        "\"form_key\": \"",
        list_key,
        "\"}",
    )
end

# ListOffsetArray form for `String`, tagged with the "string" array parameter.
# Keys come from the shared counter: the list node takes its key first, then
# the content form of `eltype(T)` takes the following one(s).
function type_to_form(::Type{T}, form_key_id_ref::Base.RefValue{Int64}) where {T <: String}
    # Order matters: the list key must be generated before the content's key.
    list_key = _generate_form_key!(form_key_id_ref)
    content_form = type_to_form(eltype(T), form_key_id_ref)
    string_tag = " \"parameters\": { \"__array__\": \"string\" }, "

    return string(
        "{\"class\": \"ListOffsetArray\", \"offsets\": \"",
        type_to_numpy_like(T),
        "\", ",
        "\"content\": ",
        content_form,
        ", ",
        string_tag,
        "\"form_key\": \"",
        list_key,
        "\"}",
    )
end

# ListOffsetArray form (int64 offsets) for a concrete `Vector{T}`.
# The list node is keyed "node<form_key_id>"; the form of the element type `T`
# is built with the next id.
function type_to_form(::Type{Vector{T}}, form_key_id::Int64=0) where {T}
    inner_form = type_to_form(T, form_key_id + 1)
    return string(
        "{\"class\": \"ListOffsetArray\", \"offsets\": \"int64\", ",
        "\"content\": ",
        inner_form,
        ", ",
        "\"form_key\": \"node$(form_key_id)\"}",
    )
end

# ListOffsetArray form (int64 offsets) for a concrete `Vector{T}`, with keys
# drawn from the shared counter: the list node first, then the element form.
function type_to_form(::Type{Vector{T}}, form_key_id_ref::Base.RefValue{Int64}) where {T}
    # Order matters: the list key must be generated before the content's key.
    list_key = _generate_form_key!(form_key_id_ref)
    inner_form = type_to_form(T, form_key_id_ref)

    return string(
        "{\"class\": \"ListOffsetArray\", \"offsets\": \"int64\", ",
        "\"content\": ",
        inner_form,
        ", ",
        "\"form_key\": \"",
        list_key,
        "\"}",
    )
end

# ListOffsetArray form for any other `AbstractVector` type.
# The list node is keyed "node<form_key_id>" and the form of `eltype(T)` is
# built with the next id. When the element type is `Char`, the list is tagged
# with the "string" array parameter.
function type_to_form(::Type{T}, form_key_id::Int64=0) where {T <: AbstractVector}
    element_type = eltype(T)
    list_key = "node$(form_key_id)"

    extra = element_type == Char ? " \"parameters\": { \"__array__\": \"string\" }, " : ""

    # The content node takes the id that follows the list node.
    content_form = type_to_form(element_type, form_key_id + 1)

    return string(
        "{\"class\": \"ListOffsetArray\", \"offsets\": \"",
        type_to_numpy_like(T),
        "\", ",
        "\"content\": ",
        content_form,
        ", ",
        extra,
        "\"form_key\": \"",
        list_key,
        "\"}",
    )
end

# ListOffsetArray form for any other `AbstractVector` type, with keys drawn
# from the shared counter: the list node first, then the form of `eltype(T)`.
# When the element type is `Char`, the list is tagged with the "string" array
# parameter.
function type_to_form(::Type{T}, form_key_id_ref::Base.RefValue{Int64}) where {T <: AbstractVector}
    element_type = eltype(T)
    # Order matters: the list key must be generated before the content's key.
    list_key = _generate_form_key!(form_key_id_ref)

    extra = element_type == Char ? " \"parameters\": { \"__array__\": \"string\" }, " : ""

    content_form = type_to_form(element_type, form_key_id_ref)

    return string(
        "{\"class\": \"ListOffsetArray\", \"offsets\": \"",
        type_to_numpy_like(T),
        "\", ",
        "\"content\": ",
        content_form,
        ", ",
        extra,
        "\"form_key\": \"",
        list_key,
        "\"}",
    )
end

# Fallback for types that have no dedicated `type_to_form` method.
#
# Always throws an `ErrorException` naming the unsupported type `T`. The key
# argument is not used; it defaults to 0 like the other `Int64` methods so
# that `type_to_form(T)` reports this same error rather than a `MethodError`.
function type_to_form(::Type{T}, form_key_id::Int64=0) where {T}
    error("Type '$T' is not supported yet.")
end

# Fallback for types without a dedicated method when a key counter is passed:
# always throws an `ErrorException` naming `T`; the counter is left untouched.
function type_to_form(::Type{T}, ::Base.RefValue{Int64}) where {T}
    throw(ErrorException("Type '$T' is not supported yet."))
end

# Name of the offsets dtype used for list forms of type `T`.
# Placeholder implementation: the answer is "int64" for every `T`.
type_to_numpy_like(::Type{T}) where {T} = "int64"

# Build a RecordArray form covering all branches of a tree.
#
# Every name in `propertynames(tree)` becomes a field; its content form is
# `type_to_form` applied to the element type of the corresponding property.
# All nodes draw their keys from one counter that starts at `form_key_id`,
# and the record itself takes the key generated after all of its contents.
#
# NOTE(review): field names are written without surrounding quotes, so the
# "fields" list is not strict JSON — confirm what consumers expect before
# changing it.
function tree_branches_type(tree, form_key_id::Int64=0)
    key_counter = Ref(form_key_id)

    field_names = String[]
    content_forms = String[]
    for name in propertynames(tree)
        push!(field_names, "$name")
        element_type = eltype(getproperty(tree, name))
        push!(content_forms, type_to_form(element_type, key_counter))
    end

    # The record's own key comes last, after every branch has taken its keys.
    record_key = _generate_form_key!(key_counter)

    return string(
        """{"class": "RecordArray", "fields": [""",
        join(field_names, ", "),
        """], "contents": [""",
        join(content_forms, ", "),
        """], "parameters": {}, "form_key": \"""",
        record_key,
        "\"}",
    )
end

23 changes: 20 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,13 @@ end
layout = AwkwardArray.ListOffsetArray([1, 2, 5], content_layout)

@test layout[:a] == [[2], [3, 4, 5]]
@test_throws ErrorException getindex(layout, :invalid)

if VERSION >= v"1.12.0-DEV"
@test_throws FieldError getindex(layout, :invalid)
else
@test_throws ErrorException getindex(layout, :invalid)
end

@test_throws AssertionError getindex(layout[:a], :invalid)
end

Expand Down Expand Up @@ -490,7 +496,12 @@ end
layout = AwkwardArray.ListArray([1, 2, 5], [2, 5, 5], content_layout)

@test layout[:a] == [[2], [3, 4, 5], []]
@test_throws ErrorException getindex(layout, :invalid)
if VERSION >= v"1.12.0-DEV"
@test_throws FieldError getindex(layout, :invalid)
else
@test_throws ErrorException getindex(layout, :invalid)
end

@test_throws AssertionError getindex(layout[:a], :invalid)
end
end
Expand Down Expand Up @@ -709,7 +720,12 @@ end
layout = AwkwardArray.RegularArray(content_layout, 2)

@test layout[:a] == [[1, 2], [3, 4]]
@test_throws ErrorException getindex(layout, :invalid)
if VERSION >= v"1.12.0-DEV"
@test_throws FieldError getindex(layout, :invalid)
else
@test_throws ErrorException getindex(layout, :invalid)
end

@test_throws AssertionError getindex(layout[:a], :invalid)
end
end
Expand Down Expand Up @@ -3721,3 +3737,4 @@ end # @testset "AwkwardArray.jl"
end # @testset "Tables.jl"

include("./runpytests.jl")
include("./test_106_form_from_type.jl")
93 changes: 93 additions & 0 deletions test/test_106_form_from_type.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@

# Checks the JSON form text produced by `type_to_form` when the starting key
# is passed as a plain integer. Each call is independent: the outermost node
# is keyed with the given id and every nested content node with the next id.
@testset "Form from type" begin

# Primitive types map to a single NumpyArray node; `Char` is a uint8 node
# tagged "char" and `String` a ListOffsetArray of such nodes tagged "string".
@test AwkwardArray.type_to_form(Bool, 1) == """{"class": "NumpyArray", "primitive": "bool", "form_key": "node1"}"""
@test AwkwardArray.type_to_form(Int, 1) == """{"class": "NumpyArray", "primitive": "int64", "form_key": "node1"}"""
@test AwkwardArray.type_to_form(Int32, 1) == """{"class": "NumpyArray", "primitive": "int32", "form_key": "node1"}"""
@test AwkwardArray.type_to_form(Int32, 0) == """{"class": "NumpyArray", "primitive": "int32", "form_key": "node0"}"""
@test AwkwardArray.type_to_form(Int64, 1) == "{\"class\": \"NumpyArray\", \"primitive\": \"int64\", \"form_key\": \"node1\"}"
@test AwkwardArray.type_to_form(Char, 1) == "{\"class\": \"NumpyArray\", \"primitive\": \"uint8\", \"parameters\": { \"__array__\": \"char\" }, \"form_key\": \"node1\"}"
@test AwkwardArray.type_to_form(String, 1) == "{\"class\": \"ListOffsetArray\", \"offsets\": \"int64\", \"content\": {\"class\": \"NumpyArray\", \"primitive\": \"uint8\", \"parameters\": { \"__array__\": \"char\" }, \"form_key\": \"node2\"}, \"parameters\": { \"__array__\": \"string\" }, \"form_key\": \"node1\"}"

# One level of nesting: the list is node1 and its content node2.
@test AwkwardArray.type_to_form(Vector{Int}, 1) == "{\"class\": \"ListOffsetArray\", \"offsets\": \"int64\", \"content\": {\"class\": \"NumpyArray\", \"primitive\": \"int64\", \"form_key\": \"node2\"}, \"form_key\": \"node1\"}"
@test AwkwardArray.type_to_form(Vector{Int32}, 1) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "int32", "form_key": "node2"}, "form_key": "node1"}"""
@test AwkwardArray.type_to_form(Vector{Int64}, 1) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "int64", "form_key": "node2"}, "form_key": "node1"}"""
@test AwkwardArray.type_to_form(Vector{Float32}, 1) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "float32", "form_key": "node2"}, "form_key": "node1"}"""
@test AwkwardArray.type_to_form(Vector{Float64}, 1) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "float64", "form_key": "node2"}, "form_key": "node1"}"""

# Two levels of nesting starting at id 0: node0 -> node1 -> node2.
@test AwkwardArray.type_to_form(Vector{Vector{Int}}, 0) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "int64", "form_key": "node2"}, "form_key": "node1"}, "form_key": "node0"}"""
@test AwkwardArray.type_to_form(Vector{Vector{Int32}}, 0) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "int32", "form_key": "node2"}, "form_key": "node1"}, "form_key": "node0"}"""
@test AwkwardArray.type_to_form(Vector{Vector{Int64}}, 0) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "int64", "form_key": "node2"}, "form_key": "node1"}, "form_key": "node0"}"""
@test AwkwardArray.type_to_form(Vector{Vector{Float32}}, 0) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "float32", "form_key": "node2"}, "form_key": "node1"}, "form_key": "node0"}"""
@test AwkwardArray.type_to_form(Vector{Vector{Float64}}, 0) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "float64", "form_key": "node2"}, "form_key": "node1"}, "form_key": "node0"}"""

# A non-`Vector` `AbstractVector` type goes through the generic list method.
@test AwkwardArray.type_to_form(SubArray{Int32, 1, nothing}, 1) == "{\"class\": \"ListOffsetArray\", \"offsets\": \"int64\", \"content\": {\"class\": \"NumpyArray\", \"primitive\": \"int32\", \"form_key\": \"node2\"}, \"form_key\": \"node1\"}"

end

# Checks the JSON form text produced by `type_to_form` when keys are drawn
# from a shared counter. `id_ref` is reused by every assertion below and each
# generated node advances it, so the expected keys depend on the order of the
# calls: do not reorder or remove assertions without renumbering.
@testset "Form from type with an id reference" begin
id = 1
id_ref = Ref(id)

# Single-node forms consume one key each (node1 .. node6); `String` consumes
# two (list node7, content node8).
@test AwkwardArray.type_to_form(Bool, id_ref) == """{"class": "NumpyArray", "primitive": "bool", "form_key": "node1"}"""
@test AwkwardArray.type_to_form(Int, id_ref) == """{"class": "NumpyArray", "primitive": "int64", "form_key": "node2"}"""
@test AwkwardArray.type_to_form(Int32, id_ref) == """{"class": "NumpyArray", "primitive": "int32", "form_key": "node3"}"""
@test AwkwardArray.type_to_form(Int32, id_ref) == """{"class": "NumpyArray", "primitive": "int32", "form_key": "node4"}"""
@test AwkwardArray.type_to_form(Int64, id_ref) == "{\"class\": \"NumpyArray\", \"primitive\": \"int64\", \"form_key\": \"node5\"}"
@test AwkwardArray.type_to_form(Char, id_ref) == "{\"class\": \"NumpyArray\", \"primitive\": \"uint8\", \"parameters\": { \"__array__\": \"char\" }, \"form_key\": \"node6\"}"
@test AwkwardArray.type_to_form(String, id_ref) == "{\"class\": \"ListOffsetArray\", \"offsets\": \"int64\", \"content\": {\"class\": \"NumpyArray\", \"primitive\": \"uint8\", \"parameters\": { \"__array__\": \"char\" }, \"form_key\": \"node8\"}, \"parameters\": { \"__array__\": \"string\" }, \"form_key\": \"node7\"}"

# One level of nesting consumes two keys per call: the list takes the lower
# key, its content the next one.
@test AwkwardArray.type_to_form(Vector{Int}, id_ref) == "{\"class\": \"ListOffsetArray\", \"offsets\": \"int64\", \"content\": {\"class\": \"NumpyArray\", \"primitive\": \"int64\", \"form_key\": \"node10\"}, \"form_key\": \"node9\"}"
@test AwkwardArray.type_to_form(Vector{Int32}, id_ref) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "int32", "form_key": "node12"}, "form_key": "node11"}"""
@test AwkwardArray.type_to_form(Vector{Int64}, id_ref) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "int64", "form_key": "node14"}, "form_key": "node13"}"""
@test AwkwardArray.type_to_form(Vector{Float32}, id_ref) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "float32", "form_key": "node16"}, "form_key": "node15"}"""
@test AwkwardArray.type_to_form(Vector{Float64}, id_ref) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "float64", "form_key": "node18"}, "form_key": "node17"}"""

# Two levels of nesting consume three keys per call, outermost first.
@test AwkwardArray.type_to_form(Vector{Vector{Int}}, id_ref) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "int64", "form_key": "node21"}, "form_key": "node20"}, "form_key": "node19"}"""
@test AwkwardArray.type_to_form(Vector{Vector{Int32}}, id_ref) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "int32", "form_key": "node24"}, "form_key": "node23"}, "form_key": "node22"}"""
@test AwkwardArray.type_to_form(Vector{Vector{Int64}}, id_ref) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "int64", "form_key": "node27"}, "form_key": "node26"}, "form_key": "node25"}"""
@test AwkwardArray.type_to_form(Vector{Vector{Float32}}, id_ref) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "float32", "form_key": "node30"}, "form_key": "node29"}, "form_key": "node28"}"""
@test AwkwardArray.type_to_form(Vector{Vector{Float64}}, id_ref) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "float64", "form_key": "node33"}, "form_key": "node32"}, "form_key": "node31"}"""

# A non-`Vector` `AbstractVector` type goes through the generic list method.
@test AwkwardArray.type_to_form(SubArray{Int32, 1, nothing}, id_ref) == "{\"class\": \"ListOffsetArray\", \"offsets\": \"int64\", \"content\": {\"class\": \"NumpyArray\", \"primitive\": \"int32\", \"form_key\": \"node35\"}, \"form_key\": \"node34\"}"

end

# Types without a dedicated `type_to_form` method must raise an
# `ErrorException` that names the offending type.
@testset "type_to_form error tests" begin
    # Passing an exception value to `@test_throws` checks both the exception
    # type and its fields (here: the message), and fails cleanly when nothing
    # is thrown, so no hand-written try/catch is needed.
    @test_throws ErrorException("Type 'Any' is not supported yet.") AwkwardArray.type_to_form(Any, 1)
    @test_throws ErrorException("Type 'Nothing' is not supported yet.") AwkwardArray.type_to_form(Nothing, 1)
end

# `_generate_form_key!` returns the key for the counter's current value and
# then advances the counter by one.
@testset "Key generate" begin
    counter = Ref(1)
    generated = AwkwardArray._generate_form_key!(counter)
    @test generated == "node1"
    @test counter[] == 2
end

# `tree_branches_type` on a two-field object: each `Vector{Int}` field has
# element type `Int`, so both branches become int64 NumpyArray nodes (node0,
# node1) and the enclosing record takes the last key (node2).
@testset "tree_branches_type tests" begin
    mutable struct TestTree
        field1::Vector{Int}
        field2::Vector{Int}
    end

    actual_form = AwkwardArray.tree_branches_type(TestTree([1, 2, 3], [4, 5, 6]))
    @test actual_form == """{"class": "RecordArray", "fields": [field1, field2], "contents": [{"class": "NumpyArray", "primitive": "int64", "form_key": "node0"}, {"class": "NumpyArray", "primitive": "int64", "form_key": "node1"}], "parameters": {}, "form_key": "node2"}"""
end

0 comments on commit 96c45c4

Please sign in to comment.