Skip to content

Commit

Permalink
read function, delim for each datatable, methodtable, and calibration…
Browse files Browse the repository at this point in the history
…, use index for getanalyte when possible
  • Loading branch information
yufongpeng committed Jan 8, 2024
1 parent 8797489 commit 247166b
Show file tree
Hide file tree
Showing 46 changed files with 238 additions and 51 deletions.
14 changes: 10 additions & 4 deletions src/ChemistryQuantitativeAnalysis.jl
Original file line number Diff line number Diff line change
Expand Up @@ -420,10 +420,16 @@ function Batch{T}(method::MethodTable{A, <: T}, data = nothing;
) where {A, T}
Batch{T}(
method,
length(method.conctable.sample) > 1 ? map(method.conctable.analyte) do analyte
calibration(method, analyte; type, zero, weight)
end : map(method.conctable.analyte) do analyte
SingleCalibration((analyte, ), first(getanalyte(method.conctable, analyte)))
if length(method.conctable.sample) > 1
method.conctable.analyte == method.signaltable.analyte ? map(eachindex(method.conctable.analyte)) do i
calibration(method, i; type, zero, weight)
end : map(method.conctable.analyte) do analyte
calibration(method, analyte; type, zero, weight)
end
else
map(method.conctable.analyte) do analyte
SingleCalibration((analyte, ), first(getanalyte(method.conctable, analyte)))
end
end
,
data
Expand Down
44 changes: 38 additions & 6 deletions src/cal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ function relative_signal(method::MethodTable, dt::AbstractDataTable)
aid = [findfirst(==(analyte), analytetable.analyte) for analyte in dt.analyte]
cs = ThreadsX.map(eachindex(aid)) do i
analytetable.isd[aid[i]] < 0 ? repeat([NaN], length(dt.sample)) :
analytetable.isd[aid[i]] == 0 ? getanalyte(dt, dt.analyte[i]) :
getanalyte(dt, dt.analyte[i]) ./ getanalyte(dt, analytetable.analyte[analytetable.isd[aid[i]]])
analytetable.isd[aid[i]] == 0 ? getanalyte(dt, i) :
getanalyte(dt, i) ./ getanalyte(dt, analytetable.analyte[analytetable.isd[aid[i]]])
end
fill_result!(deepcopy(dt), cs)
end
Expand Down Expand Up @@ -70,7 +70,7 @@ function inv_predict(batch::Batch, dt::AbstractDataTable)
cal_id = [id > 0 ? findfirst(cal -> first(cal.analyte) == analytetable.analyte[id], batch.calibration) : nothing for id in cid]
cs = ThreadsX.map(eachindex(cal_id)) do i
isnothing(cal_id[i]) ? repeat([NaN], length(dt.sample)) :
inv_predict(batch.calibration[cal_id[i]], getanalyte(dt, dt.analyte[i]))
inv_predict(batch.calibration[cal_id[i]], getanalyte(dt, i))
end
fill_result!(deepcopy(dt), cs)
end
Expand Down Expand Up @@ -171,7 +171,7 @@ function quantification(batch::Batch, dt::AbstractDataTable)
cs = ThreadsX.map(eachindex(cal_id)) do i
isnothing(cal_id[i]) && return repeat([NaN], length(dt.sample))
cal = batch.calibration[cal_id[i]]
quantification(cal, dt; analyte = (dt.analyte[i], last(cal.analyte)))
quantification(cal, dt, i, last(cal.analyte))
end
fill_result!(deepcopy(dt), cs)
end
Expand All @@ -188,6 +188,11 @@ function quantification(cal::AbstractCalibration, dt::AbstractDataTable; analyte
inv_predict(cal, getanalyte(dt, first(analyte)) ./ getanalyte(dt, last(analyte)))
end

"""
    quantification(cal::AbstractCalibration, dt::AbstractDataTable, analyte, isd)

Apply `inv_predict` of `cal` to the data of `analyte` in `dt`.

`analyte` and `isd` are forwarded unchanged to `getanalyte`. When `isd` is `nothing`,
the data of `analyte` are used directly; otherwise they are first divided elementwise
by the data of `isd` taken from the same table.
"""
function quantification(cal::AbstractCalibration, dt::AbstractDataTable, analyte, isd)
    signal = getanalyte(dt, analyte)
    if isnothing(isd)
        # No internal standard: predict from the raw data.
        return inv_predict(cal, signal)
    else
        # Normalise by the internal standard before prediction.
        return inv_predict(cal, signal ./ getanalyte(dt, isd))
    end
end

"""
set_quantification(at::AnalysisTable, batch::Batch; signal = batch.method.signal, relsig = :relative_signal, estimated_concentration = :estimated_concentration)
set_quantification!(at::AnalysisTable, batch::Batch; signal = batch.method.signal, relsig = :relative_signal, estimated_concentration = :estimated_concentration)
Expand Down Expand Up @@ -230,7 +235,9 @@ accuracy(at::AnalysisTable; true_concentration = :true_concentration, estimated_
accuracy(getproperty(at, estimated_concentration), getproperty(at, true_concentration))

function accuracy(dtp::AbstractDataTable, dtt::AbstractDataTable)
cs = ThreadsX.map(dtp.analyte) do analyte
cs = dtp.analyte == dtt.analyte ? ThreadsX.map(eachindex(dtp.analyte)) do i
accuracy(getanalyte(dtp, i), getanalyte(dtt, i))
end : ThreadsX.map(dtp.analyte) do analyte
accuracy(getanalyte(dtp, analyte), getanalyte(dtt, analyte))
end
fill_result!(deepcopy(dtp), cs)
Expand Down Expand Up @@ -331,7 +338,7 @@ calibration(batch::Batch{A}, analyte::B;
type = true,
zero = false,
weight = 0
) where {A, B <: A} = calibration(batch.method, analyte; id, isd, type, zero, weight)
) where {A, B} = calibration(batch.method, analyte; id, isd, type, zero, weight)
function calibration(method::MethodTable{A}, analyte::B;
id = method.signaltable.sample,
isd = isd_of(method, analyte),
Expand All @@ -357,6 +364,31 @@ function calibration(method::MethodTable{A}, analyte::B;
model = calfit(table, f, type, zero, weight)
inv_predict_accuracy!(MultipleCalibration((analyte, isd), type, zero, Float64(weight), f, table, model))
end
"""
    calibration(method::MethodTable, i::Int; id, isd, type, zero, weight)

Build a `MultipleCalibration` for the `i`-th analyte of `method.conctable`.

The index `i` is applied to both `method.conctable` and `method.signaltable`, so it is
only meaningful when both tables list their analytes in the same order.

# Keywords
- `id`: sample identifiers of the calibration points; defaults to `method.signaltable.sample`.
- `isd`: internal standard passed to `getanalyte(method.signaltable, isd)`, or `nothing`;
  defaults to `isd_of(method, method.conctable.analyte[i])`.
- `type`, `zero`, `weight`: forwarded to `getformula` and `calfit`; `weight` is stored
  as `Float64`.

# Returns
The result of `inv_predict_accuracy!` applied to the newly constructed `MultipleCalibration`.
"""
function calibration(method::MethodTable{A}, i::Int;
                    id = method.signaltable.sample,
                    isd = isd_of(method, method.conctable.analyte[i]),
                    type = true,
                    zero = false,
                    weight = 0
                ) where A

    # Order calibration points by their level.
    ord = sortperm(method.pointlevel)
    level = method.pointlevel[ord]
    conc = getanalyte(method.conctable, i)
    signal = getanalyte(method.signaltable, i)
    table = Table(;
                id = id[ord],
                level = level,
                # Concentration of each point is looked up through the sample named by its level.
                x = map(level) do l
                    conc[findsample(method.conctable, Symbol(l))]
                end,
                # Raw signal, or signal relative to the internal standard when one is given.
                y = isnothing(isd) ? signal[ord] : (signal ./ getanalyte(method.signaltable, isd))[ord],
                # Zero-initialised columns; presumably filled by `inv_predict_accuracy!` below.
                x̂ = zeros(Float64, length(id)),
                accuracy = zeros(Float64, length(id)),
                include = trues(length(id)))
    f = getformula(type, zero)
    model = calfit(table, f, type, zero, weight)
    inv_predict_accuracy!(MultipleCalibration((method.conctable.analyte[i], isd), type, zero, Float64(weight), f, table, model))
end

"""
calfit(tbl, formula, type, zero, weight)
Expand Down
88 changes: 53 additions & 35 deletions src/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,20 @@ function read_config(file::String)
end
end
if haskey(config, :delim)
config[:delim] = config[:delim] == "\\t" ? "\t" : config[:delim]
config[:delim] = unescape_string(config[:delim])
end
config
end

"""
read_calibration(file::String; analytetype::Type{A} = String, delim = "\\t") -> AbstractCalibration{A}
read_calibration(file::String; analytetype::Type{A} = String, delim = '\\t') -> AbstractCalibration{A}
Read ".mcal" or ".scal" file into julia as `MultipleCalibration` or `SingleCalibration`. `analytetype` is a concrete type for `analyte` which msut have a method for string input, and `delim` specifies delimiter for tabular data.
Read ".mcal" or ".scal" file into Julia as `MultipleCalibration` or `SingleCalibration`. `analytetype` is a concrete type for `analyte` which must have a method for string input,
and `delim` specifies delimiter for tabular data if `config[:delim]` does not exist.
See README.md for the structure of ".mcal" and ".scal" file.
"""
function read_calibration(file::String; analytetype = String, delim = "\t")
function read_calibration(file::String; analytetype = String, delim = '\t')
endswith(file, ".mcal") || endswith(file, ".scal") || throw(ArgumentError("The file is not a valid calibration directory"))
config = read_config(joinpath(file, "config.txt"))
if endswith(file, ".scal")
Expand All @@ -40,13 +41,14 @@ function read_calibration(file::String; analytetype = String, delim = "\t")
end

"""
read_datatable(file::String, T; analytetype::Type{A} = String, delim = "\\t") -> AbstractDataTable{A, S <: T}
read_datatable(file::String, T; analytetype::Type{A} = String, delim = '\\t') -> AbstractDataTable{A, S <: T}
Read ".dt" file into julia as `ColumnDataTable` or `RowDataTable`. `T` is the sink function for tabular data, `analytetype` is a concrete type for `analyte` which msut have a method for string input, and `delim` specifies delimiter for tabular data.
Read ".dt" file into Julia as `ColumnDataTable` or `RowDataTable`. `T` is the sink function for tabular data, `analytetype` is a concrete type for `analyte` which must have a method for string input,
and `delim` specifies delimiter for tabular data if `config[:delim]` does not exist.
See README.md for the structure of ".dt" file.
"""
function read_datatable(file::String, T; analytetype = String, delim = "\t")
function read_datatable(file::String, T; analytetype = String, delim = '\t')
endswith(file, ".dt") || throw(ArgumentError("The file is not a valid table directory"))
config = read_config(joinpath(file, "config.txt"))
delim = get(config, :delim, delim)
Expand All @@ -71,16 +73,16 @@ function read_datatable(file::String, T; analytetype = String, delim = "\t")
end

"""
read_analysistable(file::String, T; tabletype = T, analytetype::Type{A} = String, delim = "\\t") -> AnalysisTable{A, S <: T}
read_analysistable(file::String, T; tabletype = T, analytetype::Type{A} = String, delim = '\\t') -> AnalysisTable{A, S <: T}
Read ".at" file into julia as `AnalysisTable`. `T` is the sink function for tabular data, `tabletype` is `T` parameter in the type signature of `Batch` which determines the underlying table type
, and `analytetype` is a concrete type for `analyte` which msut have a method for string input, and `delim` specifies delimiter for tabular data.
, and `analytetype` is a concrete type for `analyte` which must have a method for string input, and `delim` specifies delimiter for tabular data if `config[:delim]` of a table does not exist.
If `tabletype` is set to `nothing`, table type will be determined automatically which may be too restrictive when using parameterized table types.
See README.md for the structure of ".at" file.
"""
function read_analysistable(file::String, T; tabletype = T, analytetype = String, delim = "\t")
function read_analysistable(file::String, T; tabletype = T, analytetype = String, delim = '\t')
endswith(file, ".at") || throw(ArgumentError("The file is not a valid table directory"))
files = filter!(f -> endswith(f, ".dt"), readdir(file))
tables = map(files) do f
Expand All @@ -90,18 +92,19 @@ function read_analysistable(file::String, T; tabletype = T, analytetype = String
Cons(Symbol.(replace.(files, Ref(".dt" => ""), Ref(r"^\d*_" => ""))), tables)
end
"""
read_methodtable(file::String, T; tabletype = T, analytetype::Type{A} = String, delim = "\\t") -> MethodTable{A, S <: T}
read_methodtable(file::String, T; tabletype = T, analytetype::Type{A} = String, delim = '\\t') -> MethodTable{A, S <: T}
Read ".mt" file into julia as `MethodTable`. `T` is the sink function for tabular data, `tabletype` is `T` parameter in the type signature of `MethodTable` which determines the underlying table type
, and `analytetype` is a concrete type for `analyte` which msut have a method for string input, and `delim` specifies delimiter for tabular data.
, and `analytetype` is a concrete type for `analyte` which must have a method for string input, and `delim` specifies delimiter for tabular data if `config[:delim]` does not exist.
If `tabletype` is set to `nothing`, table type will be determined automatically which may be too restrictive when using parameterized table types.
See README.md for the structure of ".mt" file.
"""
function read_methodtable(file::String, T; tabletype = T, analytetype = String, delim = "\t")
function read_methodtable(file::String, T; tabletype = T, analytetype = String, delim = '\t')
endswith(file, ".mt") || throw(ArgumentError("The file is not a valid table directory"))
config = read_config(joinpath(file, "config.txt"))
delim = get(config, :delim, delim)
signal = Symbol(get(config, :signal, :area))
analytetable = CSV.read(joinpath(file, "analytetable.txt"), Table)
analyte = analytetype.(analytetable.analyte)
Expand Down Expand Up @@ -130,19 +133,19 @@ function read_methodtable(file::String, T; tabletype = T, analytetype = String,
Cons(Table(; analyte, isd, calibration), signal, pointlevel, conctable, signaltable)
end
"""
read_batch(file::String, T; tabletype = T, analytetype = String) -> Batch{A, tabletype}
read_batch(file::String, T; tabletype = T, analytetype = String, delim = '\\t') -> Batch{A, tabletype}
Read ".batch" file into julia as `Batch`. `T` is the sink function for tabular data, `tabletype` is `T` parameter in the type signature of `Batch` which determines the underlying table type
, and `analytetype` is a concrete type for `analyte` which msut have a method for string input.
, and `analytetype` is a concrete type for `analyte` which must have a method for string input, and `delim` specifies delimiter for tabular data if `config[:delim]` does not exist.
If `tabletype` is set to `nothing`, table type will be determined automatically which may be too restrictive when using parameterized table types.
See README.md for the structure of ".batch" file.
"""
function read_batch(file::String, T; tabletype = T, analytetype = String)
function read_batch(file::String, T; tabletype = T, analytetype = String, delim = '\t')
endswith(file, ".batch") || throw(ArgumentError("The file is not a valid batch directory"))
config = read_config(joinpath(file, "config.txt"))
delim = config[:delim]
delim = get(config, :delim, delim)
method = read_methodtable(joinpath(file, "method.mt"), T; tabletype, analytetype, delim)
if !in("calibration", readdir(file)) || isempty(readdir(joinpath(file, "calibration")))
if isnothing(method.signaltable)
Expand Down Expand Up @@ -250,68 +253,69 @@ function show(io::IO, ::MIME"text/plain", tbl::MethodTable)
show(io, MIME"text/plain"(), isnothing(tbl.signaltable) ? nothing : tbl.signaltable.table)
end

function write(file::String, tbl::RowDataTable; delim = "\t")
function write(file::String, tbl::RowDataTable; delim = '\t')
mkpath(file)
open(joinpath(file, "config.txt"), "w+") do config
Base.write(config, "[Type]\nR\n\n[Analyte]\n", tbl.analytecol, "\n\n[Sample]\n", join(tbl.sample, "\n"))
Base.write(config, "[Type]\nR\n\n[delim]\n", escape_string(string(delim)), "\n\n[Analyte]\n", tbl.analytecol, "\n\n[Sample]\n", join(tbl.sample, "\n"))
end
CSV.write(joinpath(file, "table.txt"), tbl.table; delim)
end

function write(file::String, tbl::ColumnDataTable; delim = "\t")
function write(file::String, tbl::ColumnDataTable; delim = '\t')
mkpath(file)
open(joinpath(file, "config.txt"), "w+") do config
Base.write(config, "[Type]\nC\n\n[Analyte]\n", join(tbl.analytename, "\n"), "\n\n[Sample]\n", tbl.samplecol)
Base.write(config, "[Type]\nC\n\n[delim]\n", escape_string(string(delim)), "\n\n[Analyte]\n", join(tbl.analytename, "\n"), "\n\n[Sample]\n", tbl.samplecol)
end
CSV.write(joinpath(file, "table.txt"), tbl.table; delim)
end

function write(file::String, tbl::AnalysisTable; delim = "\t")
function write(file::String, tbl::AnalysisTable; delim = '\t')
mkpath(file)
rm.(readdir(file; join = true); recursive = true)
for (i, (k, v)) in enumerate(pairs(tbl.tables))
write(joinpath(file, "$(i - 1)_$k.dt"), v; delim)
end
end

function write(file::String, tbl::MethodTable; delim = "\t")
function write(file::String, tbl::MethodTable; delim = '\t')
mkpath(file)
write(joinpath(file, "true_concentration.dt"), tbl.conctable; delim)
isnothing(tbl.signaltable) || write(joinpath(file, "$(tbl.signal).dt"), tbl.signaltable; delim)
open(joinpath(file, "config.txt"), "w+") do config
Base.write(config, "[signal]\n", tbl.signal, "\n\n[pointlevel]\n", join(tbl.pointlevel, "\n"))
Base.write(config, "[signal]\n", tbl.signal, "\n\n[delim]\n", escape_string(string(delim)), "\n\n[pointlevel]\n", join(tbl.pointlevel, "\n"))
end
CSV.write(joinpath(file, "analytetable.txt"), tbl.analytetable; delim)
end

function write(file::String, cal::MultipleCalibration; delim = "\t")
function write(file::String, cal::MultipleCalibration; delim = '\t')
mkpath(file)
open(joinpath(file, "config.txt"), "w+") do config
Base.write(config, "[analyte]\n", string(first(cal.analyte)), "\n\n[isd]\n", string(last(cal.analyte)),
"\n\n[type]\n", string(cal.type), "\n\n[zero]\n", string(cal.zero), "\n\n[weight]\n", string(cal.weight))
"\n\n[type]\n", string(cal.type), "\n\n[zero]\n", string(cal.zero), "\n\n[weight]\n", string(cal.weight),
"\n\n[delim]\n", escape_string(string(delim)))
end
CSV.write(joinpath(file, "table.txt"), cal.table; delim)
end
function write(file::String, cal::SingleCalibration; delim = "\t")
function write(file::String, cal::SingleCalibration; delim = '\t')
mkpath(file)
open(joinpath(file, "config.txt"), "w+") do config
Base.write(config, "[analyte]\n", string(first(cal.analyte)), "\n\n[conc]\n", string(cal.conc))
end
end

"""
Calibration.write(file::String, object; delim = "\\t")
ChemistryQuantitativeAnalysis.write(file::String, object; delim = '\\t')
Write `object` into ".scal" for `SingleCalibration`, ".mcal" for `MultipleCalibration`, ".at" for `AnalysisTable`, ".mt" for `MethodTable`, and ".batch" for `Batch`.
`delim` specifies delimiter for tabular data.
`delim` specifies delimiter for tabular data; the chosen delimiter is also recorded under `[delim]` in the written `config.txt`.
See README.md for the structure of all files.
"""
function write(file::String, batch::Batch; delim = "\t")
function write(file::String, batch::Batch; delim = '\t')
mkpath(file)
open(joinpath(file, "config.txt"), "w+") do config
Base.write(config, "[delim]\n", delim == "\t" ? "\\t" : ",")
Base.write(config, "[delim]\n", escape_string(string(delim)))
end
write(joinpath(file, "method.mt"), batch.method; delim)
mkpath(joinpath(file, "calibration"))
Expand All @@ -323,13 +327,27 @@ function write(file::String, batch::Batch; delim = "\t")
end

"""
Calibration.read(file::String, T; tabletype = T, analytetype = String) -> Batch{A, tabletype}
ChemistryQuantitativeAnalysis.read(file::String, T; tabletype = T, analytetype = String, delim = '\\t') -> Batch{A, tabletype}
Read ".batch" file into julia as `Batch`. `T` is the sink function for tabular data, `tabletype` is `T` parameter in the type signature of `Batch` which determines the underlying table type
, and `analytetype` is a concrete type for `analyte` which msut have a method for string input.
Read ".scal" as `SingleCalibration`, ".mcal" as `MultipleCalibration`, ".at" as `AnalysisTable`, ".mt" as `MethodTable`, and ".batch" as `Batch`.
`T` is the sink function for tabular data, `tabletype` is `T` parameter in the type signature which determines the underlying table type
, `analytetype` is a concrete type for `analyte` which must have a method for string input, and `delim` specifies delimiter for tabular data if `config[:delim]` does not exist.
If `tabletype` is set to `nothing`, table type will be determined automatically which may be too restrictive when using parameterized table types.
See README.md for the structure of ".batch" file.
"""
const read = read_batch
function read(file::String, T; tabletype = T, analytetype = String, delim = '\t')
    # Dispatch on the directory suffix to the matching reader; `T`, `tabletype`,
    # `analytetype` and `delim` are forwarded to the readers that accept them.
    if endswith(file, ".batch")
        read_batch(file, T; tabletype, analytetype, delim)
    elseif endswith(file, ".scal") || endswith(file, ".mcal")
        # Calibration readers take no sink function or table type.
        read_calibration(file; analytetype, delim)
    elseif endswith(file, ".at")
        read_analysistable(file, T; tabletype, analytetype, delim)
    elseif endswith(file, ".mt")
        read_methodtable(file, T; tabletype, analytetype, delim)
    elseif endswith(file, ".dt")
        read_datatable(file, T; analytetype, delim)
    else
        # Fail loudly, like the specific readers do, instead of silently returning `nothing`.
        throw(ArgumentError("The file is not a valid batch, calibration, or table directory"))
    end
end
3 changes: 3 additions & 0 deletions test/data/initial_mc_c.batch/data.at/0_area.dt/config.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
[Type]
C

[delim]
\t

[Analyte]
Analyte1
Analyte2
Expand Down
3 changes: 3 additions & 0 deletions test/data/initial_mc_c.batch/method.mt/area.dt/config.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
[Type]
C

[delim]
\t

[Analyte]
Analyte1
Analyte2
Expand Down
3 changes: 3 additions & 0 deletions test/data/initial_mc_c.batch/method.mt/config.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
[signal]
area

[delim]
\t

[pointlevel]
1
1
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
[Type]
C

[delim]
\t

[Analyte]
Analyte1
Analyte2
Expand Down
3 changes: 3 additions & 0 deletions test/data/initial_mc_r.batch/data.at/0_area.dt/config.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
[Type]
R

[delim]
\t

[Analyte]
Analyte

Expand Down
Loading

0 comments on commit 247166b

Please sign in to comment.