diff --git a/_snippets/describe-objects.md b/_snippets/describe-objects.md deleted file mode 100644 index d51afd8..0000000 --- a/_snippets/describe-objects.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: Object transform -description: Recursively describe objects -source: https://stackoverflow.com/a/77322330 -input: data/type.json -output: | - [{"name":"id","type":"number","value":123456}, - {"name":"name","type":"string","value":"Test"}, - {"name":"arraySimple","type":"array","value":[1,2,3]}, - {"name":"arrayComplex","type":"array","value":[{"id":38392}]}, - {"name":"instance","type":"object","value":[{"name":"id","type":"number","value":8310}]}, - {"name":"isEmpty","type":"boolean","value":false}, - {"name":"empty","type":"null","value":null}] ---- - -def describef: { - type: type, - value : (objects |= (to_entries | map({name: .key} + (.value | describef)))) -}; -describef.value diff --git a/data-transform.md b/data-transform.md deleted file mode 100644 index f6944df..0000000 --- a/data-transform.md +++ /dev/null @@ -1,80 +0,0 @@ -# Data transformation - -## json2ini - -Adapted from https://stackoverflow.com/a/76665197 - -Data used: [nested.json](data/nested.json) - -``` -{ - "server": { - "atx": { - "user": "annie", - "port": 22 - } - }, - "storage": { - "nyc": { - "user": "nntrn", - "port": 22 - } - } -} -``` - -```sh -jq --stream -nr ' reduce (inputs | select(has(1))) as [$path, $val] - ( {}; .[$path[:-1] | join(".")][$path[-1]] = $val ) -| to_entries[] -| "[\(.key)]", (.value | to_entries[] | "\(.key) = \(.value)" ) -, ""' -``` - -Output - -``` -[server.atx] -user = annie -port = 22 - -[storage.nyc] -user = nntrn -port = 22 -``` - -## tsv2json - -Adapted from https://stackoverflow.com/a/55996042 - -Data used: [tmp.tsv](data/tmp.tsv) - -``` -foo bar baz -1 2 3 -4 5 6 -``` - -```sh -jq -R 'split("[\\s\\t]+";"x") as $head -| [inputs | split("[\\s\\t]+";"x")] -| map( . as $row | reduce (keys|.[]) as $x - ( {}; . 
+ {"\($head[$x])":$row[$x]} ) )' data/tmp.tsv -``` - -Output - -```json -[ - { - "foo": "1", - "bar": "2", - "baz": "3" - }, - { - "foo": "4", - "bar": "5", - "baz": "6" - } -] -``` diff --git a/data-transform.md b/data-transform.md new file mode 120000 index 0000000..a989839 --- /dev/null +++ b/data-transform.md @@ -0,0 +1 @@ +general/data-transform.md \ No newline at end of file diff --git a/functions/summary.md b/functions/summary.md index ca73947..ce2ae9d 100644 --- a/functions/summary.md +++ b/functions/summary.md @@ -1,4 +1,4 @@ -# summary +# Summary ```jq def grouped_summary($item): diff --git a/functions/text.md b/functions/text.md new file mode 100644 index 0000000..5ada180 --- /dev/null +++ b/functions/text.md @@ -0,0 +1,25 @@ +# Text functions + +## Recursively split strings w/ new lines + +```jq +def split_newlines($s): + if ((type == "string") and (($s|tostring|split("\n")|length) > 1)?) + then ($s|tostring|split("[\\r\\n]+([\\s]+)?";"x")) + elif (type == "object") then to_entries + else $s end; + +def recuse_split_newlines: walk(split_newlines(.)|from_entries? // .); +``` + +## Quoting + +```jq +def squo: [39]|implode; + +def squote($text): [squo,$text,squo]|join(""); +def dquote($text): "\"\($text)\""; + +def unsmart($text): $text | gsub("[“”]";"\"") | gsub("[’‘]";"'"); +def unsmart: . 
| unsmart(.); +``` \ No newline at end of file diff --git a/functions/unroll.md b/functions/unroll.md index 1e3ac54..b01a822 100644 --- a/functions/unroll.md +++ b/functions/unroll.md @@ -2,15 +2,15 @@ ```jq [leaf_paths as $path | { - "key": $path | map(tostring) | join("_"), - "value": getpath($path) + "key": $path | map(tostring) | join("_"), + "value": getpath($path) }] | from_entries ``` ```console -$ cat data/nested.json|jq '[leaf_paths as $path | { - "key": $path | map(tostring) | join("_"), - "value": getpath($path) +$ cat data/nested.json|jq '[leaf_paths as $path | { + "key": $path | map(tostring) | join("_"), + "value": getpath($path) }] | from_entries ' { @@ -23,54 +23,52 @@ $ cat data/nested.json|jq '[leaf_paths as $path | { ```jq def categorize: - # Returns "object", "array" or "scalar" to indicate the category - # of the piped element. - if type == "object" then "object" - elif type == "array" then "array" - else "scalar" - end; + # Returns "object", "array" or "scalar" to indicate the category + # of the piped element. + if type == "object" then "object" + elif type == "array" then "array" + else "scalar" + end; def pluck($category): - # Plucks the children of a particular category from piped element. - if categorize != "object" - then empty - else to_entries[] - | select(.value | categorize == $category) - | .value - end; - + # Plucks the children of a particular category from piped element. + if categorize != "object" + then empty + else to_entries[] + | select(.value | categorize == $category) + | .value + end; + def split: - # Splits the piped element's children into arrays, scalars, and objects - # and returns a meta object containing the children seperated by these - # keys. If the piped element is a scalar or array, this does not look - # at the children, but just returns that element in the meta object. - if categorize == "scalar" then { objects: [], arrays: [], scalars: [.] 
} - elif categorize == "array" then { objects: [], arrays: [.], scalars: [] } - else { objects: [pluck("object")], arrays : [pluck("array")], scalars: [pluck("scalar")] } - end; + # Splits the piped element's children into arrays, scalars, and objects + # and returns a meta object containing the children seperated by these + # keys. If the piped element is a scalar or array, this does not look + # at the children, but just returns that element in the meta object. + if categorize == "scalar" then { objects: [], arrays: [], scalars: [.] } + elif categorize == "array" then { objects: [], arrays: [.], scalars: [] } + else { objects: [pluck("object")], arrays : [pluck("array")], scalars: [pluck("scalar")] } + end; def unwrap: - # Unwraps an array recursively, until the elements of the returned array + # Unwraps an array recursively, until the elements of the returned array # are either scalars or objects but not arrays. If piped element is not # an array, returns the element as is. - if type != "array" then . elif length == 0 then empty else .[] | unwrap end; def extract($category): - # Extracts the elements of a particular category from the piped in array. - # If the piped in element is not an array, this fn acts as filter to + # Extracts the elements of a particular category from the piped in array. + # If the piped in element is not an array, this fn acts as filter to # only return the element if it is of the desired category. unwrap | select(.| categorize == $category); -def unroll: +def unroll: # Unrolls the passed in object recursively until only scalars are left. # Returns a row for each leaf node of tree structure of the object and # elements of the row would be all the scalars encountered at all the # ancestor levels of this left node. - . 
| .result += .state.scalars | .state.objects += [.state.arrays | extract("object")] | .state.objects += [.state.arrays | extract("scalar")] diff --git a/general/data-transform.md b/general/data-transform.md new file mode 100644 index 0000000..f6944df --- /dev/null +++ b/general/data-transform.md @@ -0,0 +1,80 @@ +# Data transformation + +## json2ini + +Adapted from https://stackoverflow.com/a/76665197 + +Data used: [nested.json](data/nested.json) + +``` +{ + "server": { + "atx": { + "user": "annie", + "port": 22 + } + }, + "storage": { + "nyc": { + "user": "nntrn", + "port": 22 + } + } +} +``` + +```sh +jq --stream -nr ' reduce (inputs | select(has(1))) as [$path, $val] + ( {}; .[$path[:-1] | join(".")][$path[-1]] = $val ) +| to_entries[] +| "[\(.key)]", (.value | to_entries[] | "\(.key) = \(.value)" ) +, ""' +``` + +Output + +``` +[server.atx] +user = annie +port = 22 + +[storage.nyc] +user = nntrn +port = 22 +``` + +## tsv2json + +Adapted from https://stackoverflow.com/a/55996042 + +Data used: [tmp.tsv](data/tmp.tsv) + +``` +foo bar baz +1 2 3 +4 5 6 +``` + +```sh +jq -R 'split("[\\s\\t]+";"x") as $head +| [inputs | split("[\\s\\t]+";"x")] +| map( . as $row | reduce (keys|.[]) as $x + ( {}; . + {"\($head[$x])":$row[$x]} ) )' data/tmp.tsv +``` + +Output + +```json +[ + { + "foo": "1", + "bar": "2", + "baz": "3" + }, + { + "foo": "4", + "bar": "5", + "baz": "6" + } +] +``` diff --git a/general/examples.md b/general/examples.md index 28084c4..a75325f 100644 --- a/general/examples.md +++ b/general/examples.md @@ -53,10 +53,10 @@ $ jq '.[] |= if .attr2 then (.attr2 = "bax") else . 
end' [[Source]](https://github.com/stedolan/jq/issues/873#issuecomment-125385615) -## Slurpfiles +## Slurp-file ```console -$ jq --slurpfile cars data/cars.json '{titanic: .[0:2],cars:$cars[][0:2]}' data/titanic.json +$ jq --slurpfile cars data/cars.json '{titanic: .[0:1], cars: $cars[][0:1]}' data/titanic.json { "titanic": [ { @@ -68,16 +68,6 @@ $ jq --slurpfile cars data/cars.json '{titanic: .[0:2],cars:$cars[][0:2]}' data/ "Siblings_Spouses_Aboard": 1, "Parents_Children_Aboard": 0, "Fare": 7.25 - }, - { - "Survived": 1, - "Pclass": 1, - "Name": "Mrs. John Bradley (Florence Briggs Thayer) Cumings", - "Sex": "female", - "Age": 38, - "Siblings_Spouses_Aboard": 1, - "Parents_Children_Aboard": 0, - "Fare": 71.2833 } ], "cars": [ @@ -91,17 +81,6 @@ $ jq --slurpfile cars data/cars.json '{titanic: .[0:2],cars:$cars[][0:2]}' data/ "weight_lb_": 3821, "0_60_mph_s_": 11, "year": 1973 - }, - { - "name": "AMC Ambassador DPL", - "brand": "AMC", - "economy_mpg_": 15, - "cylinders": 8, - "displacement_cc_": 390, - "power_hp_": 190, - "weight_lb_": 3850, - "0_60_mph_s_": 8.5, - "year": 1970 } ] } diff --git a/recipes.jq b/recipes.jq index 6b8f1b3..0cb121b 100644 --- a/recipes.jq +++ b/recipes.jq @@ -9,6 +9,159 @@ # https://github.com/nntrn/jq-recipes # +########################################################################################## +# _site/functions/barcharts.md +########################################################################################## + +def barchart($key): + length as $total + | map((.[$key] // "null") | tostring) + | group_by(.) 
+ | (map({ key: .[0], value: length, title_len: (.[0]|tostring|length) }) ) as $columns + | $columns + | sort_by(.value) | reverse + | (max_by(.title_len)|.title_len) as $padding + | + (if (((($columns|length)/$total) > .8) or (($columns|length) > 1000)) then + [ "IGNORING <\($key)>: \($columns|length) out of \($total) rows", ""] + else [ + $key, + ("-" * ($key|length) ), + map( + [ + .key, (" " * ($padding-.title_len)), + "\((.value/$total)*100|tostring|.+".000"|.[0:4])%", + ( if (.value == 1) then "▊" else ("█" * (((.value/$total)*100) + (.value|log)|round)) end), + .value + ] | join(" ") + ), + "" + ] end) + | flatten + | join("\n"); + +def run_barchart: + . as $data + | (.[0]|keys) as $cols + | ($cols | map(. as $col | $data | barchart($col)) | join("\n")) as $barcharts + | [ $barcharts ] + | flatten| join("\n") +; + +########################################################################################## +# _site/functions/conversion.md +########################################################################################## + +def to_precision($p): + . |tostring|split(".") + | [.[0], (.[1]|split("")|.[0:($p|tonumber)]|join(""))] + | join(".") + | tonumber; + +def humansize(bytes;$p): + (bytes|tonumber) as $size | + if $size > 1073741824 then "\(($size/1073741824)|to_precision($p))G" + elif $size > 1048576 then "\(($size/1048576)|to_precision($p))M" + elif $size > 1024 then "\(($size/1024)|to_precision($p))K" + else $size + end; + +def humansize(bytes): humansize(bytes;1); + +########################################################################################## +# _site/functions/json2csv.md +########################################################################################## + +def json2csv: + (map(keys) | add | unique) as $cols + | map(. 
as $row | $cols | map($row[.])) as $rows + | $cols, $rows[] + | @csv; + +########################################################################################## +# _site/functions/pick.md +########################################################################################## + +def pick(stream): + . as $in + | reduce path(stream) as $a (null; + setpath($a; $in|getpath($a)) ); + +########################################################################################## +# _site/functions/read-history.md +########################################################################################## + +def history: + map( + if test("#[0-9]{10,12}") + then "\(.|gsub("#";"")|tonumber|todate)" + else "\t\(.)\n" + end + ) | join(""); + +########################################################################################## +# _site/functions/summary.md +########################################################################################## + +def grouped_summary($item): + {"\($item? // "blank")":group_by(.[$item])|map({"\(.[0][$item]? // "blank")":length})|add}; + +def summary: + [ (.[0]|keys)[] as $keys | grouped_summary($keys)] + | add + | to_entries + | map( + del(select(((.value//"")|keys[0]|length) > 100)) | + del(select(((.value//"")|values|length) > 400)) + ) + | map(select(.)) + | from_entries; + +def summary_wip: + [ (.[0]|keys)[] as $keys | grouped_summary($keys)] + | add + | to_entries + #| map(del(select(((.value//"")|keys|length) > 400))) + | map(select(.)|{key,count:(.value|length)}) + | map(.value |= to_entries); + +def summary2: + . as $data + | (.[0]|keys) + | map(. 
as $item | { + key: $item, + value: ($data|map(.[$item])|group_by(.)|map({"\(.[0])": length}))|add + }) + | map(select((.value|to_entries|length)< (.90 * ($data|length)))) + | from_entries; + +########################################################################################## +# _site/general/codepoints.md +########################################################################################## + +def smart_squotes($s): + $s | if (test("[\\s\\n\\t]";"x")) then "\([39]|implode)\($s)\([39]|implode)" else $s end; + +def smart_dquotes($s): + $s | if (test("[\\s\\n\\t]";"x")) then "\($s|@json)" else $s end; + +########################################################################################## +# _site/general/reduce.md +########################################################################################## + +def tocsv: + .[0] as $cols | .[1:] + | map(. as $row + | $cols + | with_entries({ "key": .value,"value": $row[.key]}) + ); + +########################################################################################## +# _site/general/wrangle.md +########################################################################################## + +def s: [splits(" +")]; + ########################################################################################## # functions/barcharts.md ########################################################################################## @@ -175,6 +328,91 @@ def summary2: | map(select((.value|to_entries|length)< (.90 * ($data|length)))) | from_entries; +########################################################################################## +# functions/text.md +########################################################################################## + +def split_newlines($s): + if ((type == "string") and (($s|tostring|split("\n")|length) > 1)?) 
+ then ($s|tostring|split("[\\r\\n]+([\\s]+)?";"x")) + elif (type == "object") then to_entries + else $s end; + +def recuse_split_newlines: walk(split_newlines(.)|from_entries? // .); + +def squo: [39]|implode; + +def squote($text): [squo,$text,squo]|join(""); +def dquote($text): "\"\($text)\""; + +def unsmart($text): $text | gsub("[“”]";"\"") | gsub("[’‘]";"'"); +def unsmart: . | unsmart(.); + +########################################################################################## +# functions/unroll.md +########################################################################################## + +def categorize: + # Returns "object", "array" or "scalar" to indicate the category + # of the piped element. + if type == "object" then "object" + elif type == "array" then "array" + else "scalar" + end; + +def pluck($category): + # Plucks the children of a particular category from piped element. + if categorize != "object" + then empty + else to_entries[] + | select(.value | categorize == $category) + | .value + end; + +def split: + # Splits the piped element's children into arrays, scalars, and objects + # and returns a meta object containing the children seperated by these + # keys. If the piped element is a scalar or array, this does not look + # at the children, but just returns that element in the meta object. + if categorize == "scalar" then { objects: [], arrays: [], scalars: [.] } + elif categorize == "array" then { objects: [], arrays: [.], scalars: [] } + else { objects: [pluck("object")], arrays : [pluck("array")], scalars: [pluck("scalar")] } + end; + +def unwrap: + # Unwraps an array recursively, until the elements of the returned array + # are either scalars or objects but not arrays. If piped element is not + # an array, returns the element as is. + if type != "array" then . + elif length == 0 then empty + else .[] | unwrap + end; + +def extract($category): + # Extracts the elements of a particular category from the piped in array. 
+ # If the piped in element is not an array, this fn acts as filter to + # only return the element if it is of the desired category. + unwrap | select(.| categorize == $category); + +def unroll: + # Unrolls the passed in object recursively until only scalars are left. + # Returns a row for each leaf node of tree structure of the object and + # elements of the row would be all the scalars encountered at all the + # ancestor levels of this left node. + . | .result += .state.scalars + | .state.objects += [.state.arrays | extract("object")] + | .state.objects += [.state.arrays | extract("scalar")] + | if (.state.objects | length == 0 ) + then .result + else ({ data : .state.objects, + state: .state.objects[] | split, + result: .result + } | unroll) + end; + +def unrolls($data): { data: $data, state: $data| split, result: [] } | unroll ; +def unrolls: unrolls(.); + ########################################################################################## # general/codepoints.md ##########################################################################################