From 283d85deb6d36f4868217d31b8edbbe2bb38ad3b Mon Sep 17 00:00:00 2001
From: nntrn <17685332+nntrn@users.noreply.github.com>
Date: Mon, 4 Mar 2024 11:33:18 -0600
Subject: [PATCH] Update examples
---
_snippets/describe-objects.md | 20 ---
data-transform.md | 81 +-----------
functions/summary.md | 2 +-
functions/text.md | 25 ++++
functions/unroll.md | 64 +++++----
general/data-transform.md | 80 ++++++++++++
general/examples.md | 25 +---
recipes.jq | 238 ++++++++++++++++++++++++++++++++++
8 files changed, 378 insertions(+), 157 deletions(-)
delete mode 100644 _snippets/describe-objects.md
mode change 100644 => 120000 data-transform.md
create mode 100644 functions/text.md
create mode 100644 general/data-transform.md
diff --git a/_snippets/describe-objects.md b/_snippets/describe-objects.md
deleted file mode 100644
index d51afd8..0000000
--- a/_snippets/describe-objects.md
+++ /dev/null
@@ -1,20 +0,0 @@
----
-title: Object transform
-description: Recursively describe objects
-source: https://stackoverflow.com/a/77322330
-input: data/type.json
-output: |
- [{"name":"id","type":"number","value":123456},
- {"name":"name","type":"string","value":"Test"},
- {"name":"arraySimple","type":"array","value":[1,2,3]},
- {"name":"arrayComplex","type":"array","value":[{"id":38392}]},
- {"name":"instance","type":"object","value":[{"name":"id","type":"number","value":8310}]},
- {"name":"isEmpty","type":"boolean","value":false},
- {"name":"empty","type":"null","value":null}]
----
-
-def describef: {
- type: type,
- value : (objects |= (to_entries | map({name: .key} + (.value | describef))))
-};
-describef.value
diff --git a/data-transform.md b/data-transform.md
deleted file mode 100644
index f6944df..0000000
--- a/data-transform.md
+++ /dev/null
@@ -1,80 +0,0 @@
-# Data transformation
-
-## json2ini
-
-Adapted from https://stackoverflow.com/a/76665197
-
-Data used: [nested.json](data/nested.json)
-
-```
-{
- "server": {
- "atx": {
- "user": "annie",
- "port": 22
- }
- },
- "storage": {
- "nyc": {
- "user": "nntrn",
- "port": 22
- }
- }
-}
-```
-
-```sh
-jq --stream -nr ' reduce (inputs | select(has(1))) as [$path, $val]
- ( {}; .[$path[:-1] | join(".")][$path[-1]] = $val )
-| to_entries[]
-| "[\(.key)]", (.value | to_entries[] | "\(.key) = \(.value)" )
-, ""'
-```
-
-Output
-
-```
-[server.atx]
-user = annie
-port = 22
-
-[storage.nyc]
-user = nntrn
-port = 22
-```
-
-## tsv2json
-
-Adapted from https://stackoverflow.com/a/55996042
-
-Data used: [tmp.tsv](data/tmp.tsv)
-
-```
-foo bar baz
-1 2 3
-4 5 6
-```
-
-```sh
-jq -R 'split("[\\s\\t]+";"x") as $head
-| [inputs | split("[\\s\\t]+";"x")]
-| map( . as $row | reduce (keys|.[]) as $x
- ( {}; . + {"\($head[$x])":$row[$x]} ) )' data/tmp.tsv
-```
-
-Output
-
-```json
-[
- {
- "foo": "1",
- "bar": "2",
- "baz": "3"
- },
- {
- "foo": "4",
- "bar": "5",
- "baz": "6"
- }
-]
-```
diff --git a/data-transform.md b/data-transform.md
new file mode 120000
index 0000000..a989839
--- /dev/null
+++ b/data-transform.md
@@ -0,0 +1 @@
+general/data-transform.md
\ No newline at end of file
diff --git a/functions/summary.md b/functions/summary.md
index ca73947..ce2ae9d 100644
--- a/functions/summary.md
+++ b/functions/summary.md
@@ -1,4 +1,4 @@
-# summary
+# Summary
```jq
def grouped_summary($item):
diff --git a/functions/text.md b/functions/text.md
new file mode 100644
index 0000000..5ada180
--- /dev/null
+++ b/functions/text.md
@@ -0,0 +1,25 @@
+# Text functions
+
+## Recursively split strings w/ new lines
+
+```jq
+def split_newlines($s):  # multi-line string input -> array of lines (whitespace right after a line break is dropped)
+ if ((type == "string") and (($s|tostring|split("\n")|length) > 1)?)
+ then ($s|tostring|split("[\\r\\n]+([\\s]+)?";"x"))
+ elif (type == "object") then to_entries  # object input comes back as entries; anything else yields $s unchanged
+ else $s end;
+# Split every multi-line string in a document. Only strings are rewritten: piping arrays through from_entries turned [] into {}.
+def recuse_split_newlines: walk(if type == "string" then split_newlines(.) else . end);
+```
+
+## Quoting
+
+```jq
+def squo: [39]|implode;  # the single-quote character, built from code point 39
+
+def squote($text): [squo,$text,squo]|join("");  # $text wrapped in single quotes
+def dquote($text): "\"\($text)\"";  # $text wrapped in double quotes; quotes inside $text are not escaped
+
+def unsmart($text): $text | gsub("[“”]";"\"") | gsub("[’‘]";"'");  # curly quotes -> plain ASCII quotes
+def unsmart: unsmart(.);  # filter form; `. | unsmart` re-entered this same zero-argument definition and never returned
+```
\ No newline at end of file
diff --git a/functions/unroll.md b/functions/unroll.md
index 1e3ac54..b01a822 100644
--- a/functions/unroll.md
+++ b/functions/unroll.md
@@ -2,15 +2,15 @@
```jq
[leaf_paths as $path | {
- "key": $path | map(tostring) | join("_"),
- "value": getpath($path)
+ "key": $path | map(tostring) | join("_"),
+ "value": getpath($path)
}] | from_entries
```
```console
-$ cat data/nested.json|jq '[leaf_paths as $path | {
- "key": $path | map(tostring) | join("_"),
- "value": getpath($path)
+$ cat data/nested.json|jq '[leaf_paths as $path | {
+ "key": $path | map(tostring) | join("_"),
+ "value": getpath($path)
}] | from_entries
'
{
@@ -23,54 +23,52 @@ $ cat data/nested.json|jq '[leaf_paths as $path | {
```jq
def categorize:
- # Returns "object", "array" or "scalar" to indicate the category
- # of the piped element.
- if type == "object" then "object"
- elif type == "array" then "array"
- else "scalar"
- end;
+ # Returns "object", "array" or "scalar" to indicate the category
+ # of the piped element.
+ if type == "object" then "object"
+ elif type == "array" then "array"
+ else "scalar"
+ end;
def pluck($category):
- # Plucks the children of a particular category from piped element.
- if categorize != "object"
- then empty
- else to_entries[]
- | select(.value | categorize == $category)
- | .value
- end;
-
+ # Plucks the children of a particular category from piped element.
+ if categorize != "object"
+ then empty
+ else to_entries[]
+ | select(.value | categorize == $category)
+ | .value
+ end;
+
def split:
- # Splits the piped element's children into arrays, scalars, and objects
- # and returns a meta object containing the children seperated by these
- # keys. If the piped element is a scalar or array, this does not look
- # at the children, but just returns that element in the meta object.
- if categorize == "scalar" then { objects: [], arrays: [], scalars: [.] }
- elif categorize == "array" then { objects: [], arrays: [.], scalars: [] }
- else { objects: [pluck("object")], arrays : [pluck("array")], scalars: [pluck("scalar")] }
- end;
+ # Splits the piped element's children into arrays, scalars, and objects
+ # and returns a meta object containing the children separated by these
+ # keys. If the piped element is a scalar or array, this does not look
+ # at the children, but just returns that element in the meta object.
+ if categorize == "scalar" then { objects: [], arrays: [], scalars: [.] }
+ elif categorize == "array" then { objects: [], arrays: [.], scalars: [] }
+ else { objects: [pluck("object")], arrays : [pluck("array")], scalars: [pluck("scalar")] }
+ end;
def unwrap:
- # Unwraps an array recursively, until the elements of the returned array
+ # Unwraps an array recursively, until the elements of the returned array
# are either scalars or objects but not arrays. If piped element is not
# an array, returns the element as is.
-
if type != "array" then .
elif length == 0 then empty
else .[] | unwrap
end;
def extract($category):
- # Extracts the elements of a particular category from the piped in array.
- # If the piped in element is not an array, this fn acts as filter to
+ # Extracts the elements of a particular category from the piped in array.
+ # If the piped in element is not an array, this fn acts as filter to
# only return the element if it is of the desired category.
unwrap | select(.| categorize == $category);
-def unroll:
+def unroll:
# Unrolls the passed in object recursively until only scalars are left.
# Returns a row for each leaf node of tree structure of the object and
# elements of the row would be all the scalars encountered at all the
# ancestor levels of this left node.
-
. | .result += .state.scalars
| .state.objects += [.state.arrays | extract("object")]
| .state.objects += [.state.arrays | extract("scalar")]
diff --git a/general/data-transform.md b/general/data-transform.md
new file mode 100644
index 0000000..f6944df
--- /dev/null
+++ b/general/data-transform.md
@@ -0,0 +1,80 @@
+# Data transformation
+
+## json2ini
+
+Adapted from https://stackoverflow.com/a/76665197
+
+Data used: [nested.json](data/nested.json)
+
+```
+{
+ "server": {
+ "atx": {
+ "user": "annie",
+ "port": 22
+ }
+ },
+ "storage": {
+ "nyc": {
+ "user": "nntrn",
+ "port": 22
+ }
+ }
+}
+```
+
+```sh
+jq --stream -nr ' reduce (inputs | select(has(1))) as [$path, $val]
+ ( {}; .[$path[:-1] | join(".")][$path[-1]] = $val )
+| to_entries[]
+| "[\(.key)]", (.value | to_entries[] | "\(.key) = \(.value)" )
+, ""'
+```
+
+Output
+
+```
+[server.atx]
+user = annie
+port = 22
+
+[storage.nyc]
+user = nntrn
+port = 22
+```
+
+## tsv2json
+
+Adapted from https://stackoverflow.com/a/55996042
+
+Data used: [tmp.tsv](data/tmp.tsv)
+
+```
+foo bar baz
+1 2 3
+4 5 6
+```
+
+```sh
+jq -R 'split("[\\s\\t]+";"x") as $head
+| [inputs | split("[\\s\\t]+";"x")]
+| map( . as $row | reduce (keys|.[]) as $x
+ ( {}; . + {"\($head[$x])":$row[$x]} ) )' data/tmp.tsv
+```
+
+Output
+
+```json
+[
+ {
+ "foo": "1",
+ "bar": "2",
+ "baz": "3"
+ },
+ {
+ "foo": "4",
+ "bar": "5",
+ "baz": "6"
+ }
+]
+```
diff --git a/general/examples.md b/general/examples.md
index 28084c4..a75325f 100644
--- a/general/examples.md
+++ b/general/examples.md
@@ -53,10 +53,10 @@ $ jq '.[] |= if .attr2 then (.attr2 = "bax") else . end'
[[Source]](https://github.com/stedolan/jq/issues/873#issuecomment-125385615)
-## Slurpfiles
+## Slurp-file
```console
-$ jq --slurpfile cars data/cars.json '{titanic: .[0:2],cars:$cars[][0:2]}' data/titanic.json
+$ jq --slurpfile cars data/cars.json '{titanic: .[0:1], cars: $cars[][0:1]}' data/titanic.json
{
"titanic": [
{
@@ -68,16 +68,6 @@ $ jq --slurpfile cars data/cars.json '{titanic: .[0:2],cars:$cars[][0:2]}' data/
"Siblings_Spouses_Aboard": 1,
"Parents_Children_Aboard": 0,
"Fare": 7.25
- },
- {
- "Survived": 1,
- "Pclass": 1,
- "Name": "Mrs. John Bradley (Florence Briggs Thayer) Cumings",
- "Sex": "female",
- "Age": 38,
- "Siblings_Spouses_Aboard": 1,
- "Parents_Children_Aboard": 0,
- "Fare": 71.2833
}
],
"cars": [
@@ -91,17 +81,6 @@ $ jq --slurpfile cars data/cars.json '{titanic: .[0:2],cars:$cars[][0:2]}' data/
"weight_lb_": 3821,
"0_60_mph_s_": 11,
"year": 1973
- },
- {
- "name": "AMC Ambassador DPL",
- "brand": "AMC",
- "economy_mpg_": 15,
- "cylinders": 8,
- "displacement_cc_": 390,
- "power_hp_": 190,
- "weight_lb_": 3850,
- "0_60_mph_s_": 8.5,
- "year": 1970
}
]
}
diff --git a/recipes.jq b/recipes.jq
index 6b8f1b3..0cb121b 100644
--- a/recipes.jq
+++ b/recipes.jq
@@ -9,6 +9,159 @@
# https://github.com/nntrn/jq-recipes
#
+##########################################################################################
+# _site/functions/barcharts.md
+##########################################################################################
+
+def barchart($key):  # text bar chart of how often each value of .[$key] occurs in the input array of rows
+ length as $total
+ | map((.[$key] // "null") | tostring)  # missing, null and false values are all counted under the label "null"
+ | group_by(.)
+ | (map({ key: .[0], value: length, title_len: (.[0]|tostring|length) }) ) as $columns
+ | $columns
+ | sort_by(.value) | reverse  # most frequent value first
+ | (max_by(.title_len)|.title_len) as $padding  # longest label length, used to align the columns
+ |
+ (if (((($columns|length)/$total) > .8) or (($columns|length) > 1000)) then  # too many distinct values to chart
+ [ "IGNORING <\($key)>: \($columns|length) out of \($total) rows", ""]
+ else [
+ $key,
+ ("-" * ($key|length) ),
+ map(
+ [
+ .key, (" " * ($padding-.title_len)),
+ "\((.value/$total)*100|tostring|.+".000"|.[0:4])%",  # percentage text cut to its first 4 characters
+ ( if (.value == 1) then "▊" else ("█" * (((.value/$total)*100) + (.value|log)|round)) end),
+ .value
+ ] | join(" ")
+ ),
+ ""
+ ] end)
+ | flatten
+ | join("\n");
+
+def run_barchart:
+ # One barchart per key of the first row, in `keys` order,
+ # concatenated into a single newline-separated string.
+ . as $rows
+ | [ (.[0] | keys)[] as $column | $rows | barchart($column) ]
+ | join("\n")
+;
+
+##########################################################################################
+# _site/functions/conversion.md
+##########################################################################################
+
+def to_precision($p):  # truncate (not round) the input number to at most $p decimal places
+ . |tostring|split(".")
+ | [.[0], ((.[1] // "0")|split("")|.[0:($p|tonumber)]|join(""))]  # whole numbers have no fraction part, so .[1] is null
+ | join(".")
+ | tonumber;
+
+def humansize(bytes;$p):  # byte count -> "<n>K" / "<n>M" / "<n>G" string; counts below 1024 are returned as a number
+ (bytes|tonumber) as $size |
+ if $size >= 1073741824 then "\(($size/1073741824)|to_precision($p))G"
+ elif $size >= 1048576 then "\(($size/1048576)|to_precision($p))M"
+ elif $size >= 1024 then "\(($size/1024)|to_precision($p))K"  # >= so an exact power of 1024 uses the larger unit
+ else $size
+ end;
+
+def humansize(bytes): humansize(bytes;1);  # same, with one decimal place
+
+##########################################################################################
+# _site/functions/json2csv.md
+##########################################################################################
+
+def json2csv:  # array of objects -> CSV lines: a header of all keys (sorted, de-duplicated), then one row per object
+ (map(keys) | add | unique) as $header
+ | $header,
+ (.[] | [ .[ $header[] ] ])
+ | @csv;
+
+##########################################################################################
+# _site/functions/pick.md
+##########################################################################################
+
+def pick(stream):  # rebuild the input from null, keeping only the paths that `stream` addresses
+ . as $source
+ | reduce path(stream) as $p (null;
+ setpath($p; $source | getpath($p)));
+
+##########################################################################################
+# _site/functions/read-history.md
+##########################################################################################
+
+def history:  # array of history-file lines -> one string of "<ISO timestamp>\t<command>\n" records
+ map(
+ if test("^#[0-9]{10,12}$")  # anchored: only a line that is nothing but #<epoch> is a timestamp marker
+ then (.[1:] | tonumber | todate)  # unanchored, a command containing #<digits> matched and tonumber failed on it
+ else "\t\(.)\n"
+ end
+ ) | join("");
+
+##########################################################################################
+# _site/functions/summary.md
+##########################################################################################
+
+def grouped_summary($item):  # { <$item>: { <value of .[$item]>: <number of rows with it>, ... } } for an array of rows
+ {"\($item? // "blank")":group_by(.[$item])|map({"\(.[0][$item]? // "blank")":length})|add};  # null/false values are tallied under "blank"
+
+def summary:  # per-column value counts for an array of rows, built with grouped_summary
+ [ (.[0]|keys)[] as $keys | grouped_summary($keys)]  # column names are taken from the first row only
+ | add
+ | to_entries
+ | map(
+ del(select(((.value//"")|keys[0]|length) > 100)) |  # entry becomes null when the first (sorted) value label is longer than 100 characters
+ del(select(((.value//"")|values|length) > 400))  # ... or when the column has more than 400 distinct values
+ )
+ | map(select(.))  # drop the entries nulled above
+ | from_entries;
+
+def summary_wip:  # work-in-progress variant of summary
+ [ (.[0]|keys)[] as $keys | grouped_summary($keys)]  # column names are taken from the first row only
+ | add
+ | to_entries
+ #| map(del(select(((.value//"")|keys|length) > 400)))
+ | map(select(.)|{key,count:(.value|length)})  # count = number of distinct values tallied for the column
+ | map(.value |= to_entries);  # NOTE(review): the projection above keeps only key and count, so .value is absent here - confirm intent
+
+def summary2:  # value counts per column, skipping columns whose values are almost all distinct
+ . as $data
+ | (.[0]|keys)  # column names are taken from the first row only
+ | map(. as $item | {
+ key: $item,
+ value: ($data|map(.[$item])|group_by(.)|map({"\(.[0])": length}))|add
+ })
+ | map(select((.value|to_entries|length)< (.90 * ($data|length))))  # keep a column only if its distinct values number under 90% of the rows
+ | from_entries;
+
+##########################################################################################
+# _site/general/codepoints.md
+##########################################################################################
+
+def smart_squotes($s):  # wrap $s in single quotes (code point 39) only when it contains whitespace
+ ([39]|implode) as $q | $s | if test("[\\s\\n\\t]";"x") then "\($q)\($s)\($q)" else $s end;
+
+def smart_dquotes($s):  # JSON-encode (and thereby double-quote) $s only when it contains whitespace
+ $s | if test("[\\s\\n\\t]";"x") then tojson else . end;
+
+##########################################################################################
+# _site/general/reduce.md
+##########################################################################################
+
+def tocsv:  # despite the name: array of rows whose first row is the header -> array of objects keyed by that header
+ .[0] as $header
+ | .[1:]
+ | map(. as $cells
+ | [ $header | to_entries[] | { key: .value, value: $cells[.key] } ]
+ | from_entries);
+
+##########################################################################################
+# _site/general/wrangle.md
+##########################################################################################
+
+def s: split(" +"; null);  # split the input string on runs of spaces (regex), returning an array
+
##########################################################################################
# functions/barcharts.md
##########################################################################################
@@ -175,6 +328,91 @@ def summary2:
| map(select((.value|to_entries|length)< (.90 * ($data|length))))
| from_entries;
+##########################################################################################
+# functions/text.md
+##########################################################################################
+
+def split_newlines($s):  # multi-line string input -> array of lines (whitespace right after a line break is dropped)
+ if ((type == "string") and (($s|tostring|split("\n")|length) > 1)?)
+ then ($s|tostring|split("[\\r\\n]+([\\s]+)?";"x"))
+ elif (type == "object") then to_entries  # object input comes back as entries; anything else yields $s unchanged
+ else $s end;
+# Split every multi-line string in a document. Only strings are rewritten: piping arrays through from_entries turned [] into {}.
+def recuse_split_newlines: walk(if type == "string" then split_newlines(.) else . end);
+
+def squo: [39]|implode;  # the single-quote character, built from code point 39
+
+def squote($text): [squo,$text,squo]|join("");  # $text wrapped in single quotes
+def dquote($text): "\"\($text)\"";  # $text wrapped in double quotes; quotes inside $text are not escaped
+
+def unsmart($text): $text | gsub("[“”]";"\"") | gsub("[’‘]";"'");  # curly quotes -> plain ASCII quotes
+def unsmart: unsmart(.);  # filter form; `. | unsmart` re-entered this same zero-argument definition and never returned
+
+##########################################################################################
+# functions/unroll.md
+##########################################################################################
+
+def categorize:
+ # Classifies the input: containers report their own type name
+ # ("object" or "array"); every other JSON value is a "scalar".
+ type as $kind
+ | if $kind == "object" or $kind == "array"
+ then $kind else "scalar"
+ end;
+
+def pluck($category):
+ # Emits each child value of an object whose category equals
+ # $category. Input that is not an object emits nothing.
+ if categorize == "object"
+ then .[]
+ | select(categorize == $category)
+ else empty
+ end;
+
+def split:
+ # Builds a meta object { objects, arrays, scalars }. For an object, its
+ # child values are sorted into those three lists, separated by category.
+ # A scalar or an array is not inspected any further: the element itself
+ # becomes the single entry of the matching list.
+ categorize as $kind | if $kind == "object"
+ then { objects: [pluck("object")], arrays: [pluck("array")], scalars: [pluck("scalar")] }
+ else { objects: [], arrays: [select($kind == "array")], scalars: [select($kind == "scalar")] }
+ end;
+
+def unwrap:
+ # Emits the non-array values held in an arbitrarily nested array, one
+ # by one. An empty array emits nothing; input that is not an array is
+ # emitted unchanged.
+ if type == "array"
+ then .[] | unwrap
+ else .
+ end;
+
+def extract($category):
+ # Flattens the input with unwrap (nested arrays are opened up, any
+ # other value passes through as is) and keeps only the results
+ # whose category equals $category.
+ unwrap | select(categorize == $category);
+
+def unroll:
+ # Unrolls the passed in object recursively until only scalars are left.
+ # Returns a row for each leaf node of tree structure of the object and
+ # elements of the row would be all the scalars encountered at all the
+ # ancestor levels of this leaf node.
+ . | .result += .state.scalars  # scalars found at this level are appended to the row being built
+ | .state.objects += [.state.arrays | extract("object")]  # objects found inside arrays are queued for expansion
+ | .state.objects += [.state.arrays | extract("scalar")]  # scalars found inside arrays are queued the same way
+ | if (.state.objects | length == 0 )
+ then .result
+ else ({ data : .state.objects,
+ state: .state.objects[] | split,  # .[] fans out: one recursive call, and so one row, per queued element
+ result: .result
+ } | unroll)
+ end;
+
+def unrolls($data): { data: $data, state: $data| split, result: [] } | unroll ;  # seed the accumulator with an empty row and unroll $data
+def unrolls: unrolls(.);  # filter form: unroll the piped input
+
##########################################################################################
# general/codepoints.md
##########################################################################################