Skip to content

Commit

Permalink
Add basic examples
Browse files Browse the repository at this point in the history
By Erik-Jan van Kesteren
  • Loading branch information
qubixes committed Nov 21, 2022
1 parent 3b38c98 commit 9fd676f
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 0 deletions.
90 changes: 90 additions & 0 deletions examples/basic_example.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
{
"n_rows": 5,
"n_columns": 5,
"provenance": {
"created by": {
"name": "MetaSynth",
"version": "0.1.2+15.ged3af36",
"privacy": null
},
"creation time": "2022-11-17T13:54:16.686166"
},
"vars": [
{
"name": "ID",
"type": "discrete",
"dtype": "<class 'polars.datatypes.Int64'>",
"prop_missing": 0.0,
"distribution": {
"name": "UniqueKeyDistribution",
"parameters": {
"low": 1,
"consecutive": 1
}
}
},
{
"name": "fruits",
"type": "categorical",
"dtype": "<class 'polars.datatypes.Categorical'>",
"prop_missing": 0.0,
"distribution": {
"name": "MultinoulliDistribution",
"parameters": {
"labels": [
"apple",
"banana"
],
"probs": [
0.4,
0.6
]
}
}
},
{
"name": "B",
"type": "discrete",
"dtype": "<class 'polars.datatypes.Int64'>",
"prop_missing": 0.0,
"distribution": {
"name": "PoissonDistribution",
"parameters": {
"mu": 3.0
}
}
},
{
"name": "cars",
"type": "categorical",
"dtype": "<class 'polars.datatypes.Categorical'>",
"prop_missing": 0.0,
"distribution": {
"name": "MultinoulliDistribution",
"parameters": {
"labels": [
"audi",
"beetle"
],
"probs": [
0.2,
0.8
]
}
}
},
{
"name": "optional",
"type": "discrete",
"dtype": "<class 'polars.datatypes.Int64'>",
"prop_missing": 0.2,
"distribution": {
"name": "DiscreteUniformDistribution",
"parameters": {
"low": -30,
"high": 301
}
}
}
]
}
39 changes: 39 additions & 0 deletions examples/basic_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import polars as pl
from metasynth import MetaDataset

# Example dataframe (from the polars website); "optional" contains one
# missing value.
df = pl.DataFrame(
    {
        "ID": [1, 2, 3, 4, 5],
        "fruits": ["banana", "banana", "apple", "apple", "banana"],
        "B": [5, 4, 3, 2, 1],
        "cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
        "optional": [28, 300, None, 2, -30],
    }
)

# Cast the string columns to categorical.
df = df.with_columns(
    [
        pl.col("fruits").cast(pl.Categorical),
        pl.col("cars").cast(pl.Categorical),
    ]
)

# Per-column hints: mark "ID" as unique and "B" as not unique.
spec_dict = {
    "ID": {"unique": True},
    "B": {"unique": False},
}

# Create the metadataset from the dataframe and the column hints.
mds = MetaDataset.from_dataframe(df, spec=spec_dict)

# Write to json. NOTE: the path is relative to the current working
# directory, so run this script from the repository root.
mds.to_json("examples/basic_example.json")

# then, export json from secure environment

# outside secure environment, load json
mds_out = MetaDataset.from_json("examples/basic_example.json")

# Create a fake dataset (requesting 10 rows) and show it; without the print
# the synthesized result would be silently discarded when run as a script.
df_synthetic = mds_out.synthesize(10)
print(df_synthetic)

0 comments on commit 9fd676f

Please sign in to comment.