Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
107 commits
Select commit Hold shift + click to select a range
0931b07
stub convert45
May 11, 2022
1cb50d5
rename
May 11, 2022
6ca596b
extracted
May 11, 2022
5bb6c7b
path5
May 11, 2022
4fe50d8
convert(root, node):
May 11, 2022
46d55f3
write_yaml
May 11, 2022
60b879d
"0.4.6.dev1_"2022-05-11T22:30:24Z""
May 11, 2022
a9ffaa4
"post-regen-tests/cqml_test_db.py-0.4.6.dev1"
May 11, 2022
4f9d195
remove unused v45 files
May 11, 2022
2b698af
split out yml wrappers
May 11, 2022
97f07e1
run all pkg tests vs tree tests
May 11, 2022
91eca42
fix invalid key in demo.yml
May 11, 2022
e8172e1
cqml tree
May 11, 2022
8b54507
skip pkg tests while dev
May 11, 2022
d9a2c7c
test_yml_keys
May 11, 2022
2a9ad42
extract yml_tree
May 11, 2022
8f06282
extract key from tree
May 11, 2022
3c810c5
spec out new cqml parse
May 12, 2022
754cab8
class CQML_Parser
May 12, 2022
2c9a69e
root
May 14, 2022
cdd40be
Root pipes
May 14, 2022
5666c8d
Root.keys
May 14, 2022
6c6f9ec
self.env
May 14, 2022
263ac54
cqml env
May 14, 2022
f235b0b
folder_key
May 14, 2022
9653e5a
test_keys(root)
May 14, 2022
089a7b2
"0.4.6.dev2_"2022-05-14T17:41:23Z""
May 14, 2022
f440718
"post-regen-tests/cqml_test_db.py-0.4.6.dev2"
May 14, 2022
2f8e94e
test_new(root)
May 14, 2022
5bf3c1b
new CVM
May 14, 2022
40e5f8d
Remove superfluous dependencies
May 14, 2022
b467233
self.set_env(pipe, key)
May 14, 2022
117c3b6
bucket config
May 15, 2022
bfcbec7
env.update(source)
May 15, 2022
5f6389c
project/package
May 15, 2022
b0ed477
meta -> env
May 15, 2022
9532f2e
from .root import Root
May 15, 2022
a0f49ce
remove un-rooted yml
May 15, 2022
5d73e56
TEST_KEY
May 15, 2022
a6c5e15
"0.4.6.dev3_"2022-05-15T03:57:16Z""
May 15, 2022
b86734f
"post-regen-tests/cqml_test_db.py-0.4.6.dev3"
May 15, 2022
f7a82f7
create package correctly
May 15, 2022
e3a9bbe
"0.4.6.dev4_"2022-05-15T04:21:12Z""
May 15, 2022
f43f9b7
"post-regen-tests/cqml_test_db.py-0.4.6.dev4"
May 15, 2022
26dea05
"0.4.6.dev5_"2022-05-15T04:23:05Z""
May 15, 2022
201d2f1
"post-regen-tests/cqml_test_db.py-0.4.6.dev5"
May 15, 2022
a564a6c
run cvm directly
May 15, 2022
a04e8b6
"0.4.6.dev6_"2022-05-15T23:37:32Z""
May 15, 2022
985caf4
"post-regen-tests/cqml_test_db.py-0.4.6.dev6"
May 15, 2022
132933e
v0.5
May 15, 2022
b665922
"0.5.0.dev2_"2022-05-15T23:40:31Z""
May 15, 2022
2cc6d99
"post-regen-tests/cqml_test_db.py-0.5.0.dev2"
May 15, 2022
587de7e
run directly
May 15, 2022
420540e
"0.5.0.dev3_"2022-05-15T23:49:36Z""
May 15, 2022
72704fd
"post-regen-tests/cqml_test_db.py-0.5.0.dev3"
May 15, 2022
fe55a83
print(cvm.yaml)
May 16, 2022
80522bc
"0.5.0.dev4_"2022-05-16T03:48:05Z""
May 16, 2022
237bc8c
"post-regen-tests/cqml_test_db.py-0.5.0.dev4"
May 16, 2022
cf0e0e1
assert 'org' in yml['env']
May 16, 2022
fa1f9ad
"0.5.0.dev5_"2022-05-16T03:54:53Z""
May 16, 2022
dd93e0b
"post-regen-tests/cqml_test_db.py-0.5.0.dev5"
May 16, 2022
d0f2817
set_env debug
May 16, 2022
2d09c9f
"0.5.0.dev6_"2022-05-16T04:02:17Z""
May 16, 2022
5fc503a
"post-regen-tests/cqml_test_db.py-0.5.0.dev6"
May 16, 2022
e51e7f3
root_key
May 16, 2022
a141a46
"0.5.0.dev7_"2022-05-16T04:08:02Z""
May 16, 2022
3ffa1d9
"post-regen-tests/cqml_test_db.py-0.5.0.dev7"
May 16, 2022
a2b2c19
pipes/all
May 16, 2022
68a81f7
"0.5.0.dev8_"2022-05-16T04:25:54Z""
May 16, 2022
8932951
"post-regen-tests/cqml_test_db.py-0.5.0.dev8"
May 16, 2022
694a0dc
box better
May 16, 2022
de7b4ae
"0.5.0.dev9_"2022-05-16T04:28:57Z""
May 16, 2022
5db561b
"post-regen-tests/cqml_test_db.py-0.5.0.dev9"
May 16, 2022
e45bdf0
"0.5.0.dev10_"2022-05-16T04:39:02Z""
May 16, 2022
9d1fd82
"post-regen-tests/cqml_test_db.py-0.5.0.dev10"
May 16, 2022
5053b23
"0.5.0.dev11_"2022-05-16T04:44:35Z""
May 16, 2022
031b99d
"post-regen-tests/cqml_test_db.py-0.5.0.dev11"
May 16, 2022
4fb7263
skip empty box table
May 16, 2022
88ccab2
"0.5.0.dev12_"2022-05-16T17:43:52Z""
May 16, 2022
029e65f
"post-regen-tests/cqml_test_db.py-0.5.0.dev12"
May 16, 2022
8c480a4
"0.5.0.dev13_"2022-05-16T18:56:14Z""
May 16, 2022
31dd2c1
"post-regen-tests/cqml_test_db.py-0.5.0.dev13"
May 16, 2022
a5238c0
"0.5.0.dev14_"2022-05-16T19:02:52Z""
May 16, 2022
b56fdaa
"post-regen-tests/cqml_test_db.py-0.5.0.dev14"
May 16, 2022
f6a315d
"0.5.0.dev15_"2022-05-16T19:13:57Z""
May 16, 2022
064a231
"post-regen-tests/cqml_test_db.py-0.5.0.dev15"
May 16, 2022
55ece6d
calc_quarters
May 16, 2022
623b7b0
"0.5.0.dev16_"2022-05-16T19:18:10Z""
May 16, 2022
4ba01bf
"post-regen-tests/cqml_test_db.py-0.5.0.dev16"
May 16, 2022
3c426a4
drop aggregates
May 16, 2022
d260100
"0.5.0.dev17_"2022-05-16T19:20:08Z""
May 16, 2022
9628828
"post-regen-tests/cqml_test_db.py-0.5.0.dev17"
May 16, 2022
bc54d1e
cvm.do_save
May 16, 2022
820bb8c
use reports s3 bucket
May 17, 2022
6b4441c
"0.5.0.dev18_"2022-05-17T03:32:39Z""
May 17, 2022
a3c3fc7
"post-regen-tests/cqml_test_db.py-0.5.0.dev18"
May 17, 2022
4d3de63
"0.5.0.dev19_"2022-05-20T22:49:36Z""
May 20, 2022
6d6a0cc
"post-regen-tests/cqml_test_db.py-0.5.0.dev19"
May 20, 2022
f24522f
"0.5.0.dev20_"2022-05-20T23:55:35Z""
May 20, 2022
1894a53
"post-regen-tests/cqml_test_db.py-0.5.0.dev20"
May 20, 2022
3556d96
"0.5.0.dev21_"2022-05-21T04:38:33Z""
May 21, 2022
bad9958
"post-regen-tests/cqml_test_db.py-0.5.0.dev21"
May 21, 2022
a4afd85
"0.5.0.dev22_"2022-05-25T00:07:47Z""
May 25, 2022
2fc52ef
"post-regen-tests/cqml_test_db.py-0.5.0.dev22"
May 25, 2022
db29ec3
"0.5.0.dev23_"2022-05-25T00:09:29Z""
May 25, 2022
36ba831
"0.5.0.dev24_"2022-05-25T00:12:49Z""
May 25, 2022
5698bd9
"post-regen-tests/cqml_test_db.py-0.5.0.dev24"
May 25, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,21 +1,16 @@
0.4.6 (unreleased)
0.5.0 (unreleased)
------------------

- Nothing changed yet.
- Use extrinsic folder names for project/package
- do: schedule for top-level ordering and scheduling


0.4.5 (2022-05-10)
------------------

- Fix Manifest


0.4.4 (unreleased)
------------------

- YMM 0.6.1 support: open direct URL for test runs


0.4.3 (2022-04-18)
------------------

Expand Down
58 changes: 58 additions & 0 deletions archive-v45/cqml45.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env python3
import os, yaml

#
# Take two paths: root4 root5
# Copy files in each subfolder
# Strip out meta
# Strip leading numbers
#

# Absolute, machine-specific locations. R4 is the tree handed to extract()
# and R5 the tree handed to convert() at the bottom of this script.
ROOT="/Users/nauto/Developer"
R4=f"{ROOT}/it/databricks"
R5=f"{ROOT}/dbt/cqml"

def read_yaml(yaml_file):
    """Load the YAML document stored at ``yaml_file``.

    Args:
        yaml_file: Path of the file to read.

    Returns:
        Whatever ``yaml.full_load`` produces for the file's content.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, so the same file parses identically on every machine.
    # NOTE(review): full_load is used as in the original; if these files can
    # ever come from an untrusted source, consider yaml.safe_load instead.
    with open(yaml_file, encoding="utf-8") as data:
        return yaml.full_load(data)

def write_yaml(yaml_file, raw_yaml):
    """Dump ``raw_yaml`` to ``yaml_file`` without re-sorting its keys."""
    with open(yaml_file, 'w') as out:
        yaml.dump(raw_yaml, stream=out, sort_keys=False)

def extract(root):
    """Collect every ``*.yml`` file found one level below ``root``.

    Each immediate sub-folder of ``root`` is treated as a project; its name is
    printed as it is visited. Files directly inside ``root`` are ignored.

    Args:
        root: Directory whose sub-folders are scanned.

    Returns:
        A list of ``{"file", "project", "yml"}`` dicts, one per YAML file,
        ordered by folder name and then by file name.
    """
    tree = []
    # Sort the entries: os.scandir yields them in arbitrary order, which made
    # the result (and any later "last write wins" collision) unpredictable.
    with os.scandir(root) as entries:
        folders = sorted(
            (entry for entry in entries if entry.is_dir()),
            key=lambda entry: entry.name,
        )
    for folder in folders:
        print(folder.name)
        with os.scandir(folder.path) as entries:
            # is_file() keeps a directory that happens to be called
            # "something.yml" from being handed to open().
            files = sorted(
                (
                    entry for entry in entries
                    if entry.is_file() and entry.name.endswith(".yml")
                ),
                key=lambda entry: entry.name,
            )
        for file in files:
            yml = read_yaml(file.path)
            tree.append({"file": file.name, "project": folder.name, "yml": yml})
    return tree

def convert(root, node):
    """Rewrite one extracted node as a cqml 0.5 file below ``root``.

    The target name is derived from ``node["file"]`` with every digit removed:
    the text before the first ``_`` is the prefix and the text between the
    first and second ``_`` is the file name. Prefix ``rnr`` redirects the file
    into the ``rnr`` folder; prefix ``sierra`` renames it to ``sierra.yml``.
    The ``meta`` section is dropped and ``cqml``/``project``/``package`` are
    set on ``node["yml"]`` (which is modified in place) before it is written.

    Args:
        root: Destination root directory.
        node: Dict with ``"file"``, ``"project"`` and ``"yml"`` entries, as
            built by ``extract``.

    Returns:
        The path the converted YAML was written to.
    """
    project = node["project"]  # was `dir`, which shadowed the builtin
    stripped = ''.join(c for c in node["file"] if not c.isdigit())
    parts = stripped.split("_")
    prefix = parts[0]
    # A name without "_" used to raise IndexError; keep its stripped name.
    name = parts[1] if len(parts) > 1 else stripped
    if prefix == "rnr":
        project = prefix
    if prefix == "sierra":
        name = f"{prefix}.yml"
    target_dir = os.path.join(root, project)
    # The destination folder may not exist yet (e.g. the redirected "rnr").
    os.makedirs(target_dir, exist_ok=True)
    path = os.path.join(target_dir, name)
    yml = node["yml"]
    # Tolerate sources that have no "meta" section.
    yml.pop("meta", None)
    yml["cqml"] = 0.5
    yml["project"] = project
    yml["package"] = name
    write_yaml(path, yml)
    return path

# Script body: read every v0.4 file below R4, then write its converted form
# below R5, echoing the source name and the destination path of each one.
t = extract(R4)
print(f"\nExtracted: {len(t)} files\n")
for n in t:
    print(n["file"])
    p = convert(R5, n)
    print("\t",p)
16 changes: 16 additions & 0 deletions pipes/all.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
cqml: 0.6
order: 0.0
id: main
env:
org: nauto
bucket: biz-databricks-prod-reports
catalog: quilt
root: /dbfs/tmp
path: .
actions:
all:
do: run
# Quoted on purpose: YAML 1.1 loaders such as PyYAML read a bare 0500 as the octal integer 320.
start: '0500'
pipes:
- demo/demo
- test/cqml
30 changes: 7 additions & 23 deletions tests/demo.yml → pipes/demo/demo.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
cqml: 0.4.4
cqml: 0.5.0
id: cqml_demo
meta:
org: nauto
project: sangam
s3.bucket: biz-databricks-root-prod-us
catalog: quilt
root: /dbfs/tmp
actions:
/Workspace/Repos/[email protected]/cqml/data:
do: loadfiles
Expand Down Expand Up @@ -43,7 +37,7 @@ actions:
file_ext: csv
expiration_date: '2200-02-02'
cols:
letter: sort
better: sort
dat: sort
widget-report:
do: report
Expand All @@ -62,11 +56,11 @@ actions:
letter: tbd
plus_operator:
do: call
from: rename_merged
from: grouped
operator: +
args:
- next
- num
- text
- sum_num
calc_quarters:
do: calc
from: plus_operator
Expand All @@ -77,8 +71,8 @@ actions:
- $col
- 4
cols:
num: qnum
next: qnumb
sum_num: qnum
text: qnumb
plus_operator: qdumb
flagged:
do: flag
Expand Down Expand Up @@ -130,13 +124,3 @@ actions:
cols:
letter: tbd
text: tbd
aggregates:
do: group
from: $id
agg:
concat_space: count
call_coalesce: sum
sort: n_concat_space
cols:
num: tbd
letter: tbd
6 changes: 0 additions & 6 deletions tests/cqml_test.yml → pipes/test/cqml.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
cqml: 0.2
id: cqml_test
meta:
org: nauto
project: sangam
s3.bucket: biz-databricks-root-prod-us
catalog: quilt
root: /dbfs/tmp
actions:
/Workspace/Repos/[email protected]/cqml/data:
do: loadfiles
Expand Down
2 changes: 2 additions & 0 deletions src/cqml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@

# TODO: https://github.com/LucaCanali/sparkMeasure

from .yml import *
from .wrappers import *
from .root import Root
10 changes: 6 additions & 4 deletions src/cqml/boxquilt.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from boxsdk import Client, OAuth2, JWTAuth
from pyspark.sql import Row
from pyspark.sql.functions import udf,lit
from pyspark.sql.types import StringType
from pyspark.sql.types import StringType, StructType
import os

def dir_row(folder):
Expand Down Expand Up @@ -119,7 +119,7 @@ def create_or_update_box(self, skipUpdate=False):
dbfs = list(self.rows.keys())
to_create = list(set(dbfs) - set(box))
to_update = list(set(dbfs).intersection(box))
print(f"create:{len(to_create)} update:{len(to_update)}")
print(f"box_create:{len(to_create)} update:{len(to_update)}")

n = 0
for name in to_create:
Expand Down Expand Up @@ -147,5 +147,7 @@ def create_or_update_box(self, skipUpdate=False):

def box_table(self):
array = list(self.rows.values())
print(f'box_table: {len(array)}')
return self.spark.createDataFrame([Row(**i) for i in array])
if len(array) > 0:
print(f'box_table: {len(array)}')
return self.spark.createDataFrame([Row(**i) for i in array])
return self.spark.createDataFrame([], StructType([]))
33 changes: 23 additions & 10 deletions src/cqml/db2quilt.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from pathlib import Path
import pprint
pp = pprint.PrettyPrinter(indent=4)
QPKG = q3.Package()

def cleanup_names(df):
for c in df.columns:
Expand Down Expand Up @@ -112,8 +111,8 @@ def callback(x): grid.restore(filter=[['{KEY}', '==', x]])
grid
"""
def make_widget(opts):
print('make_widget')
print(opts)
#print('make_widget')
#print(opts)
code = [NB_WIDGET.format(KEY=col,WIDGET=w) for col, w in opts.items()]
cells = [[True, c] for c in code]
return cells
Expand All @@ -133,9 +132,17 @@ def make_slug(name): return re.sub(r'[^\w-]', '_', name.lower())
Quilt Wrappers
"""

DEFAULT_ENV={
'catalog': PKG_DIR,
'root': PYROOT,
}

def get_env(env, key, default):
    """Return ``env[key]``, or ``default`` when ``key`` is not in ``env``.

    A key that is present with a falsy value (``None``, ``""``, ``0``) is
    returned as-is; ``default`` is used only when the key is missing.
    """
    return env.get(key, default)

class Project:
def __init__(self, config):
org, bucket, project = itemgetter('org','s3.bucket','project')(config)
org, bucket, project = itemgetter('org','bucket','project')(config)
pkg_dir = config['catalog'] if 'catalog' in config else PKG_DIR
root = config['root'] if 'root' in config else PYROOT
self.repo = "s3://"+bucket
Expand All @@ -147,20 +154,21 @@ def package(self, id):
return Package(id, self)

class Package:
def __init__(self, id, proj, reset=False):
def __init__(self, id, proj, reset=True):
self.id = id
self.name = f"{proj.name}/{id}"
self.proj = proj
self.url = f"{proj.url}/{self.name}/"
self.path = f"{proj.path}/{self.name}/"
self.dir = to_dir(self.path)
self.pkg = q3.Package.browse(self.name, registry=self.proj.repo)
if reset:
shutil.rmtree(self.path,ignore_errors=True)
make_dir(self.path)
self.summaries={}

def setup(self):
QPKG.install(self.name, registry=self.proj.repo, dest=self.path)
self.pkg.install(self.name, registry=self.proj.repo, dest=self.path)

def read_csv(self, filename):
path = self.path+filename
Expand All @@ -170,8 +178,10 @@ def read_csv(self, filename):

def cleanup(self, msg, meta = {"db2quilt":"v0.1"}):
self.write_summary()
QPKG.set_dir('/',path=self.path, meta=meta)
QPKG.push(self.name, self.proj.repo, message=msg,force=True) #,
self.pkg.set_dir('/',path=self.path, meta=meta)
print(self.pkg)

self.pkg.push(self.name, self.proj.repo, message=msg,force=True) #,
#shutil.rmtree(self.path)
self.html = f'Published <a href="{self.url}">{self.name}</a> for <b>{msg}</b>'
return self
Expand Down Expand Up @@ -293,10 +303,13 @@ def write_summary(self):
#

def extract_pkg(cvm):
id, config = itemgetter('id','meta')(cvm.yaml)
# print(cvm.yaml)
config = itemgetter(kEnv)(cvm.yaml)
#print(f"extract_pkg.config: {config}")
proj = Project(config)
id = config["package"]
pkg_id = id + DEBUG_SUFFIX if cvm.debug == True else id
print("extract_pkg: "+pkg_id)
#print("extract_pkg: "+pkg_id)
pkg = proj.package(pkg_id)
#pkg.setup()
return pkg
Expand Down
2 changes: 2 additions & 0 deletions src/cqml/keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
TRACE=True

cAlias='|'
kALL='_all_'
kAbove='above'
kAny='any'
kArgs='args'
Expand All @@ -17,6 +18,7 @@
kCount='count'
kDoc='+doc'
kDrop='drop'
kEnv='env'
kExt='file_ext'
kFunc='function'
kGroup='group'
Expand Down
72 changes: 72 additions & 0 deletions src/cqml/root.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import os, yaml
from .keys import *
from .wrappers import CQML, pkg_cvm

def read_yaml(yaml_file):
    """Load the YAML document stored at ``yaml_file``.

    Args:
        yaml_file: Path of the file to read.

    Returns:
        Whatever ``yaml.full_load`` produces for the file's content.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, so the same file parses identically on every machine.
    # NOTE(review): full_load is used as in the original; if these files can
    # ever come from an untrusted source, consider yaml.safe_load instead.
    with open(yaml_file, encoding="utf-8") as data:
        return yaml.full_load(data)

class Root:
    """Catalog of the YAML pipes found below a root folder.

    The folder is scanned recursively. Every ``*.yml`` file becomes a pipe
    stored in ``self.pipes`` under the key ``"<folder>/<file stem>"``, and the
    ``env`` section of a file is remembered per folder name in ``self.env`` so
    that ``set_env`` can layer root-folder, folder and pipe settings.

    Attributes are plain dicts:
        root:  name of the root folder (last path component).
        pipes: key -> parsed YAML mapping.
        env:   folder name -> env mapping taken from a YAML file in it.
    """

    def __init__(self, root):
        """Scan ``root`` and register every YAML pipe below it."""
        self.root = self._folder_key(root)
        self.pipes = {}
        self.env = {}
        self.scan(root)

    @staticmethod
    def _folder_key(path):
        """Return the last component of ``path``.

        Unlike ``path.split("/")[-1]`` this ignores a trailing separator
        (``"pipes/"`` -> ``"pipes"``) and understands the platform separator.
        """
        return os.path.basename(os.path.normpath(path))

    def keys(self):
        """Return the keys of all registered pipes as a list."""
        return list(self.pipes)

    def add_env(self, yml, key):
        """Remember the ``env`` section of ``yml`` under ``key``.

        Returns the section itself (so the caller can extend it in place), or
        a new empty dict when ``yml`` has no usable ``env`` section.
        """
        if kEnv not in yml:
            return {}
        env = yml[kEnv]
        if env is None:
            # "env:" with nothing under it loads as None; treat it as absent.
            return {}
        # Store a copy: parse() adds file-specific keys to the returned dict
        # and those must not leak into the folder-level entry.
        self.env[key] = dict(env)
        return env

    def set_env(self, yml, key):
        """Replace ``yml``'s env with the layered root/folder/pipe settings.

        Later layers win: the root folder's env, then the env of the folder
        named by the pipe's ``project`` entry, then the pipe's own env.
        Returns the merged dict, which is also stored back into ``yml``.
        """
        print(f'set_env:{key}')
        own = yml.get(kEnv) or {}
        folder = own.get("project")
        env = {}
        for scope in (self.root, folder):
            env.update(self.env.get(scope) or {})
        env.update(own)
        yml[kEnv] = env
        return env

    def new(self, spark, key, debug=False):
        """Build a CQML instance for the pipe registered under ``key``.

        Raises KeyError when ``key`` is not a registered pipe.
        """
        pipe = self.pipes[key]
        self.set_env(pipe, key)
        cvm = CQML(pipe, spark)
        if debug:
            cvm.debug = True
        return cvm

    def pkg(self, spark, key, debug=False):
        """Create the pipe's CQML instance, run it and pass it to pkg_cvm."""
        cvm = self.new(spark, key, debug)
        cvm.run()
        return pkg_cvm(cvm)

    #def pkg_all(self, spark, debug=False): return {key:self.pkg(spark, key, debug) for key in self.keys()}

    def scan(self, root):
        """Parse every entry of the directory ``root`` in name order."""
        # os.scandir yields entries in arbitrary order; sorting makes the
        # "last env seen in a folder wins" rule of add_env reproducible.
        with os.scandir(root) as entries:
            ordered = sorted(entries, key=lambda entry: entry.name)
        for entry in ordered:
            self.parse(entry, root)

    def parse(self, entry, folder):
        """Register ``entry`` if it is a YAML file, or descend into it if it
        is a directory. ``folder`` is the directory that contains ``entry``.
        """
        name = entry.name
        if name.endswith(".yml") and entry.is_file():
            yml = read_yaml(entry.path)
            if not isinstance(yml, dict):
                # Empty file or a non-mapping document: nothing to register.
                print(f"skip {entry.path}: not a YAML mapping")
                return
            file_key = os.path.splitext(name)[0]
            folder_key = self._folder_key(folder)
            env = self.add_env(yml, folder_key)
            key = f"{folder_key}/{file_key}"
            source = {
                "file": name,
                "package": file_key,
                "project": folder_key,
                "key": key,
                "path": entry.path,
            }
            env.update(source)
            yml[kEnv] = env
            self.pipes[key] = yml
        elif entry.is_dir():
            print(entry.name)
            self.scan(entry.path)
1 change: 1 addition & 0 deletions src/cqml/vm.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def log(self, str, name=False):
if self.debug:
if name: print(name)
print(str)
return str

def macro(self, todo, action):
mdef = todo.split("|")
Expand Down
Loading