Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Jstein77 authored May 25, 2023
0 parents commit 30589fe
Show file tree
Hide file tree
Showing 54 changed files with 164,303 additions and 0 deletions.
32 changes: 32 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
  "name": "Python 3",
  "image": "mcr.microsoft.com/devcontainers/python:0-3.10-bullseye",
  // Additional tooling installed into the container on top of the base image.
  "features": {
    "ghcr.io/devcontainers-contrib/features/black:1": {},
    "ghcr.io/devcontainers-contrib/features/meltano": {},
    "ghcr.io/devcontainers/features/node:1": {},
    "ghcr.io/eitsupi/devcontainer-features/duckdb-cli:1": {},
    "ghcr.io/eitsupi/devcontainer-features/go-task:1": {}
  },
  // 'postCreateCommand' runs once the container has been created.
  "postCreateCommand": "task deps",
  // Environment variables made available inside the container.
  "remoteEnv": {
    "DATABASE": "duckdb",
    "FILENAME": "jaffle_shop.duckdb"
  },
  // Tool-specific configuration.
  "customizations": {
    "vscode": {
      "settings": {
        "terminal.integrated.defaultProfile.linux": "zsh"
      },
      "extensions": [
        "dorzey.vscode-sqlfluff",
        "esbenp.prettier-vscode",
        "GitHub.codespaces"
      ]
    }
  }
}
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/python
Binary file added .github/static/codespaces-setup-screen.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added .github/static/open-codespace.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added .github/static/use-template.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
23 changes: 23 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Continuous integration: on every pull request, load the sample data with
# Meltano and build the dbt project.
name: CI

on: pull_request

jobs:
  dbt-build:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          # Quoted so YAML keeps the version as the string "3.10" rather
          # than reading it as the float 3.1.
          python-version: "3.10"
          cache: 'pip'
      - name: Install dependencies
        run: |
          python -m pip install -r requirements.txt
          pipx install meltano && meltano install
      - name: Run Extract and Load
        run: meltano run tap-jaffle-shop target-duckdb
      - name: Install dbt Dependencies
        run: dbt deps
      - name: dbt Build
        run: dbt build
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Python virtual environment
.venv

# dbt build output, installed packages, and logs
target/
dbt_packages/
logs/

# Local DuckDB database files
*.duckdb
*.duckdb.wal

# CSV files under the reports sources directory
reports/sources/*.csv

# Meltano working directory
.meltano

# macOS Finder metadata
.DS_Store
34 changes: 34 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# pre-commit hook configuration: generic file hygiene, Python linting and
# formatting, JavaScript linting, and SQL linting/fixing.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v2.3.0
    hooks:
      - id: check-yaml
      - id: end-of-file-fixer
      - id: trailing-whitespace
      - id: requirements-txt-fixer
  - repo: https://github.com/charliermarsh/ruff-pre-commit
    rev: v0.0.245
    hooks:
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix]
  - repo: https://github.com/pre-commit/mirrors-eslint
    rev: v8.34.0
    hooks:
      - id: eslint
  - repo: https://github.com/sqlfluff/sqlfluff
    rev: "2.0.0a4"
    hooks:
      # Both SQLFluff hooks list the same extra packages (the dbt adapter
      # and the dbt templater) as additional dependencies.
      - id: sqlfluff-lint
        additional_dependencies:
          - "dbt-duckdb==1.4.0"
          - "sqlfluff-templater-dbt==2.0.0a4"
      - id: sqlfluff-fix
        additional_dependencies:
          - "dbt-duckdb==1.4.0"
          - "sqlfluff-templater-dbt==2.0.0a4"
  - repo: https://github.com/psf/black
    rev: "23.1.0"
    hooks:
      - id: black
  # - repo: https://github.com/pre-commit/mirrors-prettier
  #   rev: "" # Use the sha or tag you want to point at
  #   hooks:
  #     - id: prettier
37 changes: 37 additions & 0 deletions .sqlfluff
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# SQLFluff configuration for the dbt project.

[sqlfluff]
dialect = duckdb
templater = dbt
runaway_limit = 10
max_line_length = 80

# Indentation settings are kept together in the indentation section.
[sqlfluff:indentation]
indent_unit = space
tab_space_size = 4

# Commas trail the preceding element with no space before them.
[sqlfluff:layout:type:comma]
spacing_before = touch
line_position = trailing

[sqlfluff:rules:capitalisation.keywords]
capitalisation_policy = lower

[sqlfluff:rules:aliasing.table]
aliasing = explicit

[sqlfluff:rules:aliasing.column]
aliasing = explicit

[sqlfluff:rules:aliasing.expression]
allow_scalar = False

[sqlfluff:rules:capitalisation.identifiers]
extended_capitalisation_policy = lower

[sqlfluff:rules:capitalisation.functions]
capitalisation_policy = lower

[sqlfluff:rules:capitalisation.literals]
capitalisation_policy = lower

# Number in group by
[sqlfluff:rules:ambiguous.column_references]
group_by_and_order_by_style = implicit
4 changes: 4 additions & 0 deletions .sqlfluffignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Paths SQLFluff should not lint.

# dbt build output and installed packages
target
dbt_packages

# Project macros
macros

# Reports directory
reports
1 change: 1 addition & 0 deletions .user.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# NOTE(review): this looks like a per-user identifier file — confirm whether
# it is meant to be committed to the template repository.
id: bdc35ce7-c9bf-45be-b2f9-897ab7918ef8
19 changes: 19 additions & 0 deletions .vscode/extensions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
  "recommendations": [
    "GitHub.vscode-pull-request-github",
    "cschleiden.vscode-github-actions",
    "ms-python.python",
    "ms-python.vscode-pylance",
    "ms-python.black-formatter",
    "ms-python.isort",
    "charliermarsh.ruff",
    "redhat.vscode-yaml",
    "samuelcolvin.jinjahtml",
    "bungcip.better-toml",
    "tamasfe.even-better-toml",
    "mechatroner.rainbow-csv",
    "evidence.evidence-vscode",
    "svelte.svelte-vscode",
    "bastienboutonnet.vscode-dbt"
  ]
}
9 changes: 9 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
  "python.analysis.typeCheckingMode": "basic",
  "sqlfluff.dialect": "duckdb",
  "sqlfluff.experimental.format.executeInTerminal": true,
  "[jinja-sql]": {
    "editor.defaultFormatter": "dorzey.vscode-sqlfluff",
    "editor.formatOnSave": false
  }
}
128 changes: 128 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# 🥪 The Jaffle Shop 🦘
[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/dbt-labs/jaffle-shop-template?quickstart=1)
[![Open in Gitpod](https://gitpod.io/button/open-in-gitpod.svg)](https://gitpod.io/#https://github.com/dbt-labs/jaffle-shop-template)

This is a template for creating a fully functional dbt project for teaching, learning, writing, demoing, or any other scenarios where you need a basic project with a synthesized jaffle shop business. We recommend beginners use the following steps to open this project right here on GitHub in a Codespace. If you're a little more experienced with devcontainers and want to go faster 🏎️, you can use the Gitpod link above for a quicker startup and deeper feature set.

## How to use

### 1. Click the big green 'Use this template' button and 'Create a new repository'.

![Click use template](.github/static/use-template.gif)

This will create a new repository exactly like this one, and navigate you there. Make sure to execute the next instructions in that repo.

### 2. Click 'Code', then 'Codespaces', then 'Create codespace on main'.

![Create codespace on main](.github/static/open-codespace.gif)

This will create a new `codespace`, a sandboxed devcontainer with everything you need for a dbt project. Once the codespace is finished setting up, you'll be ready to run a `dbt build`.

### 3. Make sure to wait until the codespace is finished setting up.

![Codespaces setup screen at postCreateCommand](.github/static/codespaces-setup-screen.png)

After the container is built and connected to, VSCode will run a few cleanup commands and then a `postCreateCommand`, a set of commands run after the container is set up. This is where we install our dependencies, such as dbt, the duckdb adapter, and other necessities, as well as run `dbt deps` to install the dbt packages we want to use. That screen will look something like the above; when it's completed, it will close and leave you in a fresh terminal prompt. From there you're ready to do some analytics engineering!

## Additional included tools

This template includes two additional tools for the other parts of the stack to create a more realistic experience:

- BI reporting built with [Evidence](https://evidence.dev) - an open source, code-based BI tool to write reports with markdown and SQL.
- EL with [Meltano](https://meltano.com/) - an open source tool that provides a CLI & version control for ELT pipelines.

### Evidence

With Evidence you can:

- Version control your BI layer
- Build reports in the same repo as your dbt project
- Deploy your reports to a static site

#### Running Evidence

To run Evidence, use:

```shell
cd reports
npm run dev
```

See the [Evidence CLI docs](https://docs.evidence.dev/cli) for more details.

You can make changes to the markdown pages in the `reports/pages` folder and see the reports update in the browser preview.

#### Learning More about Evidence

- [Getting Started Walkthrough](https://docs.evidence.dev/getting-started/install-evidence)
- [Project Home Page](https://www.evidence.dev)
- [GitHub](https://github.com/evidence-dev/evidence)
- [Evidence.dev Releases](https://github.com/evidence-dev/evidence/releases)

### Meltano

This project is preconfigured with Meltano, which can be used to extract and load raw data into DuckDB.

#### Run EL (Extract and Load) using Meltano

```console
meltano run tap-jaffle-shop target-duckdb
```

Optionally, you can modify extract parameters using environment variables. For instance, this modified version will extract five years of data instead of the default 1 year.

```console
TAP_JAFFLE_SHOP_YEARS=5 meltano run tap-jaffle-shop target-duckdb
```

You can also modify any tap or target config with the interactive `config` command:

```console
meltano config tap-jaffle-shop set --interactive
meltano config target-duckdb set --interactive
```

## Local development

This project is optimized for running in a container. If you'd like to use it locally outside of a container, you'll need to follow the instructions below.

1. Create a python virtual environment and install the dependencies.

```console
python3 -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt
```

2. Install meltano with [pipx](https://pypa.github.io/pipx/installation/), then install meltano's dependencies.

```console
pipx install meltano
meltano install
```

3. Run the EL pipeline.

```console
meltano run el
```

4. Install dbt dependencies and build the dbt project.

```console
dbt deps
dbt build
```

5. Install Evidence dependencies and run the Evidence server.

```console
cd reports
npm install
npm run dev
```

## Contributing

We welcome issues and PRs requesting or adding new features. The package that generates the synthetic data, [`jafgen`](https://pypi.org/project/jafgen/), is also under active development, and will add more types of source data to model as we go along. If you have tests, descriptions, new models, metrics, materialization types, or techniques you use this repo to demonstrate, which you feel would make for a more expansive baseline experience, we encourage you to consider contributing them back in so that this project becomes an even better collective tool for exploring and learning dbt over time.
7 changes: 7 additions & 0 deletions Taskfile.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Task definitions for the project's task runner.
version: "3"

tasks:
  # Installs the Python requirements, then fetches the dbt packages.
  # The devcontainer's postCreateCommand runs this as `task deps`.
  deps:
    cmds:
      - python -m pip install --progress-bar off -r requirements.txt
      - dbt deps
Empty file added analyses/.gitkeep
Empty file.
41 changes: 41 additions & 0 deletions dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "jaffle_shop"
version: "1.0.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: "duckdb"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

target-path: "target"  # directory which will store compiled SQL files
clean-targets:  # directories to be removed by `dbt clean`
  - "target"
  - "dbt_packages"

vars:
  # Quoted so YAML reads the Jinja expression as a string instead of
  # trying to parse the leading `{` as a flow mapping.
  truncate_timespan_to: "{{ current_timestamp() }}"

# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models

# Models under staging/ are materialized as views and models under marts/
# as tables. These settings can be overridden in the individual model files
# using the `{{ config(...) }}` macro.

models:
  jaffle_shop:
    staging:
      +materialized: view
    marts:
      +materialized: table
Loading

0 comments on commit 30589fe

Please sign in to comment.