-- 02 Getting Started with Databricks SQL.sql
-- Forked from itversity/databricks-sql.
-- Set up the retail database.
-- Each statement is guarded by IF NOT EXISTS, so re-running this section
-- leaves an already existing database or table as it is.
CREATE DATABASE IF NOT EXISTS retail_db;

-- retail_db.orders: four columns, created without an explicit format or location.
-- NOTE(review): order_date is declared STRING, so values are kept as text;
-- a DATE/TIMESTAMP type may fit better -- confirm against the load step first.
CREATE TABLE IF NOT EXISTS retail_db.orders (
    order_id          INT,
    order_date        STRING,
    order_customer_id INT,
    order_status      STRING
);

-- retail_db.order_items: six columns, created without an explicit format or location.
-- NOTE(review): the subtotal and product price columns are FLOAT (inexact);
-- if they hold monetary amounts, DECIMAL(p, s) is presumably the better
-- choice -- confirm against the load step before changing the schema.
CREATE TABLE IF NOT EXISTS retail_db.order_items (
    order_item_id            INT,
    order_item_order_id      INT,
    order_item_product_id    INT,
    order_item_quantity      INT,
    order_item_subtotal      FLOAT,
    order_item_product_price FLOAT
);
-- Review the retail database: inspect the metadata of both tables.
DESCRIBE TABLE FORMATTED retail_db.orders;

DESCRIBE TABLE FORMATTED retail_db.order_items;
-- Step 1: Configure the Databricks CLI.
-- Step 2: Use the Databricks CLI to push the data to the Databricks platform.
-- Step 3: Use the Databricks platform to load the data into the retail_db tables.
-- Step 4: Review the data using the SQL Editor of the Databricks SQL platform.
-- Validate the retail database tables.

-- Total row count of each table.
SELECT count(*)
FROM retail_db.orders;

SELECT count(*)
FROM retail_db.order_items;

-- Preview of at most 10 rows per table. There is no ORDER BY, so which rows
-- are returned is not fixed.
SELECT *
FROM retail_db.orders
LIMIT 10;

SELECT *
FROM retail_db.order_items
LIMIT 10;

-- Number of distinct id values per table, for comparison with the total
-- row counts returned above.
SELECT count(DISTINCT order_id)
FROM retail_db.orders;

SELECT count(DISTINCT order_item_id)
FROM retail_db.order_items;
-- Overview of external tables.
-- Defines retail_db.orders_external over the CSV data stored at
-- dbfs:/public/retail_db/orders. The column list is the same as the one
-- declared for retail_db.orders. IF NOT EXISTS keeps an existing definition
-- untouched on re-run.
CREATE EXTERNAL TABLE IF NOT EXISTS retail_db.orders_external (
    order_id          INT,
    order_date        STRING,
    order_customer_id INT,
    order_status      STRING
)
USING CSV
LOCATION 'dbfs:/public/retail_db/orders';

-- Inspect the metadata of the external table.
DESCRIBE FORMATTED retail_db.orders_external;

-- Preview of at most 10 rows (no ORDER BY, so the rows returned are not fixed).
SELECT *
FROM retail_db.orders_external
LIMIT 10;

-- Total row count of the external table.
SELECT count(*)
FROM retail_db.orders_external;