forked from phdata/pipeforge
-
Notifications
You must be signed in to change notification settings - Fork 1
/
environment.yml
26 lines (26 loc) · 1.7 KB
/
environment.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
---
# Environment configuration describing one database ingestion.
# Nested keys under `metadata`, `stagingDatabase` and `rawDatabase` must stay
# indented: at column 0 their `name`/`path` entries become duplicate top-level
# keys and silently override the ingestion `name`.

# Unique name for data ingestion
name: diff
# Associated AD group for ingestion
group: edh_employee_db
# Database type; must be mysql, oracle, mssql, hana, or teradata
databaseType: mysql
# Database schema to ingest
schema: employees
# JDBC URL for connecting to the database
jdbcUrl: "jdbc:mysql://localhost:3306/employees"
# Database user name
username: employee
# Database object type to be ingested; must be either table or view
objectType: table

# Optional whitelist of tables to ingest
# tables:
#   - employee

# Optional schema-wide check column. `checkColumn` is used for Sqoop
# incremental ingest
# checkColumn: last_updated

# Metadata map applied to every table's tblproperties. See
# https://www.cloudera.com/documentation/enterprise/latest/topics/impala_create_table.html
metadata:
  # Source database identifier
  SOURCE: employee database
  # Frequency of data ingestion
  LOAD_FREQUENCY: Daily
  # NOTE(review): looks like a leftover test entry (its original comment was a
  # copy of LOAD_FREQUENCY's) - confirm it is needed before relying on it
  test1: Daily
  # Distribution list for data owners.
  # TODO(review): the address is redacted in this copy; restore the real
  # value. Quoted so it parses as a string rather than a flow sequence.
  CONTACT: "[email protected]"

# Hadoop user; a process account is recommended
hadoopUser: ps_dev_employee
# Location of Sqoop's password file; an HDFS location is recommended
passwordFile: hdfs:///user/developer/.employee_db_password

stagingDatabase:
  name: edh_employee_staging
  path: hdfs:///user/developer/staging/db

rawDatabase:
  name: edh_employee_raw
  path: hdfs:///user/developer/raw/db

# Optional check_column parameter. This value will be added to all pipewrench
# table configs for incremental ingest.
# NOTE(review): appears to be the same `checkColumn` option as the
# commented-out one above - enable at most one of them
# checkColumn: last_updated

# Optional user defined parameters. These key values will be added to the
# pipewrench environment and configuration files to be used in templates
# userDefined:
#   table_suffix: _ingest