-
Notifications
You must be signed in to change notification settings - Fork 7
67 lines (59 loc) · 1.67 KB
/
push-dataset.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Display name of this workflow.
name: Synch uploaded jsonl files to HuggingFace

on:
  # Entry point for other workflows (`uses:` this file). All four inputs are
  # mandatory for callers.
  workflow_call:
    inputs:
      # Forwarded to both `main.py generate_jsonl_files` and
      # `upload_to_huggingface.py` in the job below.
      datasource:
        required: true
        type: string
      # The db_* inputs are only fallbacks: the job prefers the ARD_DB_*
      # secrets and uses these values when the matching secret is empty.
      db_user:
        required: true
        type: string
      # NOTE(review): this credential arrives as a plain input rather than a
      # `secrets:` entry, so it presumably is not masked in logs - confirm
      # whether callers can switch to passing it as a secret.
      db_password:
        required: true
        type: string
      db_host:
        required: true
        type: string

  workflow_dispatch:  # allow manual triggering
    inputs:
      # Manual runs choose from a fixed list; `all` is preselected. No db_*
      # inputs exist here, so manual runs depend on the ARD_DB_* secrets.
      datasource:
        description: 'The datasource to process'
        type: choice
        options:
          - all
          - agentmodels
          - agisf
          - aisafety.info
          - alignmentforum
          - arbital
          - arxiv
          - blogs
          - distill
          - eaforum
          - lesswrong
          - special_docs
          - youtube
        default: all
jobs:
  # Single job: export the requested datasource to jsonl via main.py, then
  # push the result with upload_to_huggingface.py.
  generate-dataset:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        # Bumped from v2, which targets a Node runtime GitHub has retired.
        uses: actions/checkout@v4

      - name: Setup Python environment
        # Bumped from v2 for the same runtime-deprecation reason.
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps it a string instead of parsing it as a float.
          python-version: '3.11'

      - name: Install dependencies
        run: pip install -r requirements.txt

      - name: Generate dataset file
        env:
          # Secrets take precedence; the workflow_call inputs are used only
          # when the corresponding secret is empty.
          ARD_DB_USER: ${{ secrets.ARD_DB_USER || inputs.db_user }}
          ARD_DB_PASSWORD: ${{ secrets.ARD_DB_PASSWORD || inputs.db_password }}
          ARD_DB_HOST: ${{ secrets.ARD_DB_HOST || inputs.db_host }}
          ARD_DB_NAME: alignment_research_dataset
          # Passed through the environment rather than interpolated into the
          # script text, so a crafted value cannot inject shell commands.
          DATASOURCE: ${{ inputs.datasource }}
        # $DATASOURCE is deliberately unquoted: the original interpolation let
        # a space-separated value split into several arguments, and that is
        # preserved. NOTE(review): quote it if callers only ever pass one name.
        run: python main.py generate_jsonl_files $DATASOURCE

      - name: Setup Huggingface client
        run: pip install huggingface_hub gdown jsonlines datasets

      - name: Upload files
        env:
          # Token and datasource reach the shell as environment variables and
          # are never spliced into the script source.
          HUGGINGFACE_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }}
          DATASOURCE: ${{ inputs.datasource }}
        # The token is still handed to the script as its first positional
        # argument, exactly as before; $DATASOURCE stays unquoted for the
        # same argument-splitting reason as in the generate step.
        run: python upload_to_huggingface.py "$HUGGINGFACE_TOKEN" $DATASOURCE