# Skip to content
#
# Update Alignment Research Dataset embeddings in Pinecone #22
#
# Update Alignment Research Dataset embeddings in Pinecone
#
# Update Alignment Research Dataset embeddings in Pinecone #22

name: Update Alignment Research Dataset embeddings in Pinecone

# Two entry points are declared: a reusable-workflow call and a manual run.
on:
  # Entry point for other workflows. Every input below is a required string.
  # NOTE(review): db_password and the API keys are declared as plain inputs,
  # not under `secrets:` -- confirm callers never pass values that must stay
  # masked in logs this way.
  workflow_call:
    inputs:
      # Which datasource to process; free-form here, unlike the choice list
      # offered for manual runs.
      datasource:
        type: string
        required: true

      # Database connection settings.
      db_user:
        type: string
        required: true
      db_password:
        type: string
        required: true
      db_host:
        type: string
        required: true

      # Pinecone settings.
      pinecone_api_key:
        type: string
        required: true
      pinecone_environment:
        type: string
        required: true

      # OpenAI settings.
      openai_api_key:
        type: string
        required: true

  workflow_dispatch:  # allow manual triggering
    inputs:
      # Offered as a drop-down; `all` is handled separately by the job script.
      datasource:
        description: 'The datasource to process'
        type: choice
        options:
          - all
          - agentmodels
          - agisf
          - aisafety.info
          - alignmentforum
          - arbital
          - arxiv
          - blogs
          - distill
          - eaforum
          - indices
          - lesswrong
          - special_docs
          - youtube
jobs:
  # Single job: check out the repository, install its Python requirements,
  # then run `main.py` with either `pinecone_update_all` or
  # `pinecone_update <datasource>` depending on the `datasource` input.
  build-dataset:
    runs-on: ubuntu-latest
    steps:
      # v2 of checkout/setup-python targets a Node runtime that GitHub-hosted
      # runners have retired; use the currently maintained major versions.
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Python environment
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps it a string rather than a float.
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          python -c 'import nltk; nltk.download("punkt")'

      - name: Process dataset
        env:
          # Each credential prefers the repository/inherited secret and falls
          # back to the matching workflow input when the secret is empty.
          ARD_DB_USER: ${{ secrets.ARD_DB_USER || inputs.db_user }}
          ARD_DB_PASSWORD: ${{ secrets.ARD_DB_PASSWORD || inputs.db_password }}
          ARD_DB_HOST: ${{ secrets.ARD_DB_HOST || inputs.db_host }}
          ARD_DB_NAME: alignment_research_dataset
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY || inputs.openai_api_key }}
          PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY || inputs.pinecone_api_key }}
          PINECONE_ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT || inputs.pinecone_environment }}
          # The input reaches the script through the environment instead of
          # being pasted into the script text, so shell metacharacters in a
          # caller-supplied value (`;`, `$(...)`, backticks) are never
          # interpreted as commands.
          DATASOURCE: ${{ inputs.datasource }}
        run: |
          if [ "$DATASOURCE" = "all" ]; then
            python main.py pinecone_update_all
          else
            # Deliberately unquoted: keeps the argument list identical to the
            # previous inline expansion for ordinary values (an empty value
            # adds no argument; whitespace separates arguments).
            # shellcheck disable=SC2086
            python main.py pinecone_update $DATASOURCE
          fi