Commit

Draft push action
haroldrubio committed Nov 13, 2024
1 parent 0304738 commit 0088d37
Showing 3 changed files with 87 additions and 1 deletion.
42 changes: 42 additions & 0 deletions .github/workflows/push-image.yml
@@ -0,0 +1,42 @@
# This workflow builds and pushes the expertise image to the Artifact Registry

name: dev-deployment

# Controls when the workflow will run
on:
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:
    # Declare the inputs referenced by the deploy step below
    inputs:
      branch:
        description: 'Branch passed to the build script as -b'
      py_branch:
        description: 'Branch passed to the build script as -p'
  # Triggers the workflow on push events, but only for the feature/containerize branch
  push:
    branches: [ feature/containerize ]
jobs:
  deploy:
    # Allow the job to fetch a GitHub ID token
    permissions:
      id-token: write
      contents: read
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Add SSH key
        run: |
          mkdir -p /home/runner/.ssh
          echo "${{ secrets.GCLOUD_SSH_KEY }}" > /home/runner/.ssh/google_compute_engine
          echo "${{ secrets.GCLOUD_SSH_KEY_PUB }}" > /home/runner/.ssh/google_compute_engine.pub
          chmod 600 /home/runner/.ssh/google_compute_engine
          chmod 600 /home/runner/.ssh/google_compute_engine.pub
      - name: Authenticate with Google Cloud
        id: auth
        uses: google-github-actions/auth@v1
        with:
          workload_identity_provider: ${{ secrets.WORKLOAD_IDENTITY_PROVIDER }}
          service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}
          create_credentials_file: true
          cleanup_credentials: true
          export_environment_variables: true
      - name: Setup gcloud
        uses: google-github-actions/setup-gcloud@v1
      - name: Run deploy script
        run: |
          gcloud config set compute/zone us-central1-c
          gcloud compute ssh openreview@instance-matching-server --command '/bin/expertise_build_dev.sh -b ${{ github.event.inputs.branch }} -p ${{ github.event.inputs.py_branch }}' --quiet
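
With the dispatch inputs declared above, a run can also be started outside the Actions tab. A minimal sketch using the GitHub REST API workflow-dispatch endpoint; the repository slug, token variable, and branch values are illustrative assumptions, not part of this commit:

import os
import requests

# Assumed repository slug, for illustration only
url = ('https://api.github.com/repos/openreview/openreview-expertise'
       '/actions/workflows/push-image.yml/dispatches')
resp = requests.post(
    url,
    headers={
        'Accept': 'application/vnd.github+json',
        'Authorization': f"Bearer {os.environ['GITHUB_TOKEN']}",  # assumed token env var
    },
    json={
        'ref': 'feature/containerize',
        'inputs': {'branch': 'feature/containerize', 'py_branch': 'master'},  # example values
    },
)
resp.raise_for_status()  # GitHub responds 204 No Content on success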
5 changes: 5 additions & 0 deletions Dockerfile
@@ -11,6 +11,10 @@ ARG PATH="/app/miniconda/bin:${PATH}"

# Set the environment variable
ENV FLASK_ENV=production
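
# Locations of the model artifacts: the GCS source (AIP_STORAGE_URI) and the
# SPECTER / multifacet recommender copies on the container filesystem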
ENV AIP_STORAGE_URI="gs://openreview-expertise/expertise-utils/"
ENV SPECTER_DIR="/app/expertise-utils/specter/"
ENV MFR_VOCAB_DIR="/app/expertise-utils/multifacet_recommender/feature_vocab_file"
ENV MFR_CHECKPOINT_DIR="/app/expertise-utils/multifacet_recommender/mfr_model_checkpoint/"

COPY . /app/openreview-expertise

@@ -21,6 +25,7 @@ RUN apt update \
    && apt install -y build-essential \
    && apt install -y git \
    && apt install -y sudo \
    && apt install -y vim \
    && DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt install -y tzdata \
    && cd $HOME \
    && wget "https://repo.anaconda.com/miniconda/Miniconda3-py38_22.11.1-1-Linux-x86_64.sh" -O miniconda.sh \
41 changes: 40 additions & 1 deletion expertise/execute_pipeline.py
@@ -37,6 +37,8 @@
baseurl_v1 = raw_request.pop('baseurl_v1')
baseurl_v2 = raw_request.pop('baseurl_v2')
destination_prefix = raw_request.pop('gcs_folder')
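# Optional debug flags: when set in the request, also upload the raw
# archives and the computed embeddings alongside the scores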
dump_embs = raw_request.pop('dump_embs', False)
dump_archives = raw_request.pop('dump_archives', False)
specter_dir = os.getenv('SPECTER_DIR')
mfr_vocab_dir = os.getenv('MFR_VOCAB_DIR')
mfr_checkpoint_dir = os.getenv('MFR_CHECKPOINT_DIR')
@@ -88,4 +90,41 @@
        })
blob = bucket.blob(destination_blob)
contents = '\n'.join([json.dumps(r) for r in result])
blob.upload_from_string(contents)

# Dump config
destination_blob = f"{blob_prefix}/job_config.json"
blob = bucket.blob(destination_blob)
blob.upload_from_string(json.dumps(config.to_json()))

# Dump archives
if dump_archives:
    for jsonl_file in os.listdir(os.path.join(config.job_dir, 'archives')):
        result = []
        destination_blob = f"{blob_prefix}/archives/{jsonl_file}"
        with open(os.path.join(config.job_dir, 'archives', jsonl_file), 'r') as f:
            for line in f:
                data = json.loads(line)
                result.append({
                    'id': data['id'],
                    'content': data['content']
                })
        blob = bucket.blob(destination_blob)
        contents = '\n'.join([json.dumps(r) for r in result])
        blob.upload_from_string(contents)

# Dump embeddings
if dump_embs:
    for emb_file in [d for d in os.listdir(config.job_dir) if d.endswith('.jsonl')]:
        result = []
        destination_blob = f"{blob_prefix}/{emb_file}"
        with open(os.path.join(config.job_dir, emb_file), 'r') as f:
            for line in f:
                data = json.loads(line)
                result.append({
                    'paper_id': data['paper_id'],
                    'embedding': data['embedding']
                })
        blob = bucket.blob(destination_blob)
        contents = '\n'.join([json.dumps(r) for r in result])
        blob.upload_from_string(contents)
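
Each embeddings file is written one JSON object per line, so a consumer can stream it back out of the bucket. A minimal sketch using the same google-cloud-storage client the pipeline relies on; the bucket name and blob path are illustrative assumptions:

import json
from google.cloud import storage

client = storage.Client()
bucket = client.bucket('openreview-expertise')  # assumed bucket name
blob = bucket.blob('jobs/example-job/embeddings.jsonl')  # hypothetical gcs_folder path
embeddings = {}
for line in blob.download_as_text().splitlines():
    record = json.loads(line)
    embeddings[record['paper_id']] = record['embedding']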
