ci: add consolidate gguf model pipeline

janhq · Feb 21, 2025 · b140b6a · b140b6a
1 parent aa570e3
commit b140b6a
Showing 1 changed file with 111 additions and 0 deletions.
diff --git a/.github/workflows/consolidate-gguf-model-to-main.yml b/.github/workflows/consolidate-gguf-model-to-main.yml
@@ -0,0 +1,111 @@
+# Manually dispatched workflow; both inputs must be supplied for every run.
+name: Consolidate GGUF Models to Main Branch
+
+on:
+  workflow_dispatch:
+    inputs:
+      # Used as the file-name prefix of each consolidated model file.
+      model_name:
+        required: true
+        description: 'Base model name (e.g., internlm3-8b-instruct)'
+      # Repository the model files are listed from, downloaded from and uploaded to.
+      repo_id:
+        required: true
+        description: 'HuggingFace repository ID (e.g., cortexso/internlm3)'
+
+# Expose the dispatch inputs to every step as plain environment variables.
+env:
+  MODEL_NAME: "${{ inputs.model_name }}"
+  REPO_ID: "${{ inputs.repo_id }}"
+
+jobs:
+  consolidate:
+    runs-on: ubuntu-latest
+    steps:
+      # fetch-depth 0 requests the full history instead of a shallow clone.
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      # huggingface_hub supplies both the Python API and the huggingface-cli
+      # command used by the later steps.
+      - name: Install dependencies
+        run: pip install huggingface_hub requests
+
+      # Staging directory for the downloaded model files.
+      - name: Create temporary directory
+        run: |
+          mkdir -p temp_models
+
+      # Download every quantized GGUF file found in the repository and store it
+      # as temp_models/<model_name>-<quant>.gguf for the upload step.
+      - name: Download and rename models
+        env:
+          HF_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_READ }}
+        run: |
+          # The quoted heredoc delimiter stops the shell from expanding anything
+          # inside the Python source.
+          python3 - << 'EOF'
+          import os
+          import sys
+
+          from huggingface_hub import HfApi, hf_hub_download
+
+          token = os.environ['HF_TOKEN']
+          repo_id = os.environ['REPO_ID']
+          model_name = os.environ['MODEL_NAME']
+
+          # Initialize HF API
+          api = HfApi(token=token)
+
+          # Repository folder name -> quantization suffix used in the output file name
+          quant_levels = {
+              'q2-k': 'q2_k',
+              'q3-ks': 'q3_k_s',
+              'q3-km': 'q3_k_m',
+              'q3-kl': 'q3_k_l',
+              'q4-ks': 'q4_k_s',
+              'q4-km': 'q4_k_m',
+              'q5-ks': 'q5_k_s',
+              'q5-km': 'q5_k_m',
+              'q6-k': 'q6_k',
+              'q8-0': 'q8_0'
+          }
+
+          # Files on the repository's default revision.
+          # NOTE(review): models are looked up as "<quant>/model.gguf" folders on
+          # that revision; if each quantization lives on its own branch instead,
+          # list and download with revision=<branch> - confirm the repo layout.
+          repo_files = set(api.list_repo_files(repo_id))
+
+          processed = []
+          failed = []
+          for quant, formatted_quant in quant_levels.items():
+              model_path = f"{quant}/model.gguf"
+              if model_path not in repo_files:
+                  print(f"Skipping {quant}: {model_path} not found in {repo_id}")
+                  continue
+
+              output_path = f"temp_models/{model_name}-{formatted_quant}.gguf"
+              try:
+                  # local_dir_use_symlinks is deprecated and ignored by current
+                  # huggingface_hub releases, so it is no longer passed.
+                  downloaded_path = hf_hub_download(
+                      repo_id=repo_id,
+                      filename=model_path,
+                      local_dir="temp_models",
+                      token=token,
+                  )
+                  # Rename the downloaded file
+                  os.rename(downloaded_path, output_path)
+                  processed.append(quant)
+                  print(f"Successfully processed {quant} model")
+              except Exception as e:
+                  # Best effort: report the failure and move on to the next quantization
+                  failed.append(quant)
+                  print(f"Error processing {quant} model: {e}")
+
+          print(f"Processed: {processed or 'none'}; failed: {failed or 'none'}")
+          if not processed:
+              # Nothing to upload: fail the step instead of reporting a green no-op run
+              sys.exit(f"No GGUF models were consolidated from {repo_id}")
+          EOF
+
+      # Push every consolidated GGUF file to the root of the repository.
+      - name: Upload consolidated models
+        env:
+          HF_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_WRITE }}
+        run: |
+          # Login to Hugging Face (quoted so the token is always a single argument)
+          huggingface-cli login --token "$HF_TOKEN"
+
+          # Upload all models to main branch
+          for file in temp_models/*.gguf; do
+            # Guard against the unexpanded glob when no .gguf file exists
+            if [ -f "$file" ]; then
+              echo "Uploading $file..."
+              # REPO_ID is read from the environment instead of being spliced
+              # into the script text, so a crafted input cannot inject shell code.
+              # The destination is the file's own name: "." is not a valid
+              # path_in_repo for a single-file upload.
+              huggingface-cli upload "$REPO_ID" "$file" "$(basename "$file")"
+            fi
+          done
+
+          # Cleanup
+          rm -rf temp_models
+
+      # Runs even when an earlier step failed, so the token stored by
+      # `huggingface-cli login` does not stay on the runner.
+      - name: Logout from Hugging Face
+        if: always()
+        run: huggingface-cli logout