argonne-lcf · keceli · Nov 12, 2024 · Nov 12, 2024 · Nov 12, 2024 · Nov 12, 2024
diff --git a/.github/workflows/proofread.yml b/.github/workflows/proofread.yml
@@ -0,0 +1,61 @@
+name: Proofread Markdown Files
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+
+# Least privilege: the job only reads the repository and comments on the PR.
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  proofread:
+    runs-on: ubuntu-latest
+    # Secrets are not passed to workflows triggered from forks, so skip those PRs.
+    if: github.event.pull_request.head.repo.full_name == github.repository
+
+    steps:
+    - name: Checkout PR code
+      uses: actions/checkout@v4
+      with:
+        fetch-depth: 0  # Fetch all history for accurate diff
+        ref: ${{ github.event.pull_request.head.sha }}  # Checkout the PR's HEAD commit
+
+    - name: Fetch base branch
+      # No --depth here: the three-dot diff below needs the merge base in history.
+      run: git fetch origin "$BASE_REF"
+      env:
+        BASE_REF: ${{ github.base_ref }}
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.x'
+
+    - name: Install dependencies
+      run: |
+        pip install openai PyGithub
+
+    - name: Get list of changed Markdown files
+      id: changed_files
+      run: |
+        # File names come from the PR and are untrusted: write them straight to a
+        # file instead of expanding them into a later shell script.
+        git diff --name-only "origin/${BASE_REF}...HEAD" -- '*.md' > changed_files.txt
+        if [ -s changed_files.txt ]; then
+          echo "has_files=true" >> "$GITHUB_OUTPUT"
+        else
+          echo "has_files=false" >> "$GITHUB_OUTPUT"
+        fi
+      env:
+        BASE_REF: ${{ github.base_ref }}
+
+    - name: Proofread changed Markdown files
+      if: steps.changed_files.outputs.has_files == 'true'
+      run: python scripts/proofread.py changed_files.txt
+      env:
+        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        GITHUB_REPOSITORY: ${{ github.repository }}
+        PR_NUMBER: ${{ github.event.number }}
diff --git a/docs/sophia/not_in_nav/queueing-and-running-jobs/job-and-queue-scheduling.md b/docs/sophia/not_in_nav/queueing-and-running-jobs/job-and-queue-scheduling.md
@@ -1,4 +1,4 @@
-# Job and Queue Scheduling on ThetaGPU
+# Job and Queue Scheduling on ThetaGPU
 ## Queues and Job Scheduling
 
 ### Nodes vs Queue vs MIG mode
@@ -12,10 +12,10 @@ There are three primary queues:
   - bigmem -  2 of the nodes have 640 GB of memory compared to the other 22 nodes with 320 GB. Use this queue to access these 2 nodes by specifying ```-q bigmem``` in your script. A max of 2 nodes (-n 2) can be requested in this queue.
   - single-gpu: This is the general production queue for jobs that operate best on a single GPUs. The -n parameter in your qsub should always be 1 as you can only submit to a single gpu. If you need more than 1 gpu, use the full-node queue.
 
-Here are the initial queue limits. You may not violate either of these policies.
+Here are the initial queue limits. You may not violate either of these policies.
 
 #### full-node queue:
-- MinTime is 5 minutes
+- MinTime is 5 minutes
 - MaxTime is 12 hours
 - MaxQueued will be 20 jobs
 - MaxRunning will be 10 jobs
@@ -27,7 +27,7 @@ Here are the initial queue limits. You may not violate either of these policies.
 - MaxRunning is 1 job
 
 #### single-gpu queue:
-- MinTime is 5 minutes
+- MinTime is 5 minutes
 - MaxTime is 1 hour
 - MaxQueued is 1 job
 - MaxRunning is 1 job

diff --git a/scripts/proofread.py b/scripts/proofread.py
@@ -0,0 +1,96 @@
+"""Proofread changed Markdown files with OpenAI and comment on the pull request.
+
+Usage: python scripts/proofread.py <file listing one path per line>
+
+Reads OPENAI_API_KEY, GITHUB_TOKEN, GITHUB_REPOSITORY and PR_NUMBER from the
+environment.
+"""
+import difflib
+import os
+import re
+import sys
+
+import openai
+from github import Github
+
+# Keep comment bodies safely below GitHub's 65536-character limit.
+MAX_COMMENT_CHARS = 65000
+
+
+def read_file_list(list_path):
+    """Return the non-blank paths listed one per line in *list_path*."""
+    with open(list_path, 'r', encoding='utf-8') as f:
+        return [path for path in f.read().splitlines() if path.strip()]
+
+
+def proofread(text):
+    """Return the model's corrected version of *text* ('' if it sent none)."""
+    # openai>=1.0 exposes the endpoint as `openai.chat.completions` and returns
+    # typed objects, so the reply is read through attributes rather than keys.
+    response = openai.chat.completions.create(
+        model="gpt-4",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant that proofreads and corrects markdown text."},
+            {"role": "user", "content": f"Proofread and correct the following markdown content:\n\n{text}"},
+        ],
+        temperature=0.0,
+    )
+    return response.choices[0].message.content or ''
+
+
+def build_comment(file_path, diff_text):
+    """Wrap *diff_text* in a fenced ``diff`` block sized to fit one PR comment."""
+    header = f"Suggestions for **{file_path}**:\n\n"
+    # The fence must be longer than any backtick run in the diff, otherwise a
+    # code fence inside the Markdown file would close the block early.
+    longest_run = max((len(run) for run in re.findall(r'`+', diff_text)), default=0)
+    fence = '`' * max(3, longest_run + 1)
+    room = MAX_COMMENT_CHARS - len(header) - 2 * len(fence) - len('diff\n\n')
+    if len(diff_text) > room:
+        notice = '\n... (diff truncated)'
+        diff_text = diff_text[:room - len(notice)] + notice
+    return f"{header}{fence}diff\n{diff_text}\n{fence}"
+
+
+def main():
+    """Proofread each file listed in argv[1] and post one diff comment per file."""
+    if len(sys.argv) < 2:
+        sys.exit(f"Usage: {sys.argv[0]} <file listing one path per line>")
+    files = read_file_list(sys.argv[1])
+
+    openai.api_key = os.environ['OPENAI_API_KEY']
+    github_token = os.environ['GITHUB_TOKEN']
+    repository = os.environ['GITHUB_REPOSITORY']
+    pr_number = int(os.environ['PR_NUMBER'])
+
+    pr = Github(github_token).get_repo(repository).get_pull(pr_number)
+
+    for file_path in files:
+        if not os.path.exists(file_path):
+            print(f"File {file_path} does not exist.")
+            continue
+
+        with open(file_path, 'r', encoding='utf-8') as f:
+            original_content = f.read()
+
+        proofread_content = proofread(original_content)
+        if not proofread_content:
+            print(f"No response for {file_path}")
+            continue
+
+        diff_text = '\n'.join(difflib.unified_diff(
+            original_content.splitlines(),
+            proofread_content.splitlines(),
+            fromfile=f'a/{file_path}',
+            tofile=f'b/{file_path}',
+            lineterm='',
+        ))
+
+        if diff_text:
+            pr.create_issue_comment(build_comment(file_path, diff_text))
+        else:
+            print(f"No suggestions for {file_path}")
+
+
+if __name__ == '__main__':
+    main()