diff --git a/.github/workflows/proofread.yml b/.github/workflows/proofread.yml
new file mode 100644
index 000000000..13d508a4d
--- /dev/null
+++ b/.github/workflows/proofread.yml
@@ -0,0 +1,47 @@
+name: Proofread Markdown Files
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+
+jobs:
+  proofread:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout PR code
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0  # Fetch all history for accurate diff
+          ref: ${{ github.event.pull_request.head.sha }}  # Checkout the PR's HEAD commit
+
+      - name: Fetch base branch
+        run: git fetch origin ${{ github.base_ref }} --depth=1
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+
+      - name: Install dependencies
+        run: |
+          pip install openai PyGithub
+
+      - name: Get list of changed Markdown files
+        id: changed_files
+        run: |
+          FILES=$(git diff --name-only origin/${{ github.base_ref }}...HEAD -- '*.md')
+          echo "files<<EOF" >> $GITHUB_OUTPUT
+          echo "${FILES}" >> $GITHUB_OUTPUT
+          echo "EOF" >> $GITHUB_OUTPUT
+
+      - name: Proofread changed Markdown files
+        if: steps.changed_files.outputs.files != ''
+        run: |
+          echo "${{ steps.changed_files.outputs.files }}" > changed_files.txt
+          python scripts/proofread.py changed_files.txt
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_REPOSITORY: ${{ github.repository }}
+          PR_NUMBER: ${{ github.event.number }}
diff --git a/docs/sophia/not_in_nav/queueing-and-running-jobs/job-and-queue-scheduling.md b/docs/sophia/not_in_nav/queueing-and-running-jobs/job-and-queue-scheduling.md
index 748a5034e..010bc1038 100644
--- a/docs/sophia/not_in_nav/queueing-and-running-jobs/job-and-queue-scheduling.md
+++ b/docs/sophia/not_in_nav/queueing-and-running-jobs/job-and-queue-scheduling.md
@@ -1,4 +1,4 @@
-# Job and Queue Scheduling on ThetaGPU
+# Job and Queue Sheduling on ThetaGPU
 ## Queues and Job Scheduling
 ### Nodes vs Queue vs MIG mode
 
@@ -12,10 +12,10 @@ There are three primary queues:
 - bigmem - 2 of the nodes have 640 GB of memory compared to the other 22 nodes with 320 GB. Use this queue to access these 2 nodes by specifying ```-q bigmem``` in your script. A max of 2 nodes (-n 2) can be requested in this queue.
 - single-gpu: This is the general production queue for jobs that operate best on a single GPUs. The -n parameter in your qsub should always be 1 as you can only submit to a single gpu. If you need more than 1 gpu, use the full-node queue.
 
-Here are the initial queue limits. You may not violate either of these policies.
+Here are the initdial queue limits. You may not violate either of these policies.
 
 #### full-node queue:
-- MinTime is 5 minutes
+- MinTime is 5 minutes
 - MaxTime is 12 hours
 - MaxQueued will be 20 jobs
 - MaxRunning will be 10 jobs
@@ -27,7 +27,7 @@ Here are the initial queue limits. You may not violate either of these policies.
 - MaxRunning is 1 job
 
 #### single-gpu queue:
-- MinTime is 5 minutes
+- MinTime is 5 minsutes
 - MaxTime is 1 hour
 - MaxQueued is 1 job
 - MaxRunning is 1 job
diff --git a/scripts/proofread.py b/scripts/proofread.py
new file mode 100644
index 000000000..232243e45
--- /dev/null
+++ b/scripts/proofread.py
@@ -0,0 +1,60 @@
+import os
+import sys
+import openai
+from github import Github
+import difflib
+
+def main():
+    files_list_file = sys.argv[1]
+    with open(files_list_file, 'r') as f:
+        files = f.read().splitlines()
+
+    openai.api_key = os.environ['OPENAI_API_KEY']
+    github_token = os.environ['GITHUB_TOKEN']
+    repository = os.environ['GITHUB_REPOSITORY']
+    pr_number = int(os.environ['PR_NUMBER'])
+
+    g = Github(github_token)
+    repo = g.get_repo(repository)
+    pr = repo.get_pull(pr_number)
+
+    for file_path in files:
+        if not os.path.exists(file_path):
+            print(f"File {file_path} does not exist.")
+            continue
+
+        with open(file_path, 'r', encoding='utf-8') as f:
+            original_content = f.read()
+
+        # Use OpenAI API to proofread the content
+        response = openai.chat.completions.create(
+            model="gpt-4",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant that proofreads and corrects markdown text."},
+                {"role": "user", "content": f"Proofread and correct the following markdown content:\n\n{original_content}"},
+            ],
+            temperature=0.0,
+        )
+
+        proofread_content = response.choices[0].message.content
+
+        # Compute the diff
+        diff = difflib.unified_diff(
+            original_content.splitlines(),
+            proofread_content.splitlines(),
+            fromfile=f'a/{file_path}',
+            tofile=f'b/{file_path}',
+            lineterm=''
+        )
+
+        diff_text = '\n'.join(diff)
+
+        if diff_text:
+            # Post the diff as a comment on the PR
+            comment_body = f"Suggestions for **{file_path}**:\n\n```diff\n{diff_text}\n```"
+            pr.create_issue_comment(comment_body)
+        else:
+            print(f"No suggestions for {file_path}")
+
+if __name__ == '__main__':
+    main()
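
For reference, the script can also be exercised locally before relying on the Action. The sketch below mirrors the steps the workflow runs; the API key, token, repository slug, PR number, and base branch (`main`) are placeholders to substitute with real values:

```bash
# Hypothetical local run of scripts/proofread.py; all exported values are placeholders.
export OPENAI_API_KEY="sk-..."          # normally supplied via secrets.OPENAI_API_KEY
export GITHUB_TOKEN="ghp_..."           # token able to comment on the pull request
export GITHUB_REPOSITORY="owner/repo"   # normally ${{ github.repository }}
export PR_NUMBER="123"                  # normally ${{ github.event.number }}

# Rebuild the list of changed Markdown files the same way the workflow does,
# assuming the base branch is main.
git fetch origin main --depth=1
git diff --name-only origin/main...HEAD -- '*.md' > changed_files.txt

pip install openai PyGithub
python scripts/proofread.py changed_files.txt
```

Note that this posts the same issue comment the Action would, so point PR_NUMBER at a test pull request when trying it out.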