Skip to content

Commit

Permalink
Monitor
Browse files Browse the repository at this point in the history
  • Loading branch information
MetRonnie committed Jul 25, 2024
1 parent ca367fe commit 58b8745
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 77 deletions.
86 changes: 11 additions & 75 deletions .github/workflows/test_functional.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,12 @@ jobs:
test:
runs-on: ${{ matrix.os }}
name: ${{ matrix.name || matrix.chunk }}
timeout-minutes: 45
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
os: ['ubuntu-latest']
python-version: ['3.9']
test-base: ['tests/f']
chunk: ['1/5']
platform: ['_local_background*']
# NOTE: includes must define ALL of the matrix values
Expand All @@ -49,13 +48,6 @@ jobs:
- name: 'macos-14'
os: 'macos-latest'
python-version: '3.9'
test-base: 'tests/f'
chunk: '1/5'
platform: '_local_background*'
- name: 'macos-patch'
os: 'macos-latest'
python-version: '3.9'
test-base: 'tests/f'
chunk: '1/5'
platform: '_local_background*'

Expand Down Expand Up @@ -135,9 +127,10 @@ jobs:
pip install -e ."[all]"
mkdir "$HOME/cylc-run"
- name: Apply patch
if: matrix.name == 'macos-patch'
run: git apply ./patch.diff
- name: Start monitoring CPU
run: |
python ./monitor.py &
echo "MONITOR_PID=$!" >> $GITHUB_ENV
- name: Configure Atrun
if: contains(matrix.platform, '_local_at')
Expand All @@ -150,56 +143,20 @@ jobs:
job runner = at
__HERE__
- name: Swarm Configure
run: |
etc/bin/swarm --yes --debug configure
- name: Swarm Build
if: env.REMOTE_PLATFORM == 'true'
run: |
# `swarm configure` seems to get ignored so override the user config
cp etc/conf/ssh_config $HOME/.ssh/config
# build and run the swarm
etc/bin/swarm --yes --debug build
etc/bin/swarm --yes --debug run
# test that it's up and running before proceeding
sleep 1
ssh -vv _remote_background_indep_poll hostname
- name: Configure git # Needed by the odd test
uses: cylc/release-actions/configure-git@v1

- name: Filter Tests
env:
# NOTE: we only want the CHUNK set in this step else we will
# re-chunk tests later when they run
CHUNK: ${{ matrix.chunk }}
run: |
etc/bin/run-functional-tests \
--dry \
${{ matrix.test-base }} \
> test-file
if [[ $REMOTE_PLATFORM == 'true' ]]; then
# skip tests that don't configure platform requirements
grep -l --color=never REQUIRE_PLATFORM $(cat test-file) > test-file
fi
- name: Test
id: test
timeout-minutes: 35
timeout-minutes: 10
continue-on-error: true
run: |
echo "finished=false" >> $GITHUB_OUTPUT
if [[ '${{ matrix.test-base }}' == 'tests/k' ]]; then
NPROC=4
else
NPROC=8
fi
# NOTE: test base is purposefully un-quoted
etc/bin/run-functional-tests \
-j "${NPROC}" \
-j 1 \
--state=save \
$(cat test-file) \
tests/functional/flow-triggers/11-wait-merge.t \
|| (echo "finished=true" >> $GITHUB_OUTPUT && false)
- name: Time Out
Expand All @@ -211,27 +168,6 @@ jobs:
# fail the workflow
false
- name: Re-run failed tests
timeout-minutes: 10
if: steps.test.outcome == 'failure' && steps.test.outputs.finished == 'true'
run: |
# re-run failed tests providing that they didn't time out first time
# TODO: make the tests deterministic so we don't need to do this
etc/bin/run-functional-tests \
-j 1 \
-v \
--state=save,failed $(cat test-file)
- name: Copy cylc-run out of container
if: failure() && steps.test.outcome == 'failure' && env.REMOTE_PLATFORM == 'true'
run: |
# pick the first host in the list
host="$(cut -d ' ' -f 1 <<< "${{ matrix.platform }}")"
# copy back the remote cylc-run dir
rsync -av \
"${host}:/root/cylc-run/" \
"${HOME}/cylc-run/${host}/"
- name: Debug
if: always()
timeout-minutes: 1
Expand All @@ -258,7 +194,7 @@ jobs:
name: cylc-run (${{ steps.uploadname.outputs.uploadname }})
path: ~/cylc-run/

- name: Shutdown
if: always()
- name: Stop monitoring CPU
run: |
etc/bin/swarm kill
kill $MONITOR_PID
cat ./cpu.txt
3 changes: 1 addition & 2 deletions cylc/flow/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,6 @@ class Scheduler:
is_reloaded = False

# main loop
main_loop_intervals: deque = deque(maxlen=10)
main_loop_plugins: Optional[dict] = None
auto_restart_mode: Optional[AutoRestartMode] = None
auto_restart_time: Optional[float] = None
Expand Down Expand Up @@ -1616,7 +1615,7 @@ async def _main_loop(self) -> None:
duration = self.INTERVAL_MAIN_LOOP - elapsed
await asyncio.sleep(duration)
# Record latest main loop interval
self.main_loop_intervals.append(time() - tinit)
LOG.debug(f"Main loop: {time() - tinit}s")
# END MAIN LOOP

def _update_workflow_state(self):
Expand Down
9 changes: 9 additions & 0 deletions monitor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import psutil

def monitor():
    """Record CPU usage readings to ``cpu.txt`` until the process is killed.

    Each pass of the loop appends one ``psutil.cpu_percent(interval=5)``
    reading, on its own line, to ``cpu.txt`` (a relative path, so it is
    resolved against the current working directory).

    The loop has no exit condition, so this function never returns; it is
    meant to be run as a background process and terminated externally.
    """
    while True:
        # The file is opened before sampling and reopened on every pass:
        # leaving the ``with`` block closes it, so each reading is written
        # out before the next sample starts.
        with open('cpu.txt', mode='a') as log:
            # With a positive interval psutil blocks for that many seconds
            # while measuring, which is what paces this loop.
            reading = psutil.cpu_percent(interval=5)
            log.write(f'{reading}\n')


# Only start sampling when executed as a script, not when imported.
if __name__ == '__main__':
    monitor()

0 comments on commit 58b8745

Please sign in to comment.