Skip to content

Commit

Permalink
k8s logtail
Browse files Browse the repository at this point in the history
  • Loading branch information
asaiacai committed Jul 17, 2024
1 parent 465d36c commit ee55e82
Showing 1 changed file with 44 additions and 3 deletions.
47 changes: 44 additions & 3 deletions sky/templates/kubernetes-ray.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,49 @@ available_node_types:
image: {{image_id}}
# Do not change this command - it keeps the pod alive until it is
# explicitly killed.
command: ["/bin/bash", "-c", "--"]
args: ['trap : TERM INT; sleep infinity & wait;']
command: ["/bin/bash", "-c"]
args:
- |
# Glob for the task log files. The tilde stays literal inside the quotes,
# so every use goes through `eval echo` to expand the tilde and wildcards.
FILE_PATTERN="~/sky_logs/*/tasks/*.log"
# Block here, polling every 5 seconds, until at least one log file exists.
until ls $(eval echo $FILE_PATTERN) > /dev/null 2>&1; do
  sleep 5
done
# Stream one log file to stdout until no other process has it open.
#
# Starts `tail -n 0 -f` on the file in the background, then polls `lsof`
# every 5 seconds. `lsof` prints a header line plus one line per process
# holding the file, so fewer than 3 lines means that at most the tailer
# itself still has it open; the tailer is then killed and the loop ends.
#
# Arguments:
#   $1 - path of the log file to follow. Falls back to the caller's
#        global `file` when omitted or empty.
# Outputs:
#   Lines appended to the file, written to stdout by `tail`.
# Returns:
#   0 once the tailer has stopped; 1 if no path was supplied.
monitor_file() {
  local log_path="${1:-$file}"
  local tail_pid
  local open_count

  if [ -z "$log_path" ]; then
    echo "monitor_file: no log file given" >&2
    return 1
  fi

  tail -n 0 -f "$log_path" &
  tail_pid=$!
  while kill -0 "$tail_pid" 2> /dev/null; do
    # Only two PIDs should be accessing the file: the log appender and
    # the log tailer.
    open_count=$(lsof -w "$log_path" | wc -l)
    if [ "$open_count" -lt 3 ]; then
      kill "$tail_pid"
      break
    fi
    sleep 5
  done
}

# Space-separated list of log files that already have a monitor attached.
already_monitored=""

# Poll forever for new log files and attach a background monitor to each.
while true; do
  for file in $(eval echo $FILE_PATTERN); do
    # When nothing matches, the pattern comes back unexpanded; skip it
    # rather than attaching a monitor to a path that does not exist.
    [ -e "$file" ] || continue

    # Exact, literal membership test against the space-delimited list.
    # A regex/substring match would wrongly treat "run.log" as already
    # monitored once "run.log.old.log" or "run1log.log" is in the list.
    case " ${already_monitored} " in
      *" ${file} "*) continue ;;
    esac

    # Monitor the new file in the background and remember it.
    monitor_file "$file" &
    already_monitored="${already_monitored} ${file}"
  done

  sleep 5
done

ports:
- containerPort: 22 # Used for SSH
- containerPort: {{ray_port}} # Redis port
Expand Down Expand Up @@ -365,7 +406,7 @@ setup_commands:
# Line 'sudo grep ..': set the number of threads per process to unlimited so that `ray job submit` does not get stuck as the number of running Ray jobs increases.
# Line 'mkdir -p ..': disable host key check
# Line 'python3 -c ..': patch the buggy ray files and enable `-o allow_other` option for `goofys`
- sudo DEBIAN_FRONTEND=noninteractive apt install gcc patch pciutils rsync fuse curl -y;
- sudo DEBIAN_FRONTEND=noninteractive apt install lsof gcc patch pciutils rsync fuse curl -y;
mkdir -p ~/.ssh; touch ~/.ssh/config;
{%- for initial_setup_command in initial_setup_commands %}
{{ initial_setup_command }}
Expand Down

0 comments on commit ee55e82

Please sign in to comment.