Skip to content

Commit

Permalink
address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
priyanka-ganesha committed Dec 1, 2023
1 parent a5767b4 commit 48c5dc2
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 36 deletions.
2 changes: 1 addition & 1 deletion MaxText/configs/base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -178,5 +178,5 @@ use_iota_embed: False

#Monitoring parameters - Export in-workload metrics to Cloud monitoring
enable_cloud_monitoring: False
cloud_monitoring_dashboard: "https://pantheon.corp.google.com/monitoring/dashboards?project="
cloud_monitoring_dashboard: "https://pantheon.corp.google.com/monitoring/dashboards?"
cloud_zone: "" # zone name for cloud jobs - used for cloud metrics emitting
39 changes: 4 additions & 35 deletions MaxText/monitoring_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,11 @@
from google.cloud import monitoring_v3
from google.cloud import compute_v1
from google.api import metric_pb2
import requests
import time
import os

import max_logging

def get_metadata(project_id, zone, instance_id, timeout=30):
  """
  Fetches metadata for a Compute Engine instance over the REST API.

  Args:
    project_id: project that owns the instance
    zone: zone the instance runs in
    instance_id: identifier of the instance to look up
    timeout: seconds to wait for the HTTP request before giving up
  Returns:
    response body decoded as json; the HTTP status is not checked, so an
    error payload from the endpoint is returned as-is
  """
  # The URL must be an f-string: without the prefix the "{project_id}" style
  # placeholders are sent literally and never address a real instance.
  # Adjacent literals (instead of a backslash continuation inside the string)
  # keep source indentation from leaking into the URL.
  url = (
      "https://compute.googleapis.com/compute/v1/projects/"
      f"{project_id}/zones/{zone}/instances/{instance_id}"
  )
  # NOTE(review): the request carries no credentials - confirm whether the
  # endpoint is expected to answer unauthenticated calls.
  r = requests.get(url=url, timeout=timeout)
  metadata = r.json()
  return metadata

def create_custom_metric(metric_name, description):
"""
Creates a custom metric
Expand Down Expand Up @@ -99,13 +81,7 @@ def write_time_series_step(metric_name, monitoring_enabled, pyconfig, step=1):
"%d %b %Y %H:%M:%S UTC", time.gmtime(seconds_since_epoch_utc)
)
max_logging.log(
"Emitting metric ",
metric_name,
" for step = ",
step,
" at: ",
event_time,
)
f"Emitting metric {metric_name} for step = {step} at: {event_time}")

instance_id = get_instance_id(project_id, zone)

Expand All @@ -126,18 +102,11 @@ def write_time_series_step(metric_name, monitoring_enabled, pyconfig, step=1):
)
]

client.create_time_series(name=project_name, time_series=[series], metadata=get_metadata(project_id, zone, instance_id))
client.create_time_series(name=project_name, time_series=[series])
dashboard_link = pyconfig.config.cloud_monitoring_dashboard+project_name
max_logging.log(
"Time series added for step",
step,
"and instance_id ",
instance_id,
" and zone ",
zone,
"\nView dashboards or use metrics: ",
dashboard_link,
)
f"Time series added for step {step} and instance_id {instance_id} and zone {zone}\
\n View dashboards or use metrics: {dashboard_link}")
return [series]

def get_time_series_step_data(metric_name):
Expand Down
2 changes: 2 additions & 0 deletions MaxText/tests/cloud_monitoring_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ def test_write_time_series_step(self):
pyconfig.initialize(sys.argv + ['configs/base.yml'], per_device_batch_size=1, run_name='test', mesh_axes = ['data'],
logical_axis_rules = [['batch', 'data']],
data_sharding = ['data'],
base_output_directory = "gs://max-experiments/",
dataset_path = "gs://maxtext-dataset/",
enable_cloud_monitoring=True,
cloud_zone='us-central2-b')
monitoring_api.create_custom_metric('test_metric', "This is an example metric")
Expand Down

0 comments on commit 48c5dc2

Please sign in to comment.