diff --git a/acceptance/bin/wait_pid.py b/acceptance/bin/wait_pid.py new file mode 100755 index 0000000000..35bc0eedd6 --- /dev/null +++ b/acceptance/bin/wait_pid.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +import sys +import os +import time +import platform +import subprocess + + +def is_finished_windows(pid): + assert int(pid) > 0 + + # Run task list but filter the list by pid.. + output = subprocess.check_output(f'tasklist /fi "PID eq {pid}"', text=True) + + # If tasklist does not find any process, that means the process we are + # waiting for has terminated. + unaliveMsg = "No tasks are running which match the specified criteria." + if unaliveMsg in output: + return True + + return False + + +def is_finished_unix(pid): + assert int(pid) > 0 + + # Send signal 0 to the process to check if it exists. + # From the docs: + # If sig is 0, then no signal is sent, but existence and permission + # checks are still performed; this can be used to check for the + # existence of a process ID or process group ID that the caller is + # permitted to signal. + # ref: https://man7.org/linux/man-pages/man2/kill.2.html + try: + os.kill(pid, 0) + except OSError: + return True + + return False + + +def wait_pid(pid): + max_attempts = 600 # 600 * 0.1 seconds = 1 minute + sleep_time = 0.1 + + for i in range(max_attempts): + if platform.system().lower() == "windows": + if is_finished_windows(pid): + print("[wait_pid] process has ended") + return 0 + + else: + if is_finished_unix(pid): + print("[wait_pid] process has ended") + return 0 + + time.sleep(sleep_time) + + print(f"Timeout: Process {pid} did not end within 1 minute") + return 1 + + +exit_code = wait_pid(int(sys.argv[1])) +sys.exit(exit_code) diff --git a/acceptance/telemetry/test.toml b/acceptance/telemetry/test.toml new file mode 100644 index 0000000000..a7f8adeb3e --- /dev/null +++ b/acceptance/telemetry/test.toml @@ -0,0 +1,26 @@ +IncludeRequestHeaders = ["Authorization"] +RecordRequests = true + +Local = true +Cloud = false + +[[Repls]] +Old = '17\d{11}' +New = '"UNIX_TIME_MILLIS"' + +[[Repls]] +Old = 'darwin|linux|windows' +New = '[OS]' + +[[Repls]] +Old = 'execution_time_ms\\\":\d{1,5},' +New = 'execution_time_ms\":\"SMALL_INT\",' + +[[Server]] +Pattern = "POST /telemetry-ext" +Response.Body = ''' +{ + "errors": [], + "numProtoSuccess": 2 +} +''' diff --git a/acceptance/telemetry/upload-command/out.requests.txt b/acceptance/telemetry/upload-command/out.requests.txt new file mode 100644 index 0000000000..22bf5fcafc --- /dev/null +++ b/acceptance/telemetry/upload-command/out.requests.txt @@ -0,0 +1,17 @@ +{ + "headers": { + "Authorization": [ + "Bearer [DATABRICKS_TOKEN]" + ] + }, + "method": "POST", + "path": "/telemetry-ext", + "body": { + "uploadTime": "UNIX_TIME_MILLIS", + "items": [], + "protoLogs": [ + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"cli_test_event\":{\"name\":\"VALUE1\"}}}}", + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"cli_test_event\":{\"name\":\"VALUE2\"}}}}" + ] + } +} diff --git a/acceptance/telemetry/upload-command/output.txt b/acceptance/telemetry/upload-command/output.txt new file mode 100644 index 0000000000..79c8c1f741 --- /dev/null +++ b/acceptance/telemetry/upload-command/output.txt @@ -0,0 +1,5 @@ + +>>> [CLI] telemetry upload +Telemetry logs uploaded successfully +Response: +{"errors":[],"numProtoSuccess":2} diff --git a/acceptance/telemetry/upload-command/script b/acceptance/telemetry/upload-command/script new file mode 100644 index 
0000000000..053d77b689 --- /dev/null +++ b/acceptance/telemetry/upload-command/script @@ -0,0 +1,5 @@ +export DATABRICKS_CLI_TELEMETRY_LOGFILE=./out.upload.txt + +# This command / test cannot be run in inprocess / debug mode. This is because +# it does not go through the [root.Execute] function. +trace $CLI telemetry upload < stdin diff --git a/acceptance/telemetry/upload-command/stdin b/acceptance/telemetry/upload-command/stdin new file mode 100644 index 0000000000..373e1545e0 --- /dev/null +++ b/acceptance/telemetry/upload-command/stdin @@ -0,0 +1,24 @@ +{ + "logs": [ + { + "frontend_log_event_id": "AAAAAAAA-AAAA-AAAA-AAAA-AAAAAAAAAAAA", + "entry": { + "databricks_cli_log": { + "cli_test_event": { + "name": "VALUE1" + } + } + } + }, + { + "frontend_log_event_id": "BBBBBBBB-BBBB-BBBB-BBBB-BBBBBBBBBBBB", + "entry": { + "databricks_cli_log": { + "cli_test_event": { + "name": "VALUE2" + } + } + } + } + ] +} diff --git a/acceptance/telemetry/upload-fails-with-debug-logs/out.requests.txt b/acceptance/telemetry/upload-fails-with-debug-logs/out.requests.txt new file mode 100644 index 0000000000..95b21f5c3d --- /dev/null +++ b/acceptance/telemetry/upload-fails-with-debug-logs/out.requests.txt @@ -0,0 +1,51 @@ +{ + "headers": { + "Authorization": [ + "Bearer [DATABRICKS_TOKEN]" + ] + }, + "method": "POST", + "path": "/telemetry-ext", + "body": { + "uploadTime": "UNIX_TIME_MILLIS", + "items": [], + "protoLogs": [ + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE1\"}}}}", + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE2\"}}}}" + ] + } +} +{ + "headers": { + "Authorization": [ + "Bearer [DATABRICKS_TOKEN]" + ] + }, + "method": "POST", + "path": "/telemetry-ext", + "body": { + "uploadTime": "UNIX_TIME_MILLIS", + "items": [], + "protoLogs": [ + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE1\"}}}}", + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE2\"}}}}" + ] + } +} +{ + "headers": { + "Authorization": [ + "Bearer [DATABRICKS_TOKEN]" + ] + }, + "method": "POST", + "path": "/telemetry-ext", + "body": { + "uploadTime": "UNIX_TIME_MILLIS", + "items": [], + "protoLogs": [ + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE1\"}}}}", + 
"{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE2\"}}}}" + ] + } +} diff --git a/acceptance/telemetry/upload-fails-with-debug-logs/out.upload_process.txt b/acceptance/telemetry/upload-fails-with-debug-logs/out.upload_process.txt new file mode 100644 index 0000000000..902a64cf3a --- /dev/null +++ b/acceptance/telemetry/upload-fails-with-debug-logs/out.upload_process.txt @@ -0,0 +1,53 @@ +HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], telemetry, upload, --log-level=debug" +HH:MM:SS Debug: POST /telemetry-ext +> { +> "items": null, +> "protoLogs": [ +> "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"... (246 more bytes)", +> "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"... (246 more bytes)" +> ], +> "uploadTime": "UNIX_TIME_MILLIS" +> } +< HTTP/1.1 501 Not Implemented +< { +< "error_code": "ERROR_CODE", +< "message": "Endpoint not implemented." +< } pid=PID sdk=true +HH:MM:SS Debug: non-retriable error: Endpoint not implemented. pid=PID sdk=true +HH:MM:SS Warn: Attempt 1 failed due to a server side error. Retrying status code: 501 + pid=PID +HH:MM:SS Debug: POST /telemetry-ext +> { +> "items": null, +> "protoLogs": [ +> "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"... (246 more bytes)", +> "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"... (246 more bytes)" +> ], +> "uploadTime": "UNIX_TIME_MILLIS" +> } +< HTTP/1.1 501 Not Implemented +< { +< "error_code": "ERROR_CODE", +< "message": "Endpoint not implemented." +< } pid=PID sdk=true +HH:MM:SS Debug: non-retriable error: Endpoint not implemented. pid=PID sdk=true +HH:MM:SS Warn: Attempt 2 failed due to a server side error. Retrying status code: 501 + pid=PID +HH:MM:SS Debug: POST /telemetry-ext +> { +> "items": null, +> "protoLogs": [ +> "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"... (246 more bytes)", +> "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"... (246 more bytes)" +> ], +> "uploadTime": "UNIX_TIME_MILLIS" +> } +< HTTP/1.1 501 Not Implemented +< { +< "error_code": "ERROR_CODE", +< "message": "Endpoint not implemented." +< } pid=PID sdk=true +HH:MM:SS Debug: non-retriable error: Endpoint not implemented. pid=PID sdk=true +HH:MM:SS Warn: Attempt 3 failed due to a server side error. 
Retrying status code: 501 + pid=PID +HH:MM:SS Error: upload did not succeed after three attempts pid=PID diff --git a/acceptance/telemetry/upload-fails-with-debug-logs/output.txt b/acceptance/telemetry/upload-fails-with-debug-logs/output.txt new file mode 100644 index 0000000000..e0f522e9dc --- /dev/null +++ b/acceptance/telemetry/upload-fails-with-debug-logs/output.txt @@ -0,0 +1,5 @@ + +>>> [CLI] selftest send-telemetry --debug +HH:MM:SS Info: start pid=PID version=[DEV_VERSION] args="[CLI], selftest, send-telemetry, --debug" +HH:MM:SS Info: completed execution pid=PID exit_code=0 +[wait_pid] process has ended diff --git a/acceptance/telemetry/upload-fails-with-debug-logs/script b/acceptance/telemetry/upload-fails-with-debug-logs/script new file mode 100644 index 0000000000..6c03503def --- /dev/null +++ b/acceptance/telemetry/upload-fails-with-debug-logs/script @@ -0,0 +1,12 @@ +export DATABRICKS_CLI_TELEMETRY_PIDFILE=./telemetry.pid +export DATABRICKS_CLI_TELEMETRY_LOGFILE=./out.upload_process.txt + +# This test ensures that the main CLI command does not error even if +# telemetry upload fails. +trace $CLI selftest send-telemetry --debug + +# Wait for the child telemetry process to finish +wait_pid.py $(cat ./telemetry.pid) + +# cleanup the pid file +rm ./telemetry.pid diff --git a/acceptance/telemetry/upload-fails-with-debug-logs/test.toml b/acceptance/telemetry/upload-fails-with-debug-logs/test.toml new file mode 100644 index 0000000000..7b441d48de --- /dev/null +++ b/acceptance/telemetry/upload-fails-with-debug-logs/test.toml @@ -0,0 +1,17 @@ +[[Server]] +Pattern = "POST /telemetry-ext" +Response.Body = ''' +{ + "error_code": "ERROR_CODE", + "message": "Endpoint not implemented." +} +''' +Response.StatusCode = 501 + +[[Repls]] +Old = "(?:[01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9]" +New = "HH:MM:SS" + +[[Repls]] +Old = "pid=[0-9]+" +New = "pid=PID" diff --git a/acceptance/telemetry/upload-fails/out.requests.txt b/acceptance/telemetry/upload-fails/out.requests.txt new file mode 100644 index 0000000000..95b21f5c3d --- /dev/null +++ b/acceptance/telemetry/upload-fails/out.requests.txt @@ -0,0 +1,51 @@ +{ + "headers": { + "Authorization": [ + "Bearer [DATABRICKS_TOKEN]" + ] + }, + "method": "POST", + "path": "/telemetry-ext", + "body": { + "uploadTime": "UNIX_TIME_MILLIS", + "items": [], + "protoLogs": [ + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE1\"}}}}", + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE2\"}}}}" + ] + } +} +{ + "headers": { + "Authorization": [ + "Bearer [DATABRICKS_TOKEN]" + ] + }, + "method": "POST", + "path": "/telemetry-ext", + "body": { + "uploadTime": "UNIX_TIME_MILLIS", + "items": [], + "protoLogs": [ + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE1\"}}}}", + 
"{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE2\"}}}}" + ] + } +} +{ + "headers": { + "Authorization": [ + "Bearer [DATABRICKS_TOKEN]" + ] + }, + "method": "POST", + "path": "/telemetry-ext", + "body": { + "uploadTime": "UNIX_TIME_MILLIS", + "items": [], + "protoLogs": [ + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE1\"}}}}", + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE2\"}}}}" + ] + } +} diff --git a/acceptance/telemetry/upload-fails/out.upload_process.txt b/acceptance/telemetry/upload-fails/out.upload_process.txt new file mode 100644 index 0000000000..53aaa8ae17 --- /dev/null +++ b/acceptance/telemetry/upload-fails/out.upload_process.txt @@ -0,0 +1,7 @@ +Warn: Attempt 1 failed due to a server side error. Retrying status code: 501 + +Warn: Attempt 2 failed due to a server side error. Retrying status code: 501 + +Warn: Attempt 3 failed due to a server side error. Retrying status code: 501 + +Error: upload did not succeed after three attempts diff --git a/acceptance/telemetry/upload-fails/output.txt b/acceptance/telemetry/upload-fails/output.txt new file mode 100644 index 0000000000..5e94a34d3e --- /dev/null +++ b/acceptance/telemetry/upload-fails/output.txt @@ -0,0 +1,3 @@ + +>>> [CLI] selftest send-telemetry +[wait_pid] process has ended diff --git a/acceptance/telemetry/upload-fails/script b/acceptance/telemetry/upload-fails/script new file mode 100644 index 0000000000..e226196ed7 --- /dev/null +++ b/acceptance/telemetry/upload-fails/script @@ -0,0 +1,12 @@ +export DATABRICKS_CLI_TELEMETRY_PIDFILE=./telemetry.pid +export DATABRICKS_CLI_TELEMETRY_LOGFILE=./out.upload_process.txt + +# This test ensures that the main CLI command does not error even if +# telemetry upload fails. +trace $CLI selftest send-telemetry + +# Wait for the child telemetry process to finish +wait_pid.py $(cat ./telemetry.pid) + +# cleanup the pid file +rm ./telemetry.pid diff --git a/acceptance/telemetry/upload-fails/test.toml b/acceptance/telemetry/upload-fails/test.toml new file mode 100644 index 0000000000..a7fa942e44 --- /dev/null +++ b/acceptance/telemetry/upload-fails/test.toml @@ -0,0 +1,9 @@ +[[Server]] +Pattern = "POST /telemetry-ext" +Response.Body = ''' +{ + "error_code": "ERROR_CODE", + "message": "Endpoint not implemented." 
+} +''' +Response.StatusCode = 501 diff --git a/acceptance/telemetry/upload-partially-succeeds/out.requests.txt b/acceptance/telemetry/upload-partially-succeeds/out.requests.txt new file mode 100644 index 0000000000..95b21f5c3d --- /dev/null +++ b/acceptance/telemetry/upload-partially-succeeds/out.requests.txt @@ -0,0 +1,51 @@ +{ + "headers": { + "Authorization": [ + "Bearer [DATABRICKS_TOKEN]" + ] + }, + "method": "POST", + "path": "/telemetry-ext", + "body": { + "uploadTime": "UNIX_TIME_MILLIS", + "items": [], + "protoLogs": [ + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE1\"}}}}", + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE2\"}}}}" + ] + } +} +{ + "headers": { + "Authorization": [ + "Bearer [DATABRICKS_TOKEN]" + ] + }, + "method": "POST", + "path": "/telemetry-ext", + "body": { + "uploadTime": "UNIX_TIME_MILLIS", + "items": [], + "protoLogs": [ + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE1\"}}}}", + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE2\"}}}}" + ] + } +} +{ + "headers": { + "Authorization": [ + "Bearer [DATABRICKS_TOKEN]" + ] + }, + "method": "POST", + "path": "/telemetry-ext", + "body": { + "uploadTime": "UNIX_TIME_MILLIS", + "items": [], + "protoLogs": [ + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE1\"}}}}", + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE2\"}}}}" + ] + } +} diff --git a/acceptance/telemetry/upload-partially-succeeds/out.upload_process.txt b/acceptance/telemetry/upload-partially-succeeds/out.upload_process.txt new file mode 100644 index 0000000000..a56a97213c --- /dev/null +++ b/acceptance/telemetry/upload-partially-succeeds/out.upload_process.txt @@ -0,0 +1,7 @@ +Warn: Attempt 1 was a partial success. Number of logs uploaded: 1 out of 2 + +Warn: Attempt 2 was a partial success. Number of logs uploaded: 1 out of 2 + +Warn: Attempt 3 was a partial success. 
Number of logs uploaded: 1 out of 2 + +Error: upload did not succeed after three attempts diff --git a/acceptance/telemetry/upload-partially-succeeds/output.txt b/acceptance/telemetry/upload-partially-succeeds/output.txt new file mode 100644 index 0000000000..1a1f61588c --- /dev/null +++ b/acceptance/telemetry/upload-partially-succeeds/output.txt @@ -0,0 +1,4 @@ + +>>> [CLI] selftest send-telemetry +waiting for telemetry process to finish +[wait_pid] process has ended diff --git a/acceptance/telemetry/upload-partially-succeeds/script b/acceptance/telemetry/upload-partially-succeeds/script new file mode 100644 index 0000000000..ae43384134 --- /dev/null +++ b/acceptance/telemetry/upload-partially-succeeds/script @@ -0,0 +1,12 @@ +export DATABRICKS_CLI_TELEMETRY_PIDFILE=./telemetry.pid +export DATABRICKS_CLI_TELEMETRY_LOGFILE=./out.upload_process.txt + +trace $CLI selftest send-telemetry + +echo "waiting for telemetry process to finish" + +# Wait for the child telemetry process to finish +wait_pid.py $(cat ./telemetry.pid) + +# cleanup the pid file +rm ./telemetry.pid diff --git a/acceptance/telemetry/upload-partially-succeeds/test.toml b/acceptance/telemetry/upload-partially-succeeds/test.toml new file mode 100644 index 0000000000..4e2bd9faba --- /dev/null +++ b/acceptance/telemetry/upload-partially-succeeds/test.toml @@ -0,0 +1,8 @@ +[[Server]] +Pattern = "POST /telemetry-ext" +Response.Body = ''' +{ + "errors": [], + "numProtoSuccess": 1 +} +''' diff --git a/acceptance/telemetry/upload-skipped/output.txt b/acceptance/telemetry/upload-skipped/output.txt new file mode 100644 index 0000000000..10ecff19b2 --- /dev/null +++ b/acceptance/telemetry/upload-skipped/output.txt @@ -0,0 +1,3 @@ + +>>> [CLI] selftest send-telemetry +telemetry process not started. No pid file found diff --git a/acceptance/telemetry/upload-skipped/script b/acceptance/telemetry/upload-skipped/script new file mode 100644 index 0000000000..46e5f6b5f3 --- /dev/null +++ b/acceptance/telemetry/upload-skipped/script @@ -0,0 +1,14 @@ +export DATABRICKS_CLI_TELEMETRY_PIDFILE=./telemetry.pid +export DATABRICKS_CLI_TELEMETRY_LOGFILE=./out.upload_process.txt +export DATABRICKS_CLI_DISABLE_TELEMETRY="true" + +trace $CLI selftest send-telemetry + +if [ -f ./telemetry.pid ]; then + echo "telemetry process was started" + exit 1 +else + # The telemetry process should not be started because the DATABRICKS_CLI_DISABLE_TELEMETRY + # environment variable is set. + echo "telemetry process not started. 
No pid file found" +fi diff --git a/acceptance/telemetry/upload-succeeds-integration/out.upload_process.txt b/acceptance/telemetry/upload-succeeds-integration/out.upload_process.txt new file mode 100644 index 0000000000..2fd4e33064 --- /dev/null +++ b/acceptance/telemetry/upload-succeeds-integration/out.upload_process.txt @@ -0,0 +1,3 @@ +Telemetry logs uploaded successfully +Response: +{"errors":[],"numProtoSuccess":2} diff --git a/acceptance/telemetry/upload-succeeds-integration/output.txt b/acceptance/telemetry/upload-succeeds-integration/output.txt new file mode 100644 index 0000000000..1a1f61588c --- /dev/null +++ b/acceptance/telemetry/upload-succeeds-integration/output.txt @@ -0,0 +1,4 @@ + +>>> [CLI] selftest send-telemetry +waiting for telemetry process to finish +[wait_pid] process has ended diff --git a/acceptance/telemetry/upload-succeeds-integration/script b/acceptance/telemetry/upload-succeeds-integration/script new file mode 100644 index 0000000000..ae43384134 --- /dev/null +++ b/acceptance/telemetry/upload-succeeds-integration/script @@ -0,0 +1,12 @@ +export DATABRICKS_CLI_TELEMETRY_PIDFILE=./telemetry.pid +export DATABRICKS_CLI_TELEMETRY_LOGFILE=./out.upload_process.txt + +trace $CLI selftest send-telemetry + +echo "waiting for telemetry process to finish" + +# Wait for the child telemetry process to finish +wait_pid.py $(cat ./telemetry.pid) + +# cleanup the pid file +rm ./telemetry.pid diff --git a/acceptance/telemetry/upload-succeeds-integration/test.toml b/acceptance/telemetry/upload-succeeds-integration/test.toml new file mode 100644 index 0000000000..fa4188ba45 --- /dev/null +++ b/acceptance/telemetry/upload-succeeds-integration/test.toml @@ -0,0 +1,7 @@ +# RecordRequests is not supported for integration tests yet. Once we have support +# for running integration tests with record requests we can remove this test since +# it's a copy of acceptance/telemetry/upload-succeeds. 
+Local = true +Cloud = true + +RecordRequests = false diff --git a/acceptance/telemetry/upload-succeeds-oauth/out.requests.txt b/acceptance/telemetry/upload-succeeds-oauth/out.requests.txt new file mode 100644 index 0000000000..1867eaf1dd --- /dev/null +++ b/acceptance/telemetry/upload-succeeds-oauth/out.requests.txt @@ -0,0 +1,45 @@ +{ + "method": "GET", + "path": "/oidc/.well-known/oauth-authorization-server" +} +{ + "headers": { + "Authorization": [ + "Basic [ENCODED_AUTH]" + ] + }, + "method": "POST", + "path": "/oidc/v1/token", + "raw_body": "grant_type=client_credentials\u0026scope=all-apis" +} +{ + "method": "GET", + "path": "/oidc/.well-known/oauth-authorization-server" +} +{ + "headers": { + "Authorization": [ + "Basic [ENCODED_AUTH]" + ] + }, + "method": "POST", + "path": "/oidc/v1/token", + "raw_body": "grant_type=client_credentials\u0026scope=all-apis" +} +{ + "headers": { + "Authorization": [ + "Bearer oauth-token" + ] + }, + "method": "POST", + "path": "/telemetry-ext", + "body": { + "uploadTime": "UNIX_TIME_MILLIS", + "items": [], + "protoLogs": [ + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE1\"}}}}", + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE2\"}}}}" + ] + } +} diff --git a/acceptance/telemetry/upload-succeeds-oauth/out.upload_process.txt b/acceptance/telemetry/upload-succeeds-oauth/out.upload_process.txt new file mode 100644 index 0000000000..2fd4e33064 --- /dev/null +++ b/acceptance/telemetry/upload-succeeds-oauth/out.upload_process.txt @@ -0,0 +1,3 @@ +Telemetry logs uploaded successfully +Response: +{"errors":[],"numProtoSuccess":2} diff --git a/acceptance/telemetry/upload-succeeds-oauth/output.txt b/acceptance/telemetry/upload-succeeds-oauth/output.txt new file mode 100644 index 0000000000..1a1f61588c --- /dev/null +++ b/acceptance/telemetry/upload-succeeds-oauth/output.txt @@ -0,0 +1,4 @@ + +>>> [CLI] selftest send-telemetry +waiting for telemetry process to finish +[wait_pid] process has ended diff --git a/acceptance/telemetry/upload-succeeds-oauth/script b/acceptance/telemetry/upload-succeeds-oauth/script new file mode 100644 index 0000000000..d1d8767625 --- /dev/null +++ b/acceptance/telemetry/upload-succeeds-oauth/script @@ -0,0 +1,19 @@ +export DATABRICKS_CLI_TELEMETRY_PIDFILE=./telemetry.pid +export DATABRICKS_CLI_TELEMETRY_LOGFILE=./out.upload_process.txt + +# Unset the token which is configured by default +# in acceptance tests +export DATABRICKS_TOKEN="" + +export DATABRICKS_CLIENT_ID=client_id +export DATABRICKS_CLIENT_SECRET=client_secret + +trace $CLI selftest send-telemetry + +echo "waiting for telemetry process to finish" + +# Wait for the child telemetry process to finish +wait_pid.py $(cat ./telemetry.pid) + +# cleanup the pid file +rm ./telemetry.pid diff --git a/acceptance/telemetry/upload-succeeds-oauth/test.toml b/acceptance/telemetry/upload-succeeds-oauth/test.toml new file mode 100644 index 0000000000..2adade96ac --- /dev/null +++ b/acceptance/telemetry/upload-succeeds-oauth/test.toml @@ -0,0 +1,5 @@ +# 
"client_id:client_secret" in base64 is Y2xpZW50X2lkOmNsaWVudF9zZWNyZXQ=, expect to +# see this in Authorization header +[[Repls]] +Old = "Y2xpZW50X2lkOmNsaWVudF9zZWNyZXQ=" +New = "[ENCODED_AUTH]" diff --git a/acceptance/telemetry/upload-succeeds/out.requests.txt b/acceptance/telemetry/upload-succeeds/out.requests.txt new file mode 100644 index 0000000000..d8260f5623 --- /dev/null +++ b/acceptance/telemetry/upload-succeeds/out.requests.txt @@ -0,0 +1,17 @@ +{ + "headers": { + "Authorization": [ + "Bearer [DATABRICKS_TOKEN]" + ] + }, + "method": "POST", + "path": "/telemetry-ext", + "body": { + "uploadTime": "UNIX_TIME_MILLIS", + "items": [], + "protoLogs": [ + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE1\"}}}}", + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"selftest_send-telemetry\",\"operating_system\":\"[OS]\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE2\"}}}}" + ] + } +} diff --git a/acceptance/telemetry/upload-succeeds/out.upload_process.txt b/acceptance/telemetry/upload-succeeds/out.upload_process.txt new file mode 100644 index 0000000000..2fd4e33064 --- /dev/null +++ b/acceptance/telemetry/upload-succeeds/out.upload_process.txt @@ -0,0 +1,3 @@ +Telemetry logs uploaded successfully +Response: +{"errors":[],"numProtoSuccess":2} diff --git a/acceptance/telemetry/upload-succeeds/output.txt b/acceptance/telemetry/upload-succeeds/output.txt new file mode 100644 index 0000000000..1a1f61588c --- /dev/null +++ b/acceptance/telemetry/upload-succeeds/output.txt @@ -0,0 +1,4 @@ + +>>> [CLI] selftest send-telemetry +waiting for telemetry process to finish +[wait_pid] process has ended diff --git a/acceptance/telemetry/upload-succeeds/script b/acceptance/telemetry/upload-succeeds/script new file mode 100644 index 0000000000..ae43384134 --- /dev/null +++ b/acceptance/telemetry/upload-succeeds/script @@ -0,0 +1,12 @@ +export DATABRICKS_CLI_TELEMETRY_PIDFILE=./telemetry.pid +export DATABRICKS_CLI_TELEMETRY_LOGFILE=./out.upload_process.txt + +trace $CLI selftest send-telemetry + +echo "waiting for telemetry process to finish" + +# Wait for the child telemetry process to finish +wait_pid.py $(cat ./telemetry.pid) + +# cleanup the pid file +rm ./telemetry.pid diff --git a/cmd/cmd.go b/cmd/cmd.go index 4f5337fd3c..792c325f0e 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -14,6 +14,7 @@ import ( "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/cmd/selftest" "github.com/databricks/cli/cmd/sync" + "github.com/databricks/cli/cmd/telemetry" "github.com/databricks/cli/cmd/version" "github.com/databricks/cli/cmd/workspace" "github.com/spf13/cobra" @@ -76,6 +77,7 @@ func New(ctx context.Context) *cobra.Command { cli.AddCommand(sync.New()) cli.AddCommand(version.New()) cli.AddCommand(selftest.New()) + cli.AddCommand(telemetry.New()) return cli } diff --git a/cmd/root/root.go b/cmd/root/root.go index 04815f48bd..9f0024635e 100644 --- a/cmd/root/root.go +++ b/cmd/root/root.go @@ -2,17 +2,26 @@ package root import ( "context" + "encoding/json" "errors" "fmt" "log/slog" "os" + "runtime" "runtime/debug" "strings" + "time" "github.com/databricks/cli/internal/build" + 
"github.com/databricks/cli/libs/auth" "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/daemon" "github.com/databricks/cli/libs/dbr" + "github.com/databricks/cli/libs/env" "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/telemetry" + "github.com/databricks/cli/libs/telemetry/protos" + "github.com/databricks/databricks-sdk-go/logger" "github.com/spf13/cobra" ) @@ -74,9 +83,6 @@ func New(ctx context.Context) *cobra.Command { // get the context back ctx = cmd.Context() - // Detect if the CLI is running on DBR and store this on the context. - ctx = dbr.DetectRuntime(ctx) - // Configure our user agent with the command that's about to be executed. ctx = withCommandInUserAgent(ctx, cmd) ctx = withCommandExecIdInUserAgent(ctx) @@ -124,6 +130,10 @@ Stack Trace: %s`, version, r, string(trace)) }() + ctx = telemetry.WithNewLogger(ctx) + ctx = dbr.DetectRuntime(ctx) + startTime := time.Now() + // Run the command cmd, err = cmd.ExecuteContextC(ctx) if err != nil && !errors.Is(err, ErrAlreadyPrinted) { @@ -150,6 +160,75 @@ Stack Trace: ) } } + exitCode := 0 + if err != nil { + exitCode = 1 + } + uploadTelemetry(cmd.Context(), commandString(cmd), startTime, exitCode) return err } + +func uploadTelemetry(ctx context.Context, cmdStr string, startTime time.Time, exitCode int) { + // Nothing to upload. + if !telemetry.HasLogs(ctx) { + return + } + + // Telemetry is disabled. We don't upload logs. + if os.Getenv(telemetry.DisableEnvVar) != "" { + return + } + + telemetry.SetExecutionContext(ctx, protos.ExecutionContext{ + CmdExecID: cmdExecId, + Version: build.GetInfo().Version, + Command: cmdStr, + OperatingSystem: runtime.GOOS, + DbrVersion: env.Get(ctx, dbr.EnvVarName), + ExecutionTimeMs: time.Since(startTime).Milliseconds(), + ExitCode: int64(exitCode), + }) + + logs := telemetry.GetLogs(ctx) + + in := telemetry.UploadConfig{ + Logs: logs, + } + + // Default to warn log level. If debug is enabled in the parent process, we set + // the log level to debug for the telemetry worker as well. + logLevel := "warn" + if log.GetLogger(ctx).Enabled(ctx, logger.LevelDebug) { + logLevel = "debug" + } + + d := daemon.Daemon{ + Args: []string{"telemetry", "upload", "--log-level=" + logLevel}, + Env: auth.ProcessEnv(ConfigUsed(ctx)), + PidFilePath: os.Getenv(telemetry.PidFileEnvVar), + LogFile: os.Getenv(telemetry.UploadLogsFileEnvVar), + } + + err := d.Start() + if err != nil { + log.Warnf(ctx, "failed to start telemetry worker: %s", err) + return + } + + // If the telemetry worker is started successfully, we write the logs to its stdin. + b, err := json.Marshal(in) + if err != nil { + log.Warnf(ctx, "failed to marshal telemetry logs: %s", err) + return + } + err = d.WriteInput(b) + if err != nil { + log.Warnf(ctx, "failed to write to telemetry worker: %s", err) + } + + err = d.Release() + if err != nil { + log.Warnf(ctx, "failed to release telemetry worker: %s", err) + } +} diff --git a/cmd/root/user_agent_command_exec_id.go b/cmd/root/user_agent_command_exec_id.go index 3bf32b703f..e3416983d0 100644 --- a/cmd/root/user_agent_command_exec_id.go +++ b/cmd/root/user_agent_command_exec_id.go @@ -7,8 +7,10 @@ import ( "github.com/google/uuid" ) +var cmdExecId = uuid.New().String() + func withCommandExecIdInUserAgent(ctx context.Context) context.Context { // A UUID that will allow us to correlate multiple API requests made by // the same CLI invocation. 
- return useragent.InContext(ctx, "cmd-exec-id", uuid.New().String()) + return useragent.InContext(ctx, "cmd-exec-id", cmdExecId) } diff --git a/cmd/selftest/selftest.go b/cmd/selftest/selftest.go index 7d8cfcb76e..b3e8541d4a 100644 --- a/cmd/selftest/selftest.go +++ b/cmd/selftest/selftest.go @@ -11,6 +11,7 @@ func New() *cobra.Command { Hidden: true, } + cmd.AddCommand(newSendTelemetry()) cmd.AddCommand(newPanic()) return cmd } diff --git a/cmd/selftest/send_telemetry.go b/cmd/selftest/send_telemetry.go new file mode 100644 index 0000000000..b091058e79 --- /dev/null +++ b/cmd/selftest/send_telemetry.go @@ -0,0 +1,29 @@ +package selftest + +import ( + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/telemetry" + "github.com/databricks/cli/libs/telemetry/protos" + "github.com/spf13/cobra" +) + +func newSendTelemetry() *cobra.Command { + cmd := &cobra.Command{ + Use: "send-telemetry", + Short: "log some test telemetry events", + PreRunE: root.MustWorkspaceClient, + } + + cmd.RunE = func(cmd *cobra.Command, args []string) error { + for _, v := range []string{"VALUE1", "VALUE2"} { + telemetry.Log(cmd.Context(), protos.DatabricksCliLog{ + CliTestEvent: &protos.CliTestEvent{ + Name: protos.DummyCliEnum(v), + }, + }) + } + return nil + } + + return cmd +} diff --git a/cmd/telemetry/telemetry.go b/cmd/telemetry/telemetry.go new file mode 100644 index 0000000000..8d1a635bbc --- /dev/null +++ b/cmd/telemetry/telemetry.go @@ -0,0 +1,16 @@ +package telemetry + +import ( + "github.com/spf13/cobra" +) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "telemetry", + Short: "commands that are used to upload telemetry", + Hidden: true, + } + + cmd.AddCommand(newTelemetryUpload()) + return cmd +} diff --git a/cmd/telemetry/upload.go b/cmd/telemetry/upload.go new file mode 100644 index 0000000000..31ec5c624b --- /dev/null +++ b/cmd/telemetry/upload.go @@ -0,0 +1,38 @@ +package telemetry + +import ( + "encoding/json" + "os" + + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/telemetry" + "github.com/spf13/cobra" +) + +func newTelemetryUpload() *cobra.Command { + return &cobra.Command{ + Use: "upload", + Run: func(cmd *cobra.Command, args []string) { + ctx := cmd.Context() + + // We exit the process explicitly at the end of Run to avoid the possibility + // of the worker being spawned recursively. This could otherwise happen if + // say telemetry was logged as part of this command. + defer os.Exit(0) + + resp, err := telemetry.Upload(ctx) + if err != nil { + log.Error(ctx, err.Error()) + os.Exit(1) + } + + cmdio.LogString(ctx, "Telemetry logs uploaded successfully") + cmdio.LogString(ctx, "Response:") + b, err := json.Marshal(resp) + if err == nil { + cmdio.LogString(ctx, string(b)) + } + }, + } +} diff --git a/libs/dbr/detect.go b/libs/dbr/detect.go index d8b4dfe20e..12b83dc8c8 100644 --- a/libs/dbr/detect.go +++ b/libs/dbr/detect.go @@ -11,6 +11,8 @@ import ( // Dereference [os.Stat] to allow mocking in tests. var statFunc = os.Stat +const EnvVarName = "DATABRICKS_RUNTIME_VERSION" + // detect returns true if the current process is running on a Databricks Runtime. // Its return value is meant to be cached in the context. func detect(ctx context.Context) bool { @@ -21,7 +23,7 @@ func detect(ctx context.Context) bool { } // Databricks Runtime always has the DATABRICKS_RUNTIME_VERSION environment variable set. 
- if value, ok := env.Lookup(ctx, "DATABRICKS_RUNTIME_VERSION"); !ok || value == "" { + if value, ok := env.Lookup(ctx, EnvVarName); !ok || value == "" { return false } diff --git a/libs/telemetry/context.go b/libs/telemetry/context.go new file mode 100644 index 0000000000..e556e462cd --- /dev/null +++ b/libs/telemetry/context.go @@ -0,0 +1,25 @@ +package telemetry + +import ( + "context" + "errors" +) + +// Private type to store the telemetry logger in the context +type telemetryLogger int + +// Key to store the telemetry logger in the context +var telemetryLoggerKey telemetryLogger + +func WithNewLogger(ctx context.Context) context.Context { + return context.WithValue(ctx, telemetryLoggerKey, &logger{}) +} + +func fromContext(ctx context.Context) *logger { + v := ctx.Value(telemetryLoggerKey) + if v == nil { + panic(errors.New("telemetry logger not found in the context")) + } + + return v.(*logger) +} diff --git a/libs/telemetry/logger.go b/libs/telemetry/logger.go new file mode 100644 index 0000000000..4fbce2f52a --- /dev/null +++ b/libs/telemetry/logger.go @@ -0,0 +1,50 @@ +package telemetry + +import ( + "context" + + "github.com/databricks/cli/libs/telemetry/protos" + "github.com/google/uuid" +) + +func Log(ctx context.Context, event protos.DatabricksCliLog) { + fromContext(ctx).log(event) +} + +func GetLogs(ctx context.Context) []protos.FrontendLog { + return fromContext(ctx).getLogs() +} + +func HasLogs(ctx context.Context) bool { + return len(fromContext(ctx).getLogs()) > 0 +} + +func SetExecutionContext(ctx context.Context, ec protos.ExecutionContext) { + fromContext(ctx).setExecutionContext(ec) +} + +type logger struct { + logs []protos.FrontendLog +} + +func (l *logger) log(event protos.DatabricksCliLog) { + if l.logs == nil { + l.logs = make([]protos.FrontendLog, 0) + } + l.logs = append(l.logs, protos.FrontendLog{ + FrontendLogEventID: uuid.New().String(), + Entry: protos.FrontendLogEntry{ + DatabricksCliLog: event, + }, + }) +} + +func (l *logger) getLogs() []protos.FrontendLog { + return l.logs +} + +func (l *logger) setExecutionContext(ec protos.ExecutionContext) { + for i := range l.logs { + l.logs[i].Entry.DatabricksCliLog.ExecutionContext = &ec + } +} diff --git a/libs/telemetry/protos/databricks_cli_log.go b/libs/telemetry/protos/databricks_cli_log.go index 9e4e59596f..64baa6b384 100644 --- a/libs/telemetry/protos/databricks_cli_log.go +++ b/libs/telemetry/protos/databricks_cli_log.go @@ -24,10 +24,12 @@ type ExecutionContext struct { FromWebTerminal bool `json:"from_web_terminal,omitempty"` // Time taken for the CLI command to execute. - ExecutionTimeMs int64 `json:"execution_time_ms,omitempty"` + // We want to serialize the zero value as well so the omitempty tag is not set. + ExecutionTimeMs int64 `json:"execution_time_ms"` // Exit code of the CLI command. - ExitCode int64 `json:"exit_code,omitempty"` + // We want to serialize the zero value as well so the omitempty tag is not set. 
+ ExitCode int64 `json:"exit_code"` } type CliTestEvent struct { diff --git a/libs/telemetry/upload.go b/libs/telemetry/upload.go new file mode 100644 index 0000000000..6c31df2616 --- /dev/null +++ b/libs/telemetry/upload.go @@ -0,0 +1,133 @@ +package telemetry + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "os" + "time" + + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/telemetry/protos" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/client" + "github.com/databricks/databricks-sdk-go/config" +) + +const ( + // File containing output from the upload process. + UploadLogsFileEnvVar = "DATABRICKS_CLI_TELEMETRY_LOGFILE" + + // File containing the PID of the telemetry upload process. + PidFileEnvVar = "DATABRICKS_CLI_TELEMETRY_PIDFILE" + + // Environment variable to disable telemetry. If this is set to any value, telemetry + // will be disabled. + DisableEnvVar = "DATABRICKS_CLI_DISABLE_TELEMETRY" +) + +type UploadConfig struct { + Logs []protos.FrontendLog `json:"logs"` +} + +// The API requires the logs to be JSON encoded strings. This function reads the +// logs from stdin and returns them as a slice of JSON encoded strings. +func readLogs(stdin io.Reader) ([]string, error) { + b, err := io.ReadAll(stdin) + if err != nil { + return nil, fmt.Errorf("failed to read from stdin: %s", err) + } + + in := UploadConfig{} + err = json.Unmarshal(b, &in) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal input: %s", err) + } + + if len(in.Logs) == 0 { + return nil, errors.New("No logs to upload") + } + + protoLogs := make([]string, len(in.Logs)) + for i, log := range in.Logs { + b, err := json.Marshal(log) + if err != nil { + return nil, fmt.Errorf("failed to marshal log: %s", err) + } + protoLogs[i] = string(b) + } + + return protoLogs, nil +} + +// Upload reads telemetry logs from stdin and uploads them to the telemetry endpoint. +// This function is always expected to be called in a separate child process from +// the main CLI process. +func Upload(ctx context.Context) (*ResponseBody, error) { + logs, err := readLogs(os.Stdin) + if err != nil { + return nil, err + } + + // Parent process is responsible for setting environment variables to + // configure authentication. + apiClient, err := client.New(&config.Config{}) + if err != nil { + return nil, fmt.Errorf("Failed to create API client: %s\n", err) + } + + var resp *ResponseBody + + // Only try uploading logs for a maximum of 3 times. + for i := range 3 { + resp, err = attempt(ctx, apiClient, logs) + + // All logs were uploaded successfully. + if err == nil && resp.NumProtoSuccess >= int64(len(logs)) { + return resp, nil + } + + // Partial success. Retry. + if err == nil && resp.NumProtoSuccess < int64(len(logs)) { + log.Warnf(ctx, "Attempt %d was a partial success. Number of logs uploaded: %d out of %d\n", i+1, resp.NumProtoSuccess, len(logs)) + time.Sleep(2 * time.Second) + continue + } + + // We retry for all 5xx responses. We explicitly omit 503 in the predicate here + // because it is already automatically retried in the SDK layer. + // ref: https://github.com/databricks/databricks-sdk-go/blob/cdb28002afacb8b762348534a4c4040a9f19c24b/apierr/errors.go#L91 + var apiErr *apierr.APIError + if errors.As(err, &apiErr) && apiErr.StatusCode >= 500 && apiErr.StatusCode != 503 { + log.Warnf(ctx, "Attempt %d failed due to a server side error. 
Retrying status code: %d\n", i+1, apiErr.StatusCode) + time.Sleep(2 * time.Second) + continue + } + } + + return resp, errors.New("upload did not succeed after three attempts") +} + +func attempt(ctx context.Context, apiClient *client.DatabricksClient, protoLogs []string) (*ResponseBody, error) { + resp := &ResponseBody{} + err := apiClient.Do(ctx, http.MethodPost, "/telemetry-ext", nil, nil, RequestBody{ + UploadTime: time.Now().UnixMilli(), + // There is a bug in the `/telemetry-ext` API which requires us to + // send an empty array for the `Items` field. Otherwise the API returns + // a 500. + Items: []string{}, + ProtoLogs: protoLogs, + }, resp) + if err != nil { + return nil, err + } + + if len(resp.Errors) > 0 { + return nil, fmt.Errorf("uploading telemetry failed: %v", resp.Errors) + } + + return resp, nil +} diff --git a/libs/telemetry/upload_test.go b/libs/telemetry/upload_test.go new file mode 100644 index 0000000000..76a4b807e0 --- /dev/null +++ b/libs/telemetry/upload_test.go @@ -0,0 +1,206 @@ +package telemetry + +import ( + "context" + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/telemetry/protos" + "github.com/databricks/cli/libs/testserver" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func configureStdin(t *testing.T) { + logs := []protos.FrontendLog{ + { + FrontendLogEventID: uuid.New().String(), + Entry: protos.FrontendLogEntry{ + DatabricksCliLog: protos.DatabricksCliLog{ + CliTestEvent: &protos.CliTestEvent{Name: protos.DummyCliEnumValue1}, + }, + }, + }, + { + FrontendLogEventID: uuid.New().String(), + Entry: protos.FrontendLogEntry{ + DatabricksCliLog: protos.DatabricksCliLog{ + CliTestEvent: &protos.CliTestEvent{Name: protos.DummyCliEnumValue2}, + }, + }, + }, + } + + processIn := UploadConfig{ + Logs: logs, + } + + b, err := json.Marshal(processIn) + require.NoError(t, err) + + tmpDir := t.TempDir() + testutil.WriteFile(t, filepath.Join(tmpDir, "stdin"), string(b)) + + f, err := os.OpenFile(filepath.Join(tmpDir, "stdin"), os.O_RDONLY, 0o644) + require.NoError(t, err) + + // Redirect stdin to the file containing the telemetry logs. 
+ old := os.Stdin + os.Stdin = f + t.Cleanup(func() { + f.Close() + os.Stdin = old + }) +} + +func TestTelemetryUploadRetriesOnPartialSuccess(t *testing.T) { + server := testserver.New(t) + t.Cleanup(server.Close) + + count := 0 + server.Handle("POST", "/telemetry-ext", func(req testserver.Request) any { + count++ + if count == 1 { + return ResponseBody{ + NumProtoSuccess: 1, + } + } + if count == 2 { + return ResponseBody{ + NumProtoSuccess: 2, + } + } + return nil + }) + + t.Setenv("DATABRICKS_HOST", server.URL) + t.Setenv("DATABRICKS_TOKEN", "token") + + configureStdin(t) + + resp, err := Upload(context.Background()) + require.NoError(t, err) + assert.Equal(t, int64(2), resp.NumProtoSuccess) + assert.Equal(t, 2, count) +} + +func uploadRetriesFor(t *testing.T, statusCode int) { + server := testserver.New(t) + t.Cleanup(server.Close) + + count := 0 + server.Handle("POST", "/telemetry-ext", func(req testserver.Request) any { + count++ + if count == 1 { + return testserver.Response{ + StatusCode: statusCode, + Body: apierr.APIError{ + StatusCode: statusCode, + Message: "Some error", + }, + } + } + if count == 2 { + return ResponseBody{ + NumProtoSuccess: 2, + } + } + return nil + }) + + t.Setenv("DATABRICKS_HOST", server.URL) + t.Setenv("DATABRICKS_TOKEN", "token") + + configureStdin(t) + + resp, err := Upload(context.Background()) + require.NoError(t, err) + assert.Equal(t, int64(2), resp.NumProtoSuccess) + assert.Equal(t, 2, count) +} + +func TestTelemetryUploadRetriesForStatusCodes(t *testing.T) { + // These retries happen in the CLI itself since the SDK does not automatically + // retry for 5xx errors. + uploadRetriesFor(t, 500) + uploadRetriesFor(t, 504) + + // These retries happen on the SDK layer. + // ref: https://github.com/databricks/databricks-sdk-go/blob/cdb28002afacb8b762348534a4c4040a9f19c24b/apierr/errors.go#L91 + uploadRetriesFor(t, 503) + uploadRetriesFor(t, 429) +} + +func TestTelemetryUploadMaxRetries(t *testing.T) { + server := testserver.New(t) + t.Cleanup(server.Close) + count := 0 + + server.Handle("POST", "/telemetry-ext", func(req testserver.Request) any { + count++ + return ResponseBody{ + NumProtoSuccess: 1, + } + }) + + t.Setenv("DATABRICKS_HOST", server.URL) + t.Setenv("DATABRICKS_TOKEN", "token") + + configureStdin(t) + + _, err := Upload(context.Background()) + assert.EqualError(t, err, "upload did not succeed after three attempts") + assert.Equal(t, 3, count) +} + +func TestReadFiles(t *testing.T) { + raw := `{ + "logs": [ + { + "frontend_log_event_id": "1", + "entry": { + "databricks_cli_log": { + "cli_test_event": { + "name": "DummyCliEnumValue1" + } + } + } + }, + { + "frontend_log_event_id": "2", + "entry": { + "databricks_cli_log": { + "cli_test_event": { + "name": "DummyCliEnumValue2" + } + } + } + } + ] +}` + + r := strings.NewReader(raw) + logs, err := readLogs(r) + require.NoError(t, err) + + assert.Equal(t, []string{ + `{"frontend_log_event_id":"1","entry":{"databricks_cli_log":{"cli_test_event":{"name":"DummyCliEnumValue1"}}}}`, + `{"frontend_log_event_id":"2","entry":{"databricks_cli_log":{"cli_test_event":{"name":"DummyCliEnumValue2"}}}}`, + }, logs) +} + +func TestReadFilesWithNoLogs(t *testing.T) { + raw := `{ + "logs": [] +}` + + r := strings.NewReader(raw) + _, err := readLogs(r) + assert.EqualError(t, err, "No logs to upload") +}
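
Note on the retry behaviour exercised by the upload-fails, upload-partially-succeeds, and TestTelemetryUploadMaxRetries tests above: the worker in libs/telemetry/upload.go makes at most three attempts and treats a partial success (numProtoSuccess lower than the number of logs sent) much like a retriable server error, sleeping two seconds between attempts. Below is a minimal, standalone Go sketch of that loop. It is illustrative only: uploadOnce and the fake in main are hypothetical stand-ins for the SDK client call and the test server, and the sketch simplifies error handling by retrying any unsuccessful attempt, whereas the real worker only retries 5xx responses (excluding 503, which the SDK already retries).

package main

import (
	"errors"
	"fmt"
	"time"
)

// response mirrors the relevant part of the /telemetry-ext response body.
type response struct {
	NumProtoSuccess int64
}

// uploadOnce is a hypothetical stand-in for a single POST to /telemetry-ext.
type uploadOnce func(logs []string) (*response, error)

// uploadWithRetry makes up to three attempts and retries whenever an attempt
// does not accept every log, whether because of an error or a partial success.
func uploadWithRetry(do uploadOnce, logs []string) (*response, error) {
	var resp *response
	var err error
	for i := 0; i < 3; i++ {
		resp, err = do(logs)
		if err == nil && resp.NumProtoSuccess >= int64(len(logs)) {
			// All logs were accepted; nothing left to retry.
			return resp, nil
		}
		fmt.Printf("attempt %d did not fully succeed, retrying\n", i+1)
		time.Sleep(2 * time.Second)
	}
	return resp, errors.New("upload did not succeed after three attempts")
}

func main() {
	attempts := 0
	// Fake endpoint: the first two attempts are partial successes, the third
	// accepts everything, similar to TestTelemetryUploadRetriesOnPartialSuccess.
	fake := func(logs []string) (*response, error) {
		attempts++
		if attempts < 3 {
			return &response{NumProtoSuccess: 1}, nil
		}
		return &response{NumProtoSuccess: int64(len(logs))}, nil
	}

	resp, err := uploadWithRetry(fake, []string{"log1", "log2"})
	fmt.Println(resp, err)
}

If the fake instead kept returning a partial success for all three attempts, the loop would end with the same "upload did not succeed after three attempts" error that the upload-fails and upload-partially-succeeds acceptance tests assert on in out.upload_process.txt.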