diff --git a/acceptance/bin/wait_file b/acceptance/bin/wait_file new file mode 100755 index 0000000000..5fa1ab69c6 --- /dev/null +++ b/acceptance/bin/wait_file @@ -0,0 +1,27 @@ +#!/bin/bash + +wait_file() { + local file_path="$1" + local max_attempts=100 + local attempt=0 + + while [ $attempt -lt $max_attempts ]; do + if [ -e "$file_path" ]; then + echo "File $file_path exists" + return 0 + fi + sleep 0.1 + attempt=$((attempt + 1)) + done + + echo "Timeout: File $file_path did not appear within 10 seconds" + return 1 +} + +if [ $# -eq 0 ]; then + echo "Usage: $0 " + exit 1 +fi + +wait_file "$1" +exit $? diff --git a/acceptance/bin/wait_pid b/acceptance/bin/wait_pid new file mode 100755 index 0000000000..f1375bf51c --- /dev/null +++ b/acceptance/bin/wait_pid @@ -0,0 +1,42 @@ +#!/bin/bash + + +# wait in bash only works for processes that are direct children to the calling +# shell. This script is more general purpose. +wait_pid() { + local pid=$1 + local max_attempts=100 # 100 * 0.1 seconds = 10 seconds + local attempt=0 + local sleep_time=0.1 + + while [ $attempt -lt $max_attempts ]; do + if [[ "$OSTYPE" == "msys"* || "$OSTYPE" == "cygwin"* ]]; then + # Windows approach + if ! tasklist | grep -q $pid; then + echo "Process has ended" + return 0 + fi + else + # Linux/macOS approach + if ! kill -0 $pid 2>/dev/null; then + echo "Process has ended" + return 0 + fi + fi + + sleep $sleep_time + attempt=$((attempt + 1)) + done + + echo "Timeout: Process $pid did not end within 10 seconds" + return 1 +} + +# Usage +if [ $# -eq 0 ]; then + echo "Usage: $0 " + exit 1 +fi + +wait_pid $1 +exit $? diff --git a/acceptance/telemetry/dummy-error/out.requests.txt b/acceptance/telemetry/dummy-error/out.requests.txt new file mode 100644 index 0000000000..a94ed3fd73 --- /dev/null +++ b/acceptance/telemetry/dummy-error/out.requests.txt @@ -0,0 +1,12 @@ +{ + "method": "POST", + "path": "/telemetry-ext", + "body": { + "uploadTime": "UNIX_TIME_MILLIS", + "items": [], + "protoLogs": [ + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"telemetry_dummy\",\"operating_system\":\"OS\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE1\"}}}}", + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"telemetry_dummy\",\"operating_system\":\"OS\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE2\"}}}}" + ] + } +} diff --git a/acceptance/telemetry/dummy-error/out.upload.txt b/acceptance/telemetry/dummy-error/out.upload.txt new file mode 100644 index 0000000000..2881d09aa9 --- /dev/null +++ b/acceptance/telemetry/dummy-error/out.upload.txt @@ -0,0 +1,2 @@ +error: Failed to upload telemetry logs: Endpoint not implemented. + diff --git a/acceptance/telemetry/dummy-error/output.txt b/acceptance/telemetry/dummy-error/output.txt new file mode 100644 index 0000000000..4fe94d1ad7 --- /dev/null +++ b/acceptance/telemetry/dummy-error/output.txt @@ -0,0 +1,5 @@ + +>>> [CLI] telemetry dummy +waiting for telemetry process to finish +File ./telemetry.pid exists +Process has ended diff --git a/acceptance/telemetry/dummy-error/script b/acceptance/telemetry/dummy-error/script new file mode 100644 index 0000000000..8f22b50d93 --- /dev/null +++ b/acceptance/telemetry/dummy-error/script @@ -0,0 +1,15 @@ +export DATABRICKS_CLI_TELEMETRY_PID_FILE=./telemetry.pid +export DATABRICKS_CLI_TELEMETRY_UPLOAD_LOGS_FILE=./out.upload.txt + +# This test ensures that the main CLI command does not error even if +# telemetry upload fails. +trace $CLI telemetry dummy + +echo "waiting for telemetry process to finish" + +# Wait for the child telemetry process to finish +wait_file ./telemetry.pid +wait_pid $(cat ./telemetry.pid) + +# cleanup the pid file +rm -f ./telemetry.pid diff --git a/acceptance/telemetry/dummy-error/test.toml b/acceptance/telemetry/dummy-error/test.toml new file mode 100644 index 0000000000..92a6472729 --- /dev/null +++ b/acceptance/telemetry/dummy-error/test.toml @@ -0,0 +1,13 @@ +[[Server]] +Pattern = "POST /telemetry-ext" +Response.Body = ''' +{ + "error_code": "ERROR_CODE", + "message": "Endpoint not implemented." +} +''' +Response.StatusCode = 501 + +[[Repls]] +Old = 'execution_time_ms\\\":\d{1,5},' +New = 'execution_time_ms\":\"SMALL_INT\",' diff --git a/acceptance/telemetry/dummy/out.requests.txt b/acceptance/telemetry/dummy/out.requests.txt new file mode 100644 index 0000000000..a94ed3fd73 --- /dev/null +++ b/acceptance/telemetry/dummy/out.requests.txt @@ -0,0 +1,12 @@ +{ + "method": "POST", + "path": "/telemetry-ext", + "body": { + "uploadTime": "UNIX_TIME_MILLIS", + "items": [], + "protoLogs": [ + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"telemetry_dummy\",\"operating_system\":\"OS\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE1\"}}}}", + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"execution_context\":{\"cmd_exec_id\":\"[UUID]\",\"version\":\"[DEV_VERSION]\",\"command\":\"telemetry_dummy\",\"operating_system\":\"OS\",\"execution_time_ms\":\"SMALL_INT\",\"exit_code\":0},\"cli_test_event\":{\"name\":\"VALUE2\"}}}}" + ] + } +} diff --git a/acceptance/telemetry/dummy/out.upload.txt b/acceptance/telemetry/dummy/out.upload.txt new file mode 100644 index 0000000000..7ee6e3b6ad --- /dev/null +++ b/acceptance/telemetry/dummy/out.upload.txt @@ -0,0 +1,3 @@ +Telemetry logs uploaded successfully +Response: +{"errors":null,"numProtoSuccess":2} diff --git a/acceptance/telemetry/dummy/output.txt b/acceptance/telemetry/dummy/output.txt new file mode 100644 index 0000000000..4fe94d1ad7 --- /dev/null +++ b/acceptance/telemetry/dummy/output.txt @@ -0,0 +1,5 @@ + +>>> [CLI] telemetry dummy +waiting for telemetry process to finish +File ./telemetry.pid exists +Process has ended diff --git a/acceptance/telemetry/dummy/script b/acceptance/telemetry/dummy/script new file mode 100644 index 0000000000..0c0f529502 --- /dev/null +++ b/acceptance/telemetry/dummy/script @@ -0,0 +1,13 @@ +export DATABRICKS_CLI_TELEMETRY_PID_FILE=./telemetry.pid +export DATABRICKS_CLI_TELEMETRY_UPLOAD_LOGS_FILE=./out.upload.txt + +trace $CLI telemetry dummy + +echo "waiting for telemetry process to finish" + +# Wait for the child telemetry process to finish +wait_file ./telemetry.pid +wait_pid $(cat ./telemetry.pid) + +# cleanup the pid file +rm -f ./telemetry.pid diff --git a/acceptance/telemetry/dummy/test.toml b/acceptance/telemetry/dummy/test.toml new file mode 100644 index 0000000000..4826f42b0e --- /dev/null +++ b/acceptance/telemetry/dummy/test.toml @@ -0,0 +1,11 @@ +[[Server]] +Pattern = "POST /telemetry-ext" +Response.Body = ''' +{ + "numProtoSuccess": 2 +} +''' + +[[Repls]] +Old = 'execution_time_ms\\\":\d{1,5},' +New = 'execution_time_ms\":\"SMALL_INT\",' diff --git a/acceptance/telemetry/test.toml b/acceptance/telemetry/test.toml new file mode 100644 index 0000000000..e4a5776817 --- /dev/null +++ b/acceptance/telemetry/test.toml @@ -0,0 +1,10 @@ +LocalOnly = true +RecordRequests = true + +[[Repls]] +Old = '17\d{11}' +New = '"UNIX_TIME_MILLIS"' + +[[Repls]] +Old = 'darwin|linux|windows' +New = 'OS' diff --git a/acceptance/telemetry/upload/out.requests.txt b/acceptance/telemetry/upload/out.requests.txt new file mode 100644 index 0000000000..9264d8bfdb --- /dev/null +++ b/acceptance/telemetry/upload/out.requests.txt @@ -0,0 +1,12 @@ +{ + "method": "POST", + "path": "/telemetry-ext", + "body": { + "uploadTime": "UNIX_TIME_MILLIS", + "items": [], + "protoLogs": [ + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"cli_test_event\":{\"name\":\"VALUE1\"}}}}", + "{\"frontend_log_event_id\":\"[UUID]\",\"entry\":{\"databricks_cli_log\":{\"cli_test_event\":{\"name\":\"VALUE2\"}}}}" + ] + } +} diff --git a/acceptance/telemetry/upload/out.upload.txt b/acceptance/telemetry/upload/out.upload.txt new file mode 100644 index 0000000000..7ee6e3b6ad --- /dev/null +++ b/acceptance/telemetry/upload/out.upload.txt @@ -0,0 +1,3 @@ +Telemetry logs uploaded successfully +Response: +{"errors":null,"numProtoSuccess":2} diff --git a/acceptance/telemetry/upload/output.txt b/acceptance/telemetry/upload/output.txt new file mode 100644 index 0000000000..a7a85871b7 --- /dev/null +++ b/acceptance/telemetry/upload/output.txt @@ -0,0 +1,2 @@ + +>>> [CLI] telemetry upload diff --git a/acceptance/telemetry/upload/script b/acceptance/telemetry/upload/script new file mode 100644 index 0000000000..7312d83593 --- /dev/null +++ b/acceptance/telemetry/upload/script @@ -0,0 +1,5 @@ +export DATABRICKS_CLI_TELEMETRY_UPLOAD_LOGS_FILE=./out.upload.txt + +# This command / test cannot be run in inprocess / debug mode. This is because +# it does not go through the [root.Execute] function. +trace $CLI telemetry upload < stdin diff --git a/acceptance/telemetry/upload/stdin b/acceptance/telemetry/upload/stdin new file mode 100644 index 0000000000..6874ffaeef --- /dev/null +++ b/acceptance/telemetry/upload/stdin @@ -0,0 +1,24 @@ +{ + "logs": [ + { + "frontend_log_event_id": "BB79BB52-96F6-42C5-9E44-E63EEA84888D", + "entry": { + "databricks_cli_log": { + "cli_test_event": { + "name": "VALUE1" + } + } + } + }, + { + "frontend_log_event_id": "A7F597B0-66D1-462D-824C-C5C706F232E8", + "entry": { + "databricks_cli_log": { + "cli_test_event": { + "name": "VALUE2" + } + } + } + } + ] +} diff --git a/acceptance/telemetry/upload/test.toml b/acceptance/telemetry/upload/test.toml new file mode 100644 index 0000000000..05127c25b6 --- /dev/null +++ b/acceptance/telemetry/upload/test.toml @@ -0,0 +1,7 @@ +[[Server]] +Pattern = "POST /telemetry-ext" +Response.Body = ''' +{ + "numProtoSuccess": 2 +} +''' diff --git a/cmd/cmd.go b/cmd/cmd.go index 5d835409f9..060a89f2b3 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -13,6 +13,7 @@ import ( "github.com/databricks/cli/cmd/labs" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/cmd/sync" + "github.com/databricks/cli/cmd/telemetry" "github.com/databricks/cli/cmd/version" "github.com/databricks/cli/cmd/workspace" "github.com/spf13/cobra" @@ -74,6 +75,6 @@ func New(ctx context.Context) *cobra.Command { cli.AddCommand(labs.New(ctx)) cli.AddCommand(sync.New()) cli.AddCommand(version.New()) - + cli.AddCommand(telemetry.New()) return cli } diff --git a/cmd/root/root.go b/cmd/root/root.go index d7adf47f4a..0d2489126e 100644 --- a/cmd/root/root.go +++ b/cmd/root/root.go @@ -2,16 +2,26 @@ package root import ( "context" + "encoding/json" "errors" "fmt" "log/slog" "os" + "os/exec" + "runtime" + "slices" + "strconv" "strings" + "time" "github.com/databricks/cli/internal/build" + "github.com/databricks/cli/libs/auth" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/dbr" + "github.com/databricks/cli/libs/env" "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/telemetry" + "github.com/databricks/cli/libs/telemetry/protos" "github.com/spf13/cobra" ) @@ -73,9 +83,6 @@ func New(ctx context.Context) *cobra.Command { // get the context back ctx = cmd.Context() - // Detect if the CLI is running on DBR and store this on the context. - ctx = dbr.DetectRuntime(ctx) - // Configure our user agent with the command that's about to be executed. ctx = withCommandInUserAgent(ctx, cmd) ctx = withCommandExecIdInUserAgent(ctx) @@ -97,7 +104,9 @@ func flagErrorFunc(c *cobra.Command, err error) error { // Execute adds all child commands to the root command and sets flags appropriately. // This is called by main.main(). It only needs to happen once to the rootCmd. func Execute(ctx context.Context, cmd *cobra.Command) error { - // TODO: deferred panic recovery + ctx = telemetry.WithNewLogger(ctx) + ctx = dbr.DetectRuntime(ctx) + start := time.Now() // Run the command cmd, err := cmd.ExecuteContextC(ctx) @@ -126,5 +135,110 @@ func Execute(ctx context.Context, cmd *cobra.Command) error { } } + end := time.Now() + exitCode := 0 + if err != nil { + exitCode = 1 + } + + uploadTelemetry(cmd.Context(), commandString(cmd), start, end, exitCode) return err } + +// We want child telemetry processes to inherit environment variables like $HOME or $HTTPS_PROXY +// because they influence auth resolution. +func inheritEnvVars() []string { + base := os.Environ() + out := []string{} + authEnvVars := auth.EnvVars() + + // Remove any existing auth environment variables. This is done because + // the CLI offers multiple modalities of configuring authentication like + // `--profile` or `DATABRICKS_CONFIG_PROFILE` or `profile: ` in the + // bundle config file. + // + // Each of these modalities have different priorities and thus we don't want + // any auth configuration to piggyback into the child process environment. + // + // This is a precaution to avoid conflicting auth configurations being passed + // to the child telemetry process. + for _, v := range base { + k, _, found := strings.Cut(v, "=") + if !found { + continue + } + if slices.Contains(authEnvVars, k) { + continue + } + out = append(out, v) + } + + return out +} + +func uploadTelemetry(ctx context.Context, cmdStr string, start, end time.Time, exitCode int) { + // Nothing to upload. + if !telemetry.HasLogs(ctx) { + return + } + + telemetry.SetExecutionContext(ctx, protos.ExecutionContext{ + CmdExecID: cmdExecId, + Version: build.GetInfo().Version, + Command: cmdStr, + OperatingSystem: runtime.GOOS, + DbrVersion: env.Get(ctx, dbr.EnvVarName), + ExecutionTimeMs: end.Sub(start).Milliseconds(), + ExitCode: int64(exitCode), + }) + + logs := telemetry.GetLogs(ctx) + + in := telemetry.UploadConfig{ + Logs: logs, + } + + execPath, err := os.Executable() + if err != nil { + log.Debugf(ctx, "failed to get executable path: %s", err) + } + telemetryCmd := exec.Command(execPath, "telemetry", "upload") + telemetryCmd.Env = inheritEnvVars() + for k, v := range auth.Env(ConfigUsed(ctx)) { + telemetryCmd.Env = append(telemetryCmd.Env, fmt.Sprintf("%s=%s", k, v)) + } + + b, err := json.Marshal(in) + if err != nil { + log.Debugf(ctx, "failed to marshal telemetry logs: %s", err) + return + } + + stdin, err := telemetryCmd.StdinPipe() + if err != nil { + log.Debugf(ctx, "failed to create stdin pipe for telemetry worker: %s", err) + } + + err = telemetryCmd.Start() + if err != nil { + log.Debugf(ctx, "failed to start telemetry worker: %s", err) + return + } + + if pidFilePath := env.Get(ctx, telemetry.PidFileEnvVar); pidFilePath != "" { + err = os.WriteFile(pidFilePath, []byte(strconv.Itoa(telemetryCmd.Process.Pid)), 0o644) + if err != nil { + log.Debugf(ctx, "failed to write telemetry worker PID file: %s", err) + } + } + + _, err = stdin.Write(b) + if err != nil { + log.Debugf(ctx, "failed to write to telemetry worker: %s", err) + } + + err = stdin.Close() + if err != nil { + log.Debugf(ctx, "failed to close stdin for telemetry worker: %s", err) + } +} diff --git a/cmd/root/user_agent_command_exec_id.go b/cmd/root/user_agent_command_exec_id.go index 3bf32b703f..e3416983d0 100644 --- a/cmd/root/user_agent_command_exec_id.go +++ b/cmd/root/user_agent_command_exec_id.go @@ -7,8 +7,10 @@ import ( "github.com/google/uuid" ) +var cmdExecId = uuid.New().String() + func withCommandExecIdInUserAgent(ctx context.Context) context.Context { // A UUID that will allow us to correlate multiple API requests made by // the same CLI invocation. - return useragent.InContext(ctx, "cmd-exec-id", uuid.New().String()) + return useragent.InContext(ctx, "cmd-exec-id", cmdExecId) } diff --git a/cmd/telemetry/dummy.go b/cmd/telemetry/dummy.go new file mode 100644 index 0000000000..792cf5de86 --- /dev/null +++ b/cmd/telemetry/dummy.go @@ -0,0 +1,31 @@ +package telemetry + +import ( + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/telemetry" + "github.com/databricks/cli/libs/telemetry/protos" + "github.com/spf13/cobra" +) + +func newDummyCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "dummy", + Short: "log dummy telemetry events", + Long: "Fire a test telemetry event against the configured Databricks workspace.", + Hidden: true, + PreRunE: root.MustWorkspaceClient, + } + + cmd.RunE = func(cmd *cobra.Command, args []string) error { + for _, v := range []string{"VALUE1", "VALUE2"} { + telemetry.Log(cmd.Context(), protos.DatabricksCliLog{ + CliTestEvent: &protos.CliTestEvent{ + Name: protos.DummyCliEnum(v), + }, + }) + } + return nil + } + + return cmd +} diff --git a/cmd/telemetry/telemetry.go b/cmd/telemetry/telemetry.go new file mode 100644 index 0000000000..888aa318fb --- /dev/null +++ b/cmd/telemetry/telemetry.go @@ -0,0 +1,16 @@ +package telemetry + +import ( + "github.com/spf13/cobra" +) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "telemetry", + Short: "", + Hidden: true, + } + + cmd.AddCommand(newDummyCommand()) + return cmd +} diff --git a/libs/auth/env.go b/libs/auth/env.go index c58cc53e3f..66744d6a1e 100644 --- a/libs/auth/env.go +++ b/libs/auth/env.go @@ -24,3 +24,17 @@ func Env(cfg *config.Config) map[string]string { return out } + +func EnvVars() []string { + out := []string{} + + for _, attr := range config.ConfigAttributes { + if len(attr.EnvVars) == 0 { + continue + } + + out = append(out, attr.EnvVars[0]) + } + + return out +} diff --git a/libs/auth/env_test.go b/libs/auth/env_test.go index be1cfc7aca..b147b74bb2 100644 --- a/libs/auth/env_test.go +++ b/libs/auth/env_test.go @@ -40,3 +40,40 @@ func TestAuthEnv(t *testing.T) { out := Env(in) assert.Equal(t, expected, out) } + +func TestAuthEnvVars(t *testing.T) { + expected := []string{ + "DATABRICKS_HOST", + "DATABRICKS_CLUSTER_ID", + "DATABRICKS_WAREHOUSE_ID", + "DATABRICKS_SERVERLESS_COMPUTE_ID", + "DATABRICKS_METADATA_SERVICE_URL", + "DATABRICKS_ACCOUNT_ID", + "DATABRICKS_TOKEN", + "DATABRICKS_USERNAME", + "DATABRICKS_PASSWORD", + "DATABRICKS_CONFIG_PROFILE", + "DATABRICKS_CONFIG_FILE", + "DATABRICKS_GOOGLE_SERVICE_ACCOUNT", + "GOOGLE_CREDENTIALS", + "DATABRICKS_AZURE_RESOURCE_ID", + "ARM_USE_MSI", + "ARM_CLIENT_SECRET", + "ARM_CLIENT_ID", + "ARM_TENANT_ID", + "ACTIONS_ID_TOKEN_REQUEST_URL", + "ACTIONS_ID_TOKEN_REQUEST_TOKEN", + "ARM_ENVIRONMENT", + "DATABRICKS_AZURE_LOGIN_APP_ID", + "DATABRICKS_CLIENT_ID", + "DATABRICKS_CLIENT_SECRET", + "DATABRICKS_CLI_PATH", + "DATABRICKS_AUTH_TYPE", + "DATABRICKS_DEBUG_TRUNCATE_BYTES", + "DATABRICKS_DEBUG_HEADERS", + "DATABRICKS_RATE_LIMIT", + } + + out := EnvVars() + assert.Equal(t, expected, out) +} diff --git a/libs/dbr/detect.go b/libs/dbr/detect.go index d8b4dfe20e..12b83dc8c8 100644 --- a/libs/dbr/detect.go +++ b/libs/dbr/detect.go @@ -11,6 +11,8 @@ import ( // Dereference [os.Stat] to allow mocking in tests. var statFunc = os.Stat +const EnvVarName = "DATABRICKS_RUNTIME_VERSION" + // detect returns true if the current process is running on a Databricks Runtime. // Its return value is meant to be cached in the context. func detect(ctx context.Context) bool { @@ -21,7 +23,7 @@ func detect(ctx context.Context) bool { } // Databricks Runtime always has the DATABRICKS_RUNTIME_VERSION environment variable set. - if value, ok := env.Lookup(ctx, "DATABRICKS_RUNTIME_VERSION"); !ok || value == "" { + if value, ok := env.Lookup(ctx, EnvVarName); !ok || value == "" { return false } diff --git a/libs/telemetry/context.go b/libs/telemetry/context.go new file mode 100644 index 0000000000..e556e462cd --- /dev/null +++ b/libs/telemetry/context.go @@ -0,0 +1,25 @@ +package telemetry + +import ( + "context" + "errors" +) + +// Private type to store the telemetry logger in the context +type telemetryLogger int + +// Key to store the telemetry logger in the context +var telemetryLoggerKey telemetryLogger + +func WithNewLogger(ctx context.Context) context.Context { + return context.WithValue(ctx, telemetryLoggerKey, &logger{}) +} + +func fromContext(ctx context.Context) *logger { + v := ctx.Value(telemetryLoggerKey) + if v == nil { + panic(errors.New("telemetry logger not found in the context")) + } + + return v.(*logger) +} diff --git a/libs/telemetry/logger.go b/libs/telemetry/logger.go new file mode 100644 index 0000000000..4fbce2f52a --- /dev/null +++ b/libs/telemetry/logger.go @@ -0,0 +1,50 @@ +package telemetry + +import ( + "context" + + "github.com/databricks/cli/libs/telemetry/protos" + "github.com/google/uuid" +) + +func Log(ctx context.Context, event protos.DatabricksCliLog) { + fromContext(ctx).log(event) +} + +func GetLogs(ctx context.Context) []protos.FrontendLog { + return fromContext(ctx).getLogs() +} + +func HasLogs(ctx context.Context) bool { + return len(fromContext(ctx).getLogs()) > 0 +} + +func SetExecutionContext(ctx context.Context, ec protos.ExecutionContext) { + fromContext(ctx).setExecutionContext(ec) +} + +type logger struct { + logs []protos.FrontendLog +} + +func (l *logger) log(event protos.DatabricksCliLog) { + if l.logs == nil { + l.logs = make([]protos.FrontendLog, 0) + } + l.logs = append(l.logs, protos.FrontendLog{ + FrontendLogEventID: uuid.New().String(), + Entry: protos.FrontendLogEntry{ + DatabricksCliLog: event, + }, + }) +} + +func (l *logger) getLogs() []protos.FrontendLog { + return l.logs +} + +func (l *logger) setExecutionContext(ec protos.ExecutionContext) { + for i := range l.logs { + l.logs[i].Entry.DatabricksCliLog.ExecutionContext = &ec + } +} diff --git a/libs/telemetry/protos/databricks_cli_log.go b/libs/telemetry/protos/databricks_cli_log.go index 9e4e59596f..64baa6b384 100644 --- a/libs/telemetry/protos/databricks_cli_log.go +++ b/libs/telemetry/protos/databricks_cli_log.go @@ -24,10 +24,12 @@ type ExecutionContext struct { FromWebTerminal bool `json:"from_web_terminal,omitempty"` // Time taken for the CLI command to execute. - ExecutionTimeMs int64 `json:"execution_time_ms,omitempty"` + // We want to serialize the zero value as well so the omitempty tag is not set. + ExecutionTimeMs int64 `json:"execution_time_ms"` // Exit code of the CLI command. - ExitCode int64 `json:"exit_code,omitempty"` + // We want to serialize the zero value as well so the omitempty tag is not set. + ExitCode int64 `json:"exit_code"` } type CliTestEvent struct { diff --git a/libs/telemetry/upload.go b/libs/telemetry/upload.go new file mode 100644 index 0000000000..554e5dab76 --- /dev/null +++ b/libs/telemetry/upload.go @@ -0,0 +1,99 @@ +package telemetry + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "os" + "time" + + "github.com/databricks/cli/libs/telemetry/protos" + "github.com/databricks/databricks-sdk-go/client" + "github.com/databricks/databricks-sdk-go/config" +) + +const ( + // File containing debug logs from the upload process. + UploadLogsFileEnvVar = "DATABRICKS_CLI_TELEMETRY_UPLOAD_LOGS_FILE" + + // File containing the PID of the telemetry upload process. + PidFileEnvVar = "DATABRICKS_CLI_TELEMETRY_PID_FILE" +) + +type UploadConfig struct { + Logs []protos.FrontendLog `json:"logs"` +} + +// Upload reads telemetry logs from stdin and uploads them to the telemetry endpoint. +// This function is always expected to be called in a separate child process from +// the main CLI process. +func Upload() (*ResponseBody, error) { + var err error + + b, err := io.ReadAll(os.Stdin) + if err != nil { + return nil, fmt.Errorf("failed to read from stdin: %s\n", err) + } + + in := UploadConfig{} + err = json.Unmarshal(b, &in) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal input: %s\n", err) + } + + if len(in.Logs) == 0 { + return nil, fmt.Errorf("No logs to upload: %s\n", err) + } + + protoLogs := make([]string, len(in.Logs)) + for i, log := range in.Logs { + b, err := json.Marshal(log) + if err != nil { + return nil, fmt.Errorf("failed to marshal log: %s\n", err) + } + protoLogs[i] = string(b) + } + + // Parent process is responsible for setting environment variables to + // configure authentication. + apiClient, err := client.New(&config.Config{}) + if err != nil { + return nil, fmt.Errorf("Failed to create API client: %s\n", err) + } + + // Set a maximum total time to try telemetry uploads. + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + resp := &ResponseBody{} + for { + select { + case <-ctx.Done(): + return nil, errors.New("Failed to flush telemetry log due to timeout") + + default: + // Proceed + } + + // Log the CLI telemetry events. + err := apiClient.Do(ctx, http.MethodPost, "/telemetry-ext", nil, nil, RequestBody{ + UploadTime: time.Now().UnixMilli(), + Items: []string{}, + ProtoLogs: protoLogs, + }, resp) + if err != nil { + return nil, fmt.Errorf("Failed to upload telemetry logs: %s\n", err) + } + + if len(resp.Errors) > 0 { + return nil, fmt.Errorf("Failed to upload telemetry logs: %s\n", resp.Errors) + } + + if resp.NumProtoSuccess == int64(len(in.Logs)) { + return resp, nil + } + } +} diff --git a/libs/telemetry/upload_test.go b/libs/telemetry/upload_test.go new file mode 100644 index 0000000000..6a52cfbd3c --- /dev/null +++ b/libs/telemetry/upload_test.go @@ -0,0 +1,85 @@ +package telemetry + +import ( + "encoding/json" + "net/http" + "os" + "path/filepath" + "testing" + + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/telemetry/protos" + "github.com/databricks/cli/libs/testserver" + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTelemetryUpload(t *testing.T) { + server := testserver.New(t) + t.Cleanup(server.Close) + + count := 0 + server.Handle("POST", "/telemetry-ext", func(_ *testserver.FakeWorkspace, req *http.Request) (resp any, statusCode int) { + count++ + if count == 1 { + return ResponseBody{ + NumProtoSuccess: 1, + }, http.StatusOK + } + if count == 2 { + return ResponseBody{ + NumProtoSuccess: 2, + }, http.StatusOK + } + return nil, http.StatusInternalServerError + }) + + t.Setenv("DATABRICKS_HOST", server.URL) + t.Setenv("DATABRICKS_TOKEN", "token") + + logs := []protos.FrontendLog{ + { + FrontendLogEventID: uuid.New().String(), + Entry: protos.FrontendLogEntry{ + DatabricksCliLog: protos.DatabricksCliLog{ + CliTestEvent: &protos.CliTestEvent{Name: protos.DummyCliEnumValue1}, + }, + }, + }, + { + FrontendLogEventID: uuid.New().String(), + Entry: protos.FrontendLogEntry{ + DatabricksCliLog: protos.DatabricksCliLog{ + CliTestEvent: &protos.CliTestEvent{Name: protos.DummyCliEnumValue2}, + }, + }, + }, + } + + processIn := UploadConfig{ + Logs: logs, + } + + b, err := json.Marshal(processIn) + require.NoError(t, err) + + tmpDir := t.TempDir() + testutil.WriteFile(t, filepath.Join(tmpDir, "stdin"), string(b)) + + fd, err := os.OpenFile(filepath.Join(tmpDir, "stdin"), os.O_RDONLY, 0o644) + require.NoError(t, err) + + // Redirect stdin to the file containing the telemetry logs. + old := os.Stdin + os.Stdin = fd + t.Cleanup(func() { + fd.Close() + os.Stdin = old + }) + + resp, err := Upload() + require.NoError(t, err) + assert.Equal(t, int64(2), resp.NumProtoSuccess) + assert.Equal(t, 2, count) +} diff --git a/main.go b/main.go index c568e6adbd..3150e79ad8 100644 --- a/main.go +++ b/main.go @@ -2,14 +2,53 @@ package main import ( "context" + "encoding/json" + "fmt" + "io" "os" "github.com/databricks/cli/cmd" "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/telemetry" ) func main() { ctx := context.Background() + + // Uploading telemetry data spawns a new process. We handle this separately + // from the rest of the CLI commands. + // This is done because [root.Execute] spawns a new process to run the + // "telemetry upload" command if there are logs to be uploaded. Having this outside + // of [root.Execute] ensures that the telemetry upload process is not spawned + // infinitely in a recursive manner. + if len(os.Args) == 3 && os.Args[1] == "telemetry" && os.Args[2] == "upload" { + var err error + + // By default, this command should not write anything to stdout or stderr. + outW := io.Discard + errW := io.Discard + + // If the environment variable is set, redirect stdout to the file. + // This is useful for testing. + if v := os.Getenv(telemetry.UploadLogsFileEnvVar); v != "" { + outW, _ = os.OpenFile(v, os.O_CREATE|os.O_WRONLY, 0o644) + errW = outW + } + + resp, err := telemetry.Upload() + if err != nil { + fmt.Fprintf(errW, "error: %s\n", err) + os.Exit(1) + } + fmt.Fprintf(outW, "Telemetry logs uploaded successfully\n") + fmt.Fprintln(outW, "Response:") + b, err := json.Marshal(resp) + if err == nil { + fmt.Fprintln(outW, string(b)) + } + os.Exit(0) + } + err := root.Execute(ctx, cmd.New(ctx)) if err != nil { os.Exit(1)