From 7f79ea1c59ad0241b99707aa739b1daa0fee8391 Mon Sep 17 00:00:00 2001 From: Valerii Date: Fri, 29 Dec 2023 12:12:13 +0000 Subject: [PATCH] dec 20 tele fix (#49) * WIP * Changelist: * fixed enormous amount of human characters in telemetry * human characters after last completion made no longer counts, as this lead to bugs * telemetry_full_cycle -> 2 functions (request from @oleg) * rh: human bugfix * small fix * robot human fixes * TELEMETRY_TRANSMIT_EACH_N_SECONDS = 3600 * negative robot human workaround * changes requested by @olegklimov * minor * implemented kirill's suggestions * added debug level to _tracing * minor * small fix * removed extra arg from get_add_del_from_texts --- src/main.rs | 5 +- src/telemetry/basic_robot_human.rs | 118 ++++++++++++++++----------- src/telemetry/basic_transmit.rs | 35 ++++---- src/telemetry/snippets_collection.rs | 12 ++- src/telemetry/telemetry_structs.rs | 10 +-- src/telemetry/utils.rs | 51 +++++++++++- 6 files changed, 152 insertions(+), 79 deletions(-) diff --git a/src/main.rs b/src/main.rs index db9a67d2b..7034caacf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ use std::io::Write; -use tracing::{error, info}; +use tracing::{error, info, Level}; use tracing_appender; use crate::background_tasks::start_background_tasks; @@ -41,6 +41,7 @@ async fn main() { )) }; let _tracing = tracing_subscriber::fmt() + .with_max_level(Level::DEBUG) .with_writer(logs_writer) .with_target(true) .with_line_number(true) @@ -74,6 +75,6 @@ async fn main() { background_tasks.abort().await; info!("saving telemetry without sending, so should be quick"); - basic_transmit::telemetry_full_cycle(gcx.clone(), true).await; + basic_transmit::basic_telemetry_compress(gcx.clone()).await; info!("bb\n"); } diff --git a/src/telemetry/basic_robot_human.rs b/src/telemetry/basic_robot_human.rs index a37a4fd83..8b0d1c38b 100644 --- a/src/telemetry/basic_robot_human.rs +++ b/src/telemetry/basic_robot_human.rs @@ -1,11 +1,11 @@ use std::collections::HashMap; -use tracing::{error, info}; -use std::sync::Arc; +use tracing::{debug, error, info}; +use std::sync::{Arc, RwLockWriteGuard}; use std::sync::RwLock as StdRwLock; use std::path::PathBuf; +use regex::Regex; use serde::{Deserialize, Serialize}; use serde_json::json; -use regex::Regex; use tokio::sync::RwLock as ARwLock; @@ -15,7 +15,51 @@ use crate::telemetry::telemetry_structs; use crate::telemetry::telemetry_structs::{SnippetTracker, TeleRobotHumanAccum}; -const ROBOT_HUMAN_FILE_STATS_UPDATE_EVERY: i64 = 15; + +pub fn create_robot_human_record_if_not_exists( + tele_robot_human: &mut Vec, + uri: &String, + text: &String +) { + let record_mb = tele_robot_human.iter_mut().find(|stat| stat.uri.eq(uri)); + if record_mb.is_some() { + return; + } + info!("create_robot_human_rec_if_not_exists: new uri {}", uri); + let record = TeleRobotHumanAccum::new( + uri.clone(), + text.clone(), + ); + tele_robot_human.push(record); +} + + +fn update_robot_characters_baseline( + rec: &mut TeleRobotHumanAccum, + snip: &SnippetTracker +) { + let re = Regex::new(r"\s+").unwrap(); + let robot_characters = re.replace_all(&snip.grey_text, "").len() as i64; + rec.robot_characters_acc_baseline += robot_characters; +} + +fn basetext_to_text_leap_calculations( + rec: &mut TeleRobotHumanAccum, + baseline_text: String, + text: &String, +) { + let re = Regex::new(r"\s+").unwrap(); + let (added_characters, removed_characters) = utils::get_add_del_from_texts(&baseline_text, text); + + let (added_characters, _) = utils::get_add_del_chars_from_texts(&removed_characters, &added_characters); + + // let real_characters_added = re.replace_all(&added_characters, "").len() as i64 - re.replace_all(&removed_characters, "").len() as i64; + let human_characters = re.replace_all(&added_characters, "").len() as i64 - rec.robot_characters_acc_baseline; + debug!("human_characters: +{}; robot_characters: +{}", human_characters, rec.robot_characters_acc_baseline); + rec.human_characters += human_characters; + rec.robot_characters += rec.robot_characters_acc_baseline; + rec.robot_characters_acc_baseline = 0; +} pub fn increase_counters_from_finished_snippet( @@ -24,67 +68,40 @@ pub fn increase_counters_from_finished_snippet( text: &String, snip: &SnippetTracker, ) { - // Snippet is finished when it stops being valid for correction (user has changed code in a different place) or it timeouts - fn robot_characters(snip: &SnippetTracker) -> i64 { - let re = Regex::new(r"\s+").unwrap(); - let robot_characters = re.replace_all(&snip.grey_text, "").len() as i64; - info!("increase_counters_from_finished_snippet: ID: {}; robot_characters: {}", snip.snippet_telemetry_id, robot_characters); - robot_characters - } - fn human_characters(rec: &TeleRobotHumanAccum, text: &String) -> i64 { - let re = Regex::new(r"\s+").unwrap(); - let (added_characters, _) = utils::get_add_del_from_texts(&rec.baseline_text, text); - let human_characters = re.replace_all(&added_characters, "").len() as i64 - rec.robot_characters_acc_baseline; - human_characters - } - + info!("snip grey_text: {}", snip.grey_text); let now = chrono::Local::now().timestamp(); - if let Some(rec) = tele_robot_human.iter_mut().find(|stat| stat.uri.eq(uri)) { if rec.used_snip_ids.contains(&snip.snippet_telemetry_id) { return; } - let robot_characters = robot_characters(snip); - rec.robot_characters_acc_baseline += robot_characters; - rec.used_snip_ids.push(snip.snippet_telemetry_id); - if rec.baseline_updated_ts + ROBOT_HUMAN_FILE_STATS_UPDATE_EVERY < now { - // New baseline, increase counters - rec.baseline_updated_ts = now; - rec.human_characters += human_characters(rec, text); - rec.robot_characters += rec.robot_characters_acc_baseline; - rec.robot_characters_acc_baseline = 0; - rec.baseline_text = text.clone(); - } - info!("increasing for {}, human+{}, robot+{}", snip.snippet_telemetry_id, human_characters(rec, text), robot_characters); - } else { - info!("increase_counters_from_finished_snippet: new uri {}", uri); - let init_file_text_mb = snip.inputs.sources.get(&snip.inputs.cursor.file); - if init_file_text_mb.is_none() { - return; + + if rec.used_snip_ids.is_empty() { + rec.model = snip.model.clone(); } - let init_file_text = init_file_text_mb.unwrap(); - tele_robot_human.push(TeleRobotHumanAccum::new( - uri.clone(), - snip.model.clone(), - init_file_text.clone(), - robot_characters(snip), - vec![snip.snippet_telemetry_id], - )); + + update_robot_characters_baseline(rec, snip); + basetext_to_text_leap_calculations(rec, rec.baseline_text.clone(), text); + + rec.used_snip_ids.push(snip.snippet_telemetry_id); + rec.baseline_updated_ts = now; + rec.baseline_text = text.clone(); } } fn compress_robot_human( - data: &mut Vec + storage_locked: &mut RwLockWriteGuard ) -> Vec { - let mut unique_combinations: HashMap<(String, String), Vec<&TeleRobotHumanAccum>> = HashMap::new(); + let mut unique_combinations: HashMap<(String, String), Vec> = HashMap::new(); + + let tele_robot_human = storage_locked.tele_robot_human.clone(); - for accum in data { + for accum in tele_robot_human { let key = (accum.file_extension.clone(), accum.model.clone()); unique_combinations.entry(key).or_default().push(accum); } let mut compressed_vec= vec![]; for (key, entries) in unique_combinations { - info!("compress_robot_human: compressing {} entries for key {:?}", entries.len(), key); + // info!("compress_robot_human: compressing {} entries for key {:?}", entries.len(), key); let mut record = TeleRobotHuman::new( key.0.clone(), key.1.clone() @@ -114,7 +131,10 @@ pub async fn tele_robot_human_compress_to_file( enduser_client_version = cx_locked.cmdline.enduser_client_version.clone(); let mut storage_locked = storage.write().unwrap(); - for rec in compress_robot_human(&mut storage_locked.tele_robot_human) { + for rec in compress_robot_human(&mut storage_locked) { + if rec.model.is_empty() && rec.robot_characters == 0 && rec.human_characters == 0 { + continue; + } let json_dict = serde_json::to_value(rec).unwrap(); records.as_array_mut().unwrap().push(json_dict); } diff --git a/src/telemetry/basic_transmit.rs b/src/telemetry/basic_transmit.rs index a3abecf6e..c219d7b61 100644 --- a/src/telemetry/basic_transmit.rs +++ b/src/telemetry/basic_transmit.rs @@ -16,8 +16,8 @@ use crate::telemetry::utils::{sorted_json_files, read_file, cleanup_old_files, t const TELEMETRY_TRANSMIT_AFTER_START_SECONDS: u64 = 60; -const TELEMETRY_TRANSMIT_EACH_N_SECONDS: u64 = 3600*3; -const TELEMETRY_FILES_KEEP: i32 = 30; +const TELEMETRY_TRANSMIT_EACH_N_SECONDS: u64 = 3600; +const TELEMETRY_FILES_KEEP: i32 = 128; pub async fn send_telemetry_data( @@ -88,11 +88,19 @@ pub async fn send_telemetry_files_to_mothership( } } -pub async fn telemetry_full_cycle( +pub async fn basic_telemetry_compress( global_context: Arc>, - skip_sending_part: bool, -) -> () { +) { info!("basic telemetry compression starts"); + basic_network::compress_basic_telemetry_to_file(global_context.clone()).await; + basic_robot_human::tele_robot_human_compress_to_file(global_context.clone()).await; + basic_comp_counters::compress_tele_completion_to_file(global_context.clone()).await; +} + +pub async fn basic_telemetry_send( + global_context: Arc>, +) -> () { + info!("basic telemetry sending starts"); let caps: Option>>; let api_key: String; let enable_basic_telemetry: bool; // from command line, will not send anything if false @@ -111,11 +119,7 @@ pub async fn telemetry_full_cycle( telemetry_basic_dest = caps.clone().unwrap().read().unwrap().telemetry_basic_dest.clone(); } - basic_network::compress_basic_telemetry_to_file(global_context.clone()).await; - basic_robot_human::tele_robot_human_compress_to_file(global_context.clone()).await; - basic_comp_counters::compress_tele_completion_to_file(global_context.clone()).await; - - if enable_basic_telemetry && !telemetry_basic_dest.is_empty() && !skip_sending_part { + if enable_basic_telemetry && !telemetry_basic_dest.is_empty() { send_telemetry_files_to_mothership( dir_compressed.clone(), dir_sent.clone(), @@ -130,9 +134,6 @@ pub async fn telemetry_full_cycle( if telemetry_basic_dest.is_empty() { info!("basic telemetry dest is empty, skip"); } - if skip_sending_part { - info!("skip_sending_part is true, skip"); - } } cleanup_old_files(dir_compressed, TELEMETRY_FILES_KEEP).await; cleanup_old_files(dir_sent, TELEMETRY_FILES_KEEP).await; @@ -141,9 +142,11 @@ pub async fn telemetry_full_cycle( pub async fn telemetry_background_task( global_context: Arc>, ) -> () { + tokio::time::sleep(tokio::time::Duration::from_secs(TELEMETRY_TRANSMIT_AFTER_START_SECONDS)).await; + basic_telemetry_send(global_context.clone()).await; loop { - tokio::time::sleep(tokio::time::Duration::from_secs(TELEMETRY_TRANSMIT_AFTER_START_SECONDS)).await; - telemetry_full_cycle(global_context.clone(), false).await; - tokio::time::sleep(tokio::time::Duration::from_secs(TELEMETRY_TRANSMIT_EACH_N_SECONDS - TELEMETRY_TRANSMIT_AFTER_START_SECONDS)).await; + tokio::time::sleep(tokio::time::Duration::from_secs(TELEMETRY_TRANSMIT_EACH_N_SECONDS)).await; + basic_telemetry_compress(global_context.clone()).await; + basic_telemetry_send(global_context.clone()).await; } } diff --git a/src/telemetry/snippets_collection.rs b/src/telemetry/snippets_collection.rs index ed69f13f5..e6eab1e2b 100644 --- a/src/telemetry/snippets_collection.rs +++ b/src/telemetry/snippets_collection.rs @@ -3,7 +3,7 @@ use std::sync::RwLock as StdRwLock; use serde::{Serialize, Deserialize}; use tokio::sync::RwLock as ARwLock; -use tracing::info; +use tracing::{debug, info}; use crate::global_context; use crate::completion_cache; @@ -89,7 +89,7 @@ pub async fn snippet_accepted( let snip = storage_locked.tele_snippets.iter_mut().find(|s| s.snippet_telemetry_id == snippet_telemetry_id); if let Some(snip) = snip { snip.accepted_ts = chrono::Local::now().timestamp(); - info!("snippet_accepted: ID{}: snippet is accepted", snippet_telemetry_id); + debug!("snippet_accepted: ID{}: snippet is accepted", snippet_telemetry_id); return true; } return false; @@ -103,6 +103,9 @@ pub async fn sources_changed( ) { let tele_storage = gcx.read().await.telemetry.clone(); let mut storage_locked = tele_storage.write().unwrap(); + + basic_robot_human::create_robot_human_record_if_not_exists(&mut storage_locked.tele_robot_human, uri, text); + let mut accepted_snippets = vec![]; for snip in storage_locked.tele_snippets.iter_mut() { if snip.accepted_ts == 0 || !uri.ends_with(&snip.inputs.cursor.file) { @@ -119,7 +122,7 @@ pub async fn sources_changed( // if snip.id is not in the list of finished snippets, add it if !accepted_snippets.iter().any(|s: &SnippetTracker| s.snippet_telemetry_id == snip.snippet_telemetry_id) { accepted_snippets.push(snip.clone()); - info!("sources_changed: ID{}: snippet is added to accepted", snip.snippet_telemetry_id); + debug!("sources_changed: ID{}: snippet is added to accepted", snip.snippet_telemetry_id); } let (grey_valid, mut grey_corrected) = utils::if_head_tail_equal_return_added_text( @@ -135,13 +138,14 @@ pub async fn sources_changed( } else { if snip.remaining_percentage >= 0. { snip.finished_ts = chrono::Local::now().timestamp(); - // info!("ID{}: snippet is finished, remaining_percentage={}", snip.snippet_telemetry_id, snip.remaining_percentage); + debug!("ID{}: snippet is finished, remaining_percentage={}", snip.snippet_telemetry_id, snip.remaining_percentage); } else { snip.accepted_ts = 0; // that will cleanup and not send } } } + for snip in accepted_snippets { basic_robot_human::increase_counters_from_finished_snippet(&mut storage_locked.tele_robot_human, uri, text, &snip); basic_comp_counters::create_data_accumulator_for_finished_snippet(&mut storage_locked.snippet_data_accumulators, uri, &snip); diff --git a/src/telemetry/telemetry_structs.rs b/src/telemetry/telemetry_structs.rs index 3210d2997..753919c4d 100644 --- a/src/telemetry/telemetry_structs.rs +++ b/src/telemetry/telemetry_structs.rs @@ -60,7 +60,7 @@ pub struct SnippetTracker { pub finished_ts: i64, } -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize, Clone, Default)] pub struct TeleRobotHumanAccum { // Internal struct, not sent anywhere pub uri: String, @@ -78,18 +78,18 @@ pub struct TeleRobotHumanAccum { impl TeleRobotHumanAccum { pub fn new( - uri: String, model: String, baseline_text: String, robot_characters_acc_baseline: i64, used_snip_ids: Vec + uri: String, baseline_text: String ) -> Self { Self { uri: uri.clone(), file_extension: utils::extract_extension_or_filename(&uri), - model, + model: "".to_string(), baseline_text, baseline_updated_ts: 0, - robot_characters_acc_baseline, + robot_characters_acc_baseline: 0, robot_characters: 0, human_characters: 0, - used_snip_ids, + used_snip_ids: vec![], } } } diff --git a/src/telemetry/utils.rs b/src/telemetry/utils.rs index ff87e4289..7f1020fc4 100644 --- a/src/telemetry/utils.rs +++ b/src/telemetry/utils.rs @@ -20,27 +20,72 @@ pub fn get_add_del_from_texts( text_a: &String, text_b: &String, ) -> (String, String) { - let diff = TextDiff::from_lines(text_a, text_b); + let mut text_a_lines = text_a.lines().collect::>(); + let mut text_b_lines = text_b.lines().collect::>(); + + for s in &mut text_a_lines { + *s = s.trim_end().trim_start(); + // info!("text_a: {}; len: {}", s, s.len()); + } + + for s in &mut text_b_lines { + *s = s.trim_end().trim_start(); + // info!("text_b: {}; len: {}", s, s.len()); + } + + let mut text_a_new = text_a_lines.join("\n"); + let text_b_new = text_b_lines.join("\n"); + + if !text_a_new.ends_with("\n") { + text_a_new += "\n"; + } + + let diff = TextDiff::from_lines(&text_a_new, &text_b_new); + let mut added = "".to_string(); let mut removed = "".to_string(); for change in diff.iter_all_changes() { match change.tag() { ChangeTag::Delete => { + // info!("rem: {}; len: {}", change.value(), change.value().len()); removed += change.value(); } ChangeTag::Insert => { added += change.value(); + // info!("add: {}; len: {}", change.value(), change.value().len()); } ChangeTag::Equal => { } } } - added = added.replace("\r", ""); - removed = added.replace("\r", ""); + (added, removed) } +pub fn get_add_del_chars_from_texts( + text_a: &String, + text_b: &String, +) -> (String, String) { + let diff = TextDiff::from_chars(text_a, text_b); + let mut added = "".to_string(); + let mut removed = "".to_string(); + for change in diff.iter_all_changes() { + match change.tag() { + ChangeTag::Delete => { + removed += change.value(); + } + ChangeTag::Insert => { + added += change.value(); + } + ChangeTag::Equal => { + } + } + } + + (added, removed) +} + pub async fn file_save(path: PathBuf, json: serde_json::Value) -> Result<(), String> { let mut f = tokio::fs::File::create(path).await.map_err(|e| format!("{:?}", e))?; f.write_all(serde_json::to_string_pretty(&json).unwrap().as_bytes()).await.map_err(|e| format!("{}", e))?;