mod event_coalescer; use crate::{ChannelId, TelemetrySettings}; use chrono::{DateTime, Utc}; use clock::SystemClock; use futures::Future; use gpui::{AppContext, AppMetadata, BackgroundExecutor, Task}; use once_cell::sync::Lazy; use parking_lot::Mutex; use release_channel::ReleaseChannel; use settings::{Settings, SettingsStore}; use sha2::{Digest, Sha256}; use std::io::Write; use std::{env, mem, path::PathBuf, sync::Arc, time::Duration}; use sysinfo::{ CpuRefreshKind, Pid, PidExt, ProcessExt, ProcessRefreshKind, RefreshKind, System, SystemExt, }; use telemetry_events::{ ActionEvent, AppEvent, AssistantEvent, AssistantKind, CallEvent, CopilotEvent, CpuEvent, EditEvent, EditorEvent, Event, EventRequestBody, EventWrapper, MemoryEvent, SettingEvent, }; use tempfile::NamedTempFile; use util::http::{self, HttpClient, HttpClientWithUrl, Method}; #[cfg(not(debug_assertions))] use util::ResultExt; use util::TryFutureExt; use self::event_coalescer::EventCoalescer; pub struct Telemetry { clock: Arc, http_client: Arc, executor: BackgroundExecutor, state: Arc>, } struct TelemetryState { settings: TelemetrySettings, metrics_id: Option>, // Per logged-in user installation_id: Option>, // Per app installation (different for dev, nightly, preview, and stable) session_id: Option, // Per app launch release_channel: Option<&'static str>, app_metadata: AppMetadata, architecture: &'static str, events_queue: Vec, flush_events_task: Option>, log_file: Option, is_staff: Option, first_event_date_time: Option>, event_coalescer: EventCoalescer, max_queue_size: usize, } #[cfg(debug_assertions)] const MAX_QUEUE_LEN: usize = 5; #[cfg(not(debug_assertions))] const MAX_QUEUE_LEN: usize = 50; #[cfg(debug_assertions)] const FLUSH_INTERVAL: Duration = Duration::from_secs(1); #[cfg(not(debug_assertions))] const FLUSH_INTERVAL: Duration = Duration::from_secs(60 * 5); static ZED_CLIENT_CHECKSUM_SEED: Lazy>> = Lazy::new(|| { option_env!("ZED_CLIENT_CHECKSUM_SEED") .map(|s| s.as_bytes().into()) .or_else(|| { env::var("ZED_CLIENT_CHECKSUM_SEED") .ok() .map(|s| s.as_bytes().into()) }) }); impl Telemetry { pub fn new( clock: Arc, client: Arc, cx: &mut AppContext, ) -> Arc { let release_channel = ReleaseChannel::try_global(cx).map(|release_channel| release_channel.display_name()); TelemetrySettings::register(cx); let state = Arc::new(Mutex::new(TelemetryState { settings: *TelemetrySettings::get_global(cx), app_metadata: cx.app_metadata(), architecture: env::consts::ARCH, release_channel, installation_id: None, metrics_id: None, session_id: None, events_queue: Vec::new(), flush_events_task: None, log_file: None, is_staff: None, first_event_date_time: None, event_coalescer: EventCoalescer::new(clock.clone()), max_queue_size: MAX_QUEUE_LEN, })); #[cfg(not(debug_assertions))] cx.background_executor() .spawn({ let state = state.clone(); async move { if let Some(tempfile) = NamedTempFile::new_in(util::paths::CONFIG_DIR.as_path()).log_err() { state.lock().log_file = Some(tempfile); } } }) .detach(); cx.observe_global::({ let state = state.clone(); move |cx| { let mut state = state.lock(); state.settings = *TelemetrySettings::get_global(cx); } }) .detach(); // TODO: Replace all hardware stuff with nested SystemSpecs json let this = Arc::new(Self { clock, http_client: client, executor: cx.background_executor().clone(), state, }); // We should only ever have one instance of Telemetry, leak the subscription to keep it alive // rather than store in TelemetryState, complicating spawn as subscriptions are not Send std::mem::forget(cx.on_app_quit({ let this = this.clone(); move |_| this.shutdown_telemetry() })); this } #[cfg(any(test, feature = "test-support"))] fn shutdown_telemetry(self: &Arc) -> impl Future { Task::ready(()) } // Skip calling this function in tests. // TestAppContext ends up calling this function on shutdown and it panics when trying to find the TelemetrySettings #[cfg(not(any(test, feature = "test-support")))] fn shutdown_telemetry(self: &Arc) -> impl Future { self.report_app_event("close".to_string()); // TODO: close final edit period and make sure it's sent Task::ready(()) } pub fn log_file_path(&self) -> Option { Some(self.state.lock().log_file.as_ref()?.path().to_path_buf()) } pub fn start( self: &Arc, installation_id: Option, session_id: String, cx: &mut AppContext, ) { let mut state = self.state.lock(); state.installation_id = installation_id.map(|id| id.into()); state.session_id = Some(session_id); drop(state); let this = self.clone(); cx.spawn(|_| async move { // Avoiding calling `System::new_all()`, as there have been crashes related to it let refresh_kind = RefreshKind::new() .with_memory() // For memory usage .with_processes(ProcessRefreshKind::everything()) // For process usage .with_cpu(CpuRefreshKind::everything()); // For core count let mut system = System::new_with_specifics(refresh_kind); // Avoiding calling `refresh_all()`, just update what we need system.refresh_specifics(refresh_kind); // Waiting some amount of time before the first query is important to get a reasonable value // https://docs.rs/sysinfo/0.29.10/sysinfo/trait.ProcessExt.html#tymethod.cpu_usage const DURATION_BETWEEN_SYSTEM_EVENTS: Duration = Duration::from_secs(4 * 60); loop { smol::Timer::after(DURATION_BETWEEN_SYSTEM_EVENTS).await; system.refresh_specifics(refresh_kind); let current_process = Pid::from_u32(std::process::id()); let Some(process) = system.processes().get(¤t_process) else { let process = current_process; log::error!("Failed to find own process {process:?} in system process table"); // TODO: Fire an error telemetry event return; }; this.report_memory_event(process.memory(), process.virtual_memory()); this.report_cpu_event(process.cpu_usage(), system.cpus().len() as u32); } }) .detach(); } pub fn set_authenticated_user_info( self: &Arc, metrics_id: Option, is_staff: bool, ) { let mut state = self.state.lock(); if !state.settings.metrics { return; } let metrics_id: Option> = metrics_id.map(|id| id.into()); state.metrics_id = metrics_id.clone(); state.is_staff = Some(is_staff); drop(state); } pub fn report_editor_event( self: &Arc, file_extension: Option, vim_mode: bool, operation: &'static str, copilot_enabled: bool, copilot_enabled_for_language: bool, ) { let event = Event::Editor(EditorEvent { file_extension, vim_mode, operation: operation.into(), copilot_enabled, copilot_enabled_for_language, }); self.report_event(event) } pub fn report_copilot_event( self: &Arc, suggestion_id: Option, suggestion_accepted: bool, file_extension: Option, ) { let event = Event::Copilot(CopilotEvent { suggestion_id, suggestion_accepted, file_extension, }); self.report_event(event) } pub fn report_assistant_event( self: &Arc, conversation_id: Option, kind: AssistantKind, model: &str, ) { let event = Event::Assistant(AssistantEvent { conversation_id, kind, model: model.to_string(), }); self.report_event(event) } pub fn report_call_event( self: &Arc, operation: &'static str, room_id: Option, channel_id: Option, ) { let event = Event::Call(CallEvent { operation: operation.to_string(), room_id, channel_id: channel_id.map(|cid| cid.0), }); self.report_event(event) } pub fn report_cpu_event(self: &Arc, usage_as_percentage: f32, core_count: u32) { let event = Event::Cpu(CpuEvent { usage_as_percentage, core_count, }); self.report_event(event) } pub fn report_memory_event( self: &Arc, memory_in_bytes: u64, virtual_memory_in_bytes: u64, ) { let event = Event::Memory(MemoryEvent { memory_in_bytes, virtual_memory_in_bytes, }); self.report_event(event) } pub fn report_app_event(self: &Arc, operation: String) -> Event { let event = Event::App(AppEvent { operation }); self.report_event(event.clone()); event } pub fn report_setting_event(self: &Arc, setting: &'static str, value: String) { let event = Event::Setting(SettingEvent { setting: setting.to_string(), value, }); self.report_event(event) } pub fn log_edit_event(self: &Arc, environment: &'static str) { let mut state = self.state.lock(); let period_data = state.event_coalescer.log_event(environment); drop(state); if let Some((start, end, environment)) = period_data { let event = Event::Edit(EditEvent { duration: end.timestamp_millis() - start.timestamp_millis(), environment: environment.to_string(), }); self.report_event(event); } } pub fn report_action_event(self: &Arc, source: &'static str, action: String) { let event = Event::Action(ActionEvent { source: source.to_string(), action, }); self.report_event(event) } fn report_event(self: &Arc, event: Event) { let mut state = self.state.lock(); if !state.settings.metrics { return; } if state.flush_events_task.is_none() { let this = self.clone(); let executor = self.executor.clone(); state.flush_events_task = Some(self.executor.spawn(async move { executor.timer(FLUSH_INTERVAL).await; this.flush_events(); })); } let date_time = self.clock.utc_now(); let milliseconds_since_first_event = match state.first_event_date_time { Some(first_event_date_time) => { date_time.timestamp_millis() - first_event_date_time.timestamp_millis() } None => { state.first_event_date_time = Some(date_time); 0 } }; let signed_in = state.metrics_id.is_some(); state.events_queue.push(EventWrapper { signed_in, milliseconds_since_first_event, event, }); if state.installation_id.is_some() && state.events_queue.len() >= state.max_queue_size { drop(state); self.flush_events(); } } pub fn metrics_id(self: &Arc) -> Option> { self.state.lock().metrics_id.clone() } pub fn installation_id(self: &Arc) -> Option> { self.state.lock().installation_id.clone() } pub fn is_staff(self: &Arc) -> Option { self.state.lock().is_staff } pub fn flush_events(self: &Arc) { let mut state = self.state.lock(); state.first_event_date_time = None; let mut events = mem::take(&mut state.events_queue); state.flush_events_task.take(); drop(state); if events.is_empty() { return; } let Some(checksum_seed) = &*ZED_CLIENT_CHECKSUM_SEED else { return; }; let this = self.clone(); self.executor .spawn( async move { let mut json_bytes = Vec::new(); if let Some(file) = &mut this.state.lock().log_file { let file = file.as_file_mut(); for event in &mut events { json_bytes.clear(); serde_json::to_writer(&mut json_bytes, event)?; file.write_all(&json_bytes)?; file.write_all(b"\n")?; } } { let state = this.state.lock(); let request_body = EventRequestBody { installation_id: state.installation_id.as_deref().map(Into::into), session_id: state.session_id.clone(), is_staff: state.is_staff, app_version: state .app_metadata .app_version .unwrap_or_default() .to_string(), os_name: state.app_metadata.os_name.to_string(), os_version: state .app_metadata .os_version .map(|version| version.to_string()), architecture: state.architecture.to_string(), release_channel: state.release_channel.map(Into::into), events, }; json_bytes.clear(); serde_json::to_writer(&mut json_bytes, &request_body)?; } let mut summer = Sha256::new(); summer.update(checksum_seed); summer.update(&json_bytes); summer.update(checksum_seed); let mut checksum = String::new(); for byte in summer.finalize().as_slice() { use std::fmt::Write; write!(&mut checksum, "{:02x}", byte).unwrap(); } let request = http::Request::builder() .method(Method::POST) .uri(this.http_client.build_zed_api_url("/telemetry/events")) .header("Content-Type", "text/plain") .header("x-zed-checksum", checksum) .body(json_bytes.into()); let response = this.http_client.send(request?).await?; if response.status() != 200 { log::error!("Failed to send events: HTTP {:?}", response.status()); } anyhow::Ok(()) } .log_err(), ) .detach(); } } #[cfg(test)] mod tests { use super::*; use chrono::TimeZone; use clock::FakeSystemClock; use gpui::TestAppContext; use util::http::FakeHttpClient; #[gpui::test] fn test_telemetry_flush_on_max_queue_size(cx: &mut TestAppContext) { init_test(cx); let clock = Arc::new(FakeSystemClock::new( Utc.with_ymd_and_hms(1990, 4, 12, 12, 0, 0).unwrap(), )); let http = FakeHttpClient::with_200_response(); let installation_id = Some("installation_id".to_string()); let session_id = "session_id".to_string(); cx.update(|cx| { let telemetry = Telemetry::new(clock.clone(), http, cx); telemetry.state.lock().max_queue_size = 4; telemetry.start(installation_id, session_id, cx); assert!(is_empty_state(&telemetry)); let first_date_time = clock.utc_now(); let operation = "test".to_string(); let event = telemetry.report_app_event(operation.clone()); assert_eq!( event, Event::App(AppEvent { operation: operation.clone(), }) ); assert_eq!(telemetry.state.lock().events_queue.len(), 1); assert!(telemetry.state.lock().flush_events_task.is_some()); assert_eq!( telemetry.state.lock().first_event_date_time, Some(first_date_time) ); clock.advance(chrono::Duration::milliseconds(100)); let event = telemetry.report_app_event(operation.clone()); assert_eq!( event, Event::App(AppEvent { operation: operation.clone(), }) ); assert_eq!(telemetry.state.lock().events_queue.len(), 2); assert!(telemetry.state.lock().flush_events_task.is_some()); assert_eq!( telemetry.state.lock().first_event_date_time, Some(first_date_time) ); clock.advance(chrono::Duration::milliseconds(100)); let event = telemetry.report_app_event(operation.clone()); assert_eq!( event, Event::App(AppEvent { operation: operation.clone(), }) ); assert_eq!(telemetry.state.lock().events_queue.len(), 3); assert!(telemetry.state.lock().flush_events_task.is_some()); assert_eq!( telemetry.state.lock().first_event_date_time, Some(first_date_time) ); clock.advance(chrono::Duration::milliseconds(100)); // Adding a 4th event should cause a flush let event = telemetry.report_app_event(operation.clone()); assert_eq!( event, Event::App(AppEvent { operation: operation.clone(), }) ); assert!(is_empty_state(&telemetry)); }); } #[gpui::test] async fn test_connection_timeout(executor: BackgroundExecutor, cx: &mut TestAppContext) { init_test(cx); let clock = Arc::new(FakeSystemClock::new( Utc.with_ymd_and_hms(1990, 4, 12, 12, 0, 0).unwrap(), )); let http = FakeHttpClient::with_200_response(); let installation_id = Some("installation_id".to_string()); let session_id = "session_id".to_string(); cx.update(|cx| { let telemetry = Telemetry::new(clock.clone(), http, cx); telemetry.state.lock().max_queue_size = 4; telemetry.start(installation_id, session_id, cx); assert!(is_empty_state(&telemetry)); let first_date_time = clock.utc_now(); let operation = "test".to_string(); let event = telemetry.report_app_event(operation.clone()); assert_eq!( event, Event::App(AppEvent { operation: operation.clone(), }) ); assert_eq!(telemetry.state.lock().events_queue.len(), 1); assert!(telemetry.state.lock().flush_events_task.is_some()); assert_eq!( telemetry.state.lock().first_event_date_time, Some(first_date_time) ); let duration = Duration::from_millis(1); // Test 1 millisecond before the flush interval limit is met executor.advance_clock(FLUSH_INTERVAL - duration); assert!(!is_empty_state(&telemetry)); // Test the exact moment the flush interval limit is met executor.advance_clock(duration); assert!(is_empty_state(&telemetry)); }); } // TODO: // Test settings // Update FakeHTTPClient to keep track of the number of requests and assert on it fn init_test(cx: &mut TestAppContext) { cx.update(|cx| { let settings_store = SettingsStore::test(cx); cx.set_global(settings_store); }); } fn is_empty_state(telemetry: &Telemetry) -> bool { telemetry.state.lock().events_queue.is_empty() && telemetry.state.lock().flush_events_task.is_none() && telemetry.state.lock().first_event_date_time.is_none() } }