From 45af1fcc2f6c201012f0045fa320f7667f6348a4 Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Wed, 30 Jul 2025 17:34:09 +0200 Subject: [PATCH] Always double reconnection delay and add jitter (#35337) Previously, we would pick a multiplier between 0.5 and 2.5, which would cause a lot of clients to try reconnecting in rapid succession, overwhelming the server as a result. This pull request always doubles the previous delay and introduces a jitter that can, at most, double it. As part of this, we're also increasing the maximum reconnection delay from 10s to 30s: this gives us more space to spread out the reconnection requests. Release Notes: - N/A --------- Co-authored-by: Marshall Bowers --- crates/client/src/client.rs | 13 ++++++------- crates/collab/src/tests/integration_tests.rs | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/crates/client/src/client.rs b/crates/client/src/client.rs index e0f4a70b15..07e708f11b 100644 --- a/crates/client/src/client.rs +++ b/crates/client/src/client.rs @@ -31,7 +31,6 @@ use rpc::proto::{AnyTypedEnvelope, EnvelopedMessage, PeerId, RequestMessage}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use settings::{Settings, SettingsSources}; -use std::pin::Pin; use std::{ any::TypeId, convert::TryFrom, @@ -45,6 +44,7 @@ use std::{ }, time::{Duration, Instant}, }; +use std::{cmp, pin::Pin}; use telemetry::Telemetry; use thiserror::Error; use tokio::net::TcpStream; @@ -78,7 +78,7 @@ pub static ZED_ALWAYS_ACTIVE: LazyLock = LazyLock::new(|| std::env::var("ZED_ALWAYS_ACTIVE").map_or(false, |e| !e.is_empty())); pub const INITIAL_RECONNECTION_DELAY: Duration = Duration::from_millis(500); -pub const MAX_RECONNECTION_DELAY: Duration = Duration::from_secs(10); +pub const MAX_RECONNECTION_DELAY: Duration = Duration::from_secs(30); pub const CONNECTION_TIMEOUT: Duration = Duration::from_secs(20); actions!( @@ -727,11 +727,10 @@ impl Client { }, &cx, ); - cx.background_executor().timer(delay).await; - 
delay = delay - .mul_f32(rng.gen_range(0.5..=2.5)) - .max(INITIAL_RECONNECTION_DELAY) - .min(MAX_RECONNECTION_DELAY); + let jitter = + Duration::from_millis(rng.gen_range(0..delay.as_millis() as u64)); + cx.background_executor().timer(delay + jitter).await; + delay = cmp::min(delay * 2, MAX_RECONNECTION_DELAY); } else { break; } diff --git a/crates/collab/src/tests/integration_tests.rs b/crates/collab/src/tests/integration_tests.rs index 9795c27574..f1cc2bf24a 100644 --- a/crates/collab/src/tests/integration_tests.rs +++ b/crates/collab/src/tests/integration_tests.rs @@ -842,7 +842,7 @@ async fn test_client_disconnecting_from_room( // Allow user A to reconnect to the server. server.allow_connections(); - executor.advance_clock(RECEIVE_TIMEOUT); + executor.advance_clock(RECONNECT_TIMEOUT); // Call user B again from client A. active_call_a @@ -1358,7 +1358,7 @@ async fn test_calls_on_multiple_connections( // User A reconnects automatically, then calls user B again. server.allow_connections(); - executor.advance_clock(RECEIVE_TIMEOUT); + executor.advance_clock(RECONNECT_TIMEOUT); active_call_a .update(cx_a, |call, cx| { call.invite(client_b1.user_id().unwrap(), None, cx)