Always double reconnection delay and add jitter (#35337)

Previously, we would multiply the delay by a random multiplier between 0.5 and 2.5, which would cause a lot of clients to try reconnecting in rapid succession, overwhelming the server as a result.

This pull request always doubles the previous delay and introduces a jitter that can, at most, double it.

As part of this, we're also increasing the maximum reconnection delay from 10s to 30s: this gives us more space to spread out the reconnection requests.

Release Notes:

- N/A

---------

Co-authored-by: Marshall Bowers <git@maxdeviant.com>
2025-07-30 17:34:09 +02:00 · 2025-07-30 17:34:09 +02:00 · 45af1fcc2f
commit 45af1fcc2f
parent 0aea5acc68
2 changed files with 8 additions and 9 deletions
--- a/crates/client/src/client.rs
+++ b/crates/client/src/client.rs
@ -31,7 +31,6 @@ use rpc::proto::{AnyTypedEnvelope, EnvelopedMessage, PeerId, RequestMessage};
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use settings::{Settings, SettingsSources};
-use std::pin::Pin;
 use std::{
    any::TypeId,
    convert::TryFrom,
@ -45,6 +44,7 @@ use std::{
    },
    time::{Duration, Instant},
 };
+use std::{cmp, pin::Pin};
 use telemetry::Telemetry;
 use thiserror::Error;
 use tokio::net::TcpStream;
@ -78,7 +78,7 @@ pub static ZED_ALWAYS_ACTIVE: LazyLock<bool> =
    LazyLock::new(|| std::env::var("ZED_ALWAYS_ACTIVE").map_or(false, |e| !e.is_empty()));
 pub const INITIAL_RECONNECTION_DELAY: Duration = Duration::from_millis(500);
-pub const MAX_RECONNECTION_DELAY: Duration = Duration::from_secs(10);
+pub const MAX_RECONNECTION_DELAY: Duration = Duration::from_secs(30);
 pub const CONNECTION_TIMEOUT: Duration = Duration::from_secs(20);
 actions!(
@ -727,11 +727,10 @@ impl Client {
                                },
                                &cx,
                            );
-                            cx.background_executor().timer(delay).await;
-                            delay = delay
-                                .mul_f32(rng.gen_range(0.5..=2.5))
-                                .max(INITIAL_RECONNECTION_DELAY)
-                                .min(MAX_RECONNECTION_DELAY);
+                            let jitter =
+                                Duration::from_millis(rng.gen_range(0..delay.as_millis() as u64));
+                            cx.background_executor().timer(delay + jitter).await;
+                            delay = cmp::min(delay * 2, MAX_RECONNECTION_DELAY);
                        } else {
                            break;
                        }
--- a/crates/collab/src/tests/integration_tests.rs
+++ b/crates/collab/src/tests/integration_tests.rs
@ -842,7 +842,7 @@ async fn test_client_disconnecting_from_room(
    // Allow user A to reconnect to the server.
    server.allow_connections();
-    executor.advance_clock(RECEIVE_TIMEOUT);
+    executor.advance_clock(RECONNECT_TIMEOUT);
    // Call user B again from client A.
    active_call_a
@ -1358,7 +1358,7 @@ async fn test_calls_on_multiple_connections(
    // User A reconnects automatically, then calls user B again.
    server.allow_connections();
-    executor.advance_clock(RECEIVE_TIMEOUT);
+    executor.advance_clock(RECONNECT_TIMEOUT);
    active_call_a
        .update(cx_a, |call, cx| {
            call.invite(client_b1.user_id().unwrap(), None, cx)