Always double reconnection delay and add jitter (#35337)

Previously, we would pick an exponent between 0.5 and 2.5, which would
cause a lot of clients to try reconnecting in rapid succession,
overwhelming the server as a result.

This pull request always doubles the previous delay and introduces a
jitter that can, at most, double it.

As part of this, we're also increasing the maximum reconnection delay
from 10s to 30s: this gives us more space to spread out the reconnection
requests.

Release Notes:

- N/A

---------

Co-authored-by: Marshall Bowers <git@maxdeviant.com>
This commit is contained in:
Antonio Scandurra 2025-07-30 17:34:09 +02:00 committed by Joseph T. Lyons
parent 4d44b9f659
commit 268fc411a2
2 changed files with 8 additions and 9 deletions

View file

@ -31,7 +31,6 @@ use rpc::proto::{AnyTypedEnvelope, EnvelopedMessage, PeerId, RequestMessage};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsSources};
use std::pin::Pin;
use std::{
any::TypeId,
convert::TryFrom,
@ -45,6 +44,7 @@ use std::{
},
time::{Duration, Instant},
};
use std::{cmp, pin::Pin};
use telemetry::Telemetry;
use thiserror::Error;
use tokio::net::TcpStream;
@ -78,7 +78,7 @@ pub static ZED_ALWAYS_ACTIVE: LazyLock<bool> =
LazyLock::new(|| std::env::var("ZED_ALWAYS_ACTIVE").map_or(false, |e| !e.is_empty()));
pub const INITIAL_RECONNECTION_DELAY: Duration = Duration::from_millis(500);
pub const MAX_RECONNECTION_DELAY: Duration = Duration::from_secs(10);
pub const MAX_RECONNECTION_DELAY: Duration = Duration::from_secs(30);
pub const CONNECTION_TIMEOUT: Duration = Duration::from_secs(20);
actions!(
@ -727,11 +727,10 @@ impl Client {
},
&cx,
);
cx.background_executor().timer(delay).await;
delay = delay
.mul_f32(rng.gen_range(0.5..=2.5))
.max(INITIAL_RECONNECTION_DELAY)
.min(MAX_RECONNECTION_DELAY);
let jitter =
Duration::from_millis(rng.gen_range(0..delay.as_millis() as u64));
cx.background_executor().timer(delay + jitter).await;
delay = cmp::min(delay * 2, MAX_RECONNECTION_DELAY);
} else {
break;
}

View file

@ -842,7 +842,7 @@ async fn test_client_disconnecting_from_room(
// Allow user A to reconnect to the server.
server.allow_connections();
executor.advance_clock(RECEIVE_TIMEOUT);
executor.advance_clock(RECONNECT_TIMEOUT);
// Call user B again from client A.
active_call_a
@ -1358,7 +1358,7 @@ async fn test_calls_on_multiple_connections(
// User A reconnects automatically, then calls user B again.
server.allow_connections();
executor.advance_clock(RECEIVE_TIMEOUT);
executor.advance_clock(RECONNECT_TIMEOUT);
active_call_a
.update(cx_a, |call, cx| {
call.invite(client_b1.user_id().unwrap(), None, cx)