Always double reconnection delay and add jitter (#35337)

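Previously, we would multiply the previous delay by a random factor
between 0.5 and 2.5. Because that factor could be less than 1, the delay
could shrink rather than grow, which would cause a lot of clients to try
reconnecting in rapid succession, overwhelming the server as a result.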

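This pull request always doubles the previous delay (up to the maximum
reconnection delay) and adds a random jitter to each wait. The jitter is
always smaller than the current delay, so it can, at most, double the
time we wait before the next attempt.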

As part of this, we're also increasing the maximum reconnection delay
from 10s to 30s: this gives us more space to spread out the reconnection
requests.

Release Notes:

- N/A

---------

Co-authored-by: Marshall Bowers <git@maxdeviant.com>
This commit is contained in:
Antonio Scandurra 2025-07-30 17:34:09 +02:00 committed by GitHub
parent 0aea5acc68
commit 45af1fcc2f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 8 additions and 9 deletions

View file

@ -31,7 +31,6 @@ use rpc::proto::{AnyTypedEnvelope, EnvelopedMessage, PeerId, RequestMessage};
use schemars::JsonSchema; use schemars::JsonSchema;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsSources}; use settings::{Settings, SettingsSources};
use std::pin::Pin;
use std::{ use std::{
any::TypeId, any::TypeId,
convert::TryFrom, convert::TryFrom,
@ -45,6 +44,7 @@ use std::{
}, },
time::{Duration, Instant}, time::{Duration, Instant},
}; };
use std::{cmp, pin::Pin};
use telemetry::Telemetry; use telemetry::Telemetry;
use thiserror::Error; use thiserror::Error;
use tokio::net::TcpStream; use tokio::net::TcpStream;
@ -78,7 +78,7 @@ pub static ZED_ALWAYS_ACTIVE: LazyLock<bool> =
LazyLock::new(|| std::env::var("ZED_ALWAYS_ACTIVE").map_or(false, |e| !e.is_empty())); LazyLock::new(|| std::env::var("ZED_ALWAYS_ACTIVE").map_or(false, |e| !e.is_empty()));
pub const INITIAL_RECONNECTION_DELAY: Duration = Duration::from_millis(500); pub const INITIAL_RECONNECTION_DELAY: Duration = Duration::from_millis(500);
pub const MAX_RECONNECTION_DELAY: Duration = Duration::from_secs(10); pub const MAX_RECONNECTION_DELAY: Duration = Duration::from_secs(30);
pub const CONNECTION_TIMEOUT: Duration = Duration::from_secs(20); pub const CONNECTION_TIMEOUT: Duration = Duration::from_secs(20);
actions!( actions!(
@ -727,11 +727,10 @@ impl Client {
}, },
&cx, &cx,
); );
cx.background_executor().timer(delay).await; let jitter =
delay = delay Duration::from_millis(rng.gen_range(0..delay.as_millis() as u64));
.mul_f32(rng.gen_range(0.5..=2.5)) cx.background_executor().timer(delay + jitter).await;
.max(INITIAL_RECONNECTION_DELAY) delay = cmp::min(delay * 2, MAX_RECONNECTION_DELAY);
.min(MAX_RECONNECTION_DELAY);
} else { } else {
break; break;
} }

View file

@ -842,7 +842,7 @@ async fn test_client_disconnecting_from_room(
// Allow user A to reconnect to the server. // Allow user A to reconnect to the server.
server.allow_connections(); server.allow_connections();
executor.advance_clock(RECEIVE_TIMEOUT); executor.advance_clock(RECONNECT_TIMEOUT);
// Call user B again from client A. // Call user B again from client A.
active_call_a active_call_a
@ -1358,7 +1358,7 @@ async fn test_calls_on_multiple_connections(
// User A reconnects automatically, then calls user B again. // User A reconnects automatically, then calls user B again.
server.allow_connections(); server.allow_connections();
executor.advance_clock(RECEIVE_TIMEOUT); executor.advance_clock(RECONNECT_TIMEOUT);
active_call_a active_call_a
.update(cx_a, |call, cx| { .update(cx_a, |call, cx| {
call.invite(client_b1.user_id().unwrap(), None, cx) call.invite(client_b1.user_id().unwrap(), None, cx)