Merge pull request #940 from zed-industries/telemetry
Instrument the collab server with OpenTelemetry collecting into Honeycomb.io
This commit is contained in:
commit
cddafa5fef
14 changed files with 612 additions and 153 deletions
|
@ -25,21 +25,26 @@ base64 = "0.13"
|
|||
envy = "0.4.2"
|
||||
env_logger = "0.8"
|
||||
futures = "0.3"
|
||||
json_env_logger = "0.1"
|
||||
lazy_static = "1.4"
|
||||
lipsum = { version = "0.8", optional = true }
|
||||
log = { version = "0.4.16", features = ["kv_unstable_serde"] }
|
||||
opentelemetry = { version = "0.17", features = ["rt-tokio"] }
|
||||
opentelemetry-otlp = { version = "0.10", features = ["tls-roots"] }
|
||||
parking_lot = "0.11.1"
|
||||
rand = "0.8"
|
||||
scrypt = "0.7"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
sha-1 = "0.9"
|
||||
time = "0.2"
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
tokio-tungstenite = "0.17"
|
||||
tonic = "0.6"
|
||||
tower = "0.4"
|
||||
time = "0.2"
|
||||
toml = "0.5.8"
|
||||
tracing = "0.1"
|
||||
tracing-opentelemetry = "0.17"
|
||||
tracing-subscriber = "0.3"
|
||||
|
||||
[dependencies.sqlx]
|
||||
version = "0.5.2"
|
||||
|
|
|
@ -1 +1,3 @@
|
|||
ZED_ENVIRONMENT=production
|
||||
RUST_LOG=info
|
||||
TRACE_LEVEL=debug
|
||||
|
|
|
@ -1 +1,3 @@
|
|||
ZED_ENVIRONMENT=staging
|
||||
RUST_LOG=info
|
||||
TRACE_LEVEL=debug
|
||||
|
|
|
@ -81,10 +81,17 @@ spec:
|
|||
secretKeyRef:
|
||||
name: api
|
||||
key: token
|
||||
- name: LOG_JSON
|
||||
value: "1"
|
||||
- name: RUST_LOG
|
||||
value: "trace"
|
||||
value: ${RUST_LOG}
|
||||
- name: TRACE_LEVEL
|
||||
value: ${TRACE_LEVEL}
|
||||
- name: HONEYCOMB_DATASET
|
||||
value: "collab"
|
||||
- name: HONEYCOMB_API_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: honeycomb
|
||||
key: apiKey
|
||||
securityContext:
|
||||
capabilities:
|
||||
# FIXME - Switch to the more restrictive `PERFMON` capability.
|
||||
|
|
|
@ -431,6 +431,12 @@ macro_rules! id_type {
|
|||
self.0 as u64
|
||||
}
|
||||
}
|
||||
|
||||
// Render the id exactly like the wrapped value.
//
// The call goes through the fully-qualified trait path so the macro expansion does not depend
// on which `fmt` traits (`Display`, `Debug`, ...) are imported where `id_type!` is invoked.
impl std::fmt::Display for $name {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        std::fmt::Display::fmt(&self.0, f)
    }
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -6,18 +6,21 @@ mod rpc;
|
|||
|
||||
use axum::{body::Body, http::StatusCode, response::IntoResponse, Router};
|
||||
use db::{Db, PostgresDb};
|
||||
|
||||
use serde::Deserialize;
|
||||
use std::{
|
||||
net::{SocketAddr, TcpListener},
|
||||
sync::Arc,
|
||||
};
|
||||
use tracing::metadata::LevelFilter;
|
||||
|
||||
/// Server configuration; `main` populates it from environment variables via `envy::from_env`.
#[derive(Default, Deserialize)]
|
||||
pub struct Config {
|
||||
/// Port `main` binds the listener to, on `0.0.0.0`.
pub http_port: u16,
|
||||
/// NOTE(review): presumably the database connection string; its use is not visible here, confirm.
pub database_url: String,
|
||||
/// NOTE(review): how this token is consumed is not visible in this excerpt, confirm.
pub api_token: String,
|
||||
/// Honeycomb API key; `init_tracing` sends it as the `x-honeycomb-team` metadata entry.
/// Trace export is skipped unless this and `honeycomb_dataset` are both set.
pub honeycomb_api_key: Option<String>,
|
||||
/// Value `init_tracing` reports as the `service.name` resource attribute.
pub honeycomb_dataset: Option<String>,
|
||||
/// Level filter name parsed by `init_tracing`; `INFO` is used when unset.
pub trace_level: Option<String>,
|
||||
}
|
||||
|
||||
pub struct AppState {
|
||||
|
@ -38,11 +41,7 @@ impl AppState {
|
|||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
if std::env::var("LOG_JSON").is_ok() {
|
||||
json_env_logger::init();
|
||||
} else {
|
||||
env_logger::init();
|
||||
}
|
||||
env_logger::init();
|
||||
|
||||
if let Err(error) = env::load_dotenv() {
|
||||
log::error!(
|
||||
|
@ -52,6 +51,7 @@ async fn main() -> Result<()> {
|
|||
}
|
||||
|
||||
let config = envy::from_env::<Config>().expect("error loading config");
|
||||
init_tracing(&config);
|
||||
let state = AppState::new(&config).await?;
|
||||
|
||||
let listener = TcpListener::bind(&format!("0.0.0.0:{}", config.http_port))
|
||||
|
@ -112,3 +112,51 @@ impl std::fmt::Display for Error {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn init_tracing(config: &Config) -> Option<()> {
|
||||
use opentelemetry::KeyValue;
|
||||
use opentelemetry_otlp::WithExportConfig;
|
||||
use std::str::FromStr;
|
||||
use tracing_opentelemetry::OpenTelemetryLayer;
|
||||
use tracing_subscriber::layer::SubscriberExt;
|
||||
|
||||
let (honeycomb_api_key, honeycomb_dataset) = config
|
||||
.honeycomb_api_key
|
||||
.clone()
|
||||
.zip(config.honeycomb_dataset.clone())?;
|
||||
|
||||
let mut metadata = tonic::metadata::MetadataMap::new();
|
||||
metadata.insert("x-honeycomb-team", honeycomb_api_key.parse().unwrap());
|
||||
let tracer = opentelemetry_otlp::new_pipeline()
|
||||
.tracing()
|
||||
.with_exporter(
|
||||
opentelemetry_otlp::new_exporter()
|
||||
.tonic()
|
||||
.with_endpoint("https://api.honeycomb.io")
|
||||
.with_metadata(metadata),
|
||||
)
|
||||
.with_trace_config(opentelemetry::sdk::trace::config().with_resource(
|
||||
opentelemetry::sdk::Resource::new(vec![KeyValue::new(
|
||||
"service.name",
|
||||
honeycomb_dataset,
|
||||
)]),
|
||||
))
|
||||
.install_batch(opentelemetry::runtime::Tokio)
|
||||
.expect("failed to initialize tracing");
|
||||
|
||||
let subscriber = tracing_subscriber::Registry::default()
|
||||
.with(OpenTelemetryLayer::new(tracer))
|
||||
.with(tracing_subscriber::fmt::layer())
|
||||
.with(
|
||||
config
|
||||
.trace_level
|
||||
.as_ref()
|
||||
.map_or(LevelFilter::INFO, |level| {
|
||||
LevelFilter::from_str(level).unwrap()
|
||||
}),
|
||||
);
|
||||
|
||||
tracing::subscriber::set_global_default(subscriber).unwrap();
|
||||
|
||||
None
|
||||
}
|
||||
|
|
|
@ -25,7 +25,6 @@ use axum::{
|
|||
use collections::{HashMap, HashSet};
|
||||
use futures::{channel::mpsc, future::BoxFuture, FutureExt, SinkExt, StreamExt, TryStreamExt};
|
||||
use lazy_static::lazy_static;
|
||||
use log::{as_debug, as_display};
|
||||
use rpc::{
|
||||
proto::{self, AnyTypedEnvelope, EntityMessage, EnvelopedMessage, RequestMessage},
|
||||
Connection, ConnectionId, Peer, TypedEnvelope,
|
||||
|
@ -38,7 +37,7 @@ use std::{
|
|||
ops::{Deref, DerefMut},
|
||||
rc::Rc,
|
||||
sync::Arc,
|
||||
time::{Duration, Instant},
|
||||
time::Duration,
|
||||
};
|
||||
use store::{Store, Worktree};
|
||||
use time::OffsetDateTime;
|
||||
|
@ -47,11 +46,10 @@ use tokio::{
|
|||
time::Sleep,
|
||||
};
|
||||
use tower::ServiceBuilder;
|
||||
use util::ResultExt;
|
||||
use tracing::{info_span, instrument, Instrument};
|
||||
|
||||
type MessageHandler = Box<
|
||||
dyn Send + Sync + Fn(Arc<Server>, Box<dyn AnyTypedEnvelope>) -> BoxFuture<'static, Result<()>>,
|
||||
>;
|
||||
type MessageHandler =
|
||||
Box<dyn Send + Sync + Fn(Arc<Server>, Box<dyn AnyTypedEnvelope>) -> BoxFuture<'static, ()>>;
|
||||
|
||||
pub struct Server {
|
||||
peer: Arc<Peer>,
|
||||
|
@ -156,7 +154,21 @@ impl Server {
|
|||
TypeId::of::<M>(),
|
||||
Box::new(move |server, envelope| {
|
||||
let envelope = envelope.into_any().downcast::<TypedEnvelope<M>>().unwrap();
|
||||
(handler)(server, *envelope).boxed()
|
||||
let span = info_span!(
|
||||
"handle message",
|
||||
payload_type = envelope.payload_type_name(),
|
||||
payload = serde_json::to_string_pretty(&envelope.payload)
|
||||
.unwrap()
|
||||
.as_str(),
|
||||
);
|
||||
let future = (handler)(server, *envelope);
|
||||
async move {
|
||||
if let Err(error) = future.await {
|
||||
tracing::error!(%error, "error handling message");
|
||||
}
|
||||
}
|
||||
.instrument(span)
|
||||
.boxed()
|
||||
}),
|
||||
);
|
||||
if prev_handler.is_some() {
|
||||
|
@ -209,7 +221,7 @@ impl Server {
|
|||
let receipt = envelope.receipt();
|
||||
let handler = handler.clone();
|
||||
async move {
|
||||
let mut store = server.store.write().await;
|
||||
let mut store = server.state_mut().await;
|
||||
let response = (handler)(server.clone(), &mut *store, envelope);
|
||||
match response {
|
||||
Ok(response) => {
|
||||
|
@ -233,12 +245,13 @@ impl Server {
|
|||
pub fn handle_connection<E: Executor>(
|
||||
self: &Arc<Self>,
|
||||
connection: Connection,
|
||||
addr: String,
|
||||
address: String,
|
||||
user_id: UserId,
|
||||
mut send_connection_id: Option<mpsc::Sender<ConnectionId>>,
|
||||
executor: E,
|
||||
) -> impl Future<Output = ()> {
|
||||
let mut this = self.clone();
|
||||
let span = info_span!("handle connection", %user_id, %address);
|
||||
async move {
|
||||
let (connection_id, handle_io, mut incoming_rx) = this
|
||||
.peer
|
||||
|
@ -253,6 +266,8 @@ impl Server {
|
|||
})
|
||||
.await;
|
||||
|
||||
tracing::info!(%user_id, %connection_id, %address, "connection opened");
|
||||
|
||||
if let Some(send_connection_id) = send_connection_id.as_mut() {
|
||||
let _ = send_connection_id.send(connection_id).await;
|
||||
}
|
||||
|
@ -270,50 +285,47 @@ impl Server {
|
|||
futures::pin_mut!(next_message);
|
||||
futures::select_biased! {
|
||||
result = handle_io => {
|
||||
if let Err(err) = result {
|
||||
log::error!("error handling rpc connection {:?} - {:?}", addr, err);
|
||||
if let Err(error) = result {
|
||||
tracing::error!(%error, "error handling I/O");
|
||||
}
|
||||
break;
|
||||
}
|
||||
message = next_message => {
|
||||
if let Some(message) = message {
|
||||
let start_time = Instant::now();
|
||||
let type_name = message.payload_type_name();
|
||||
log::info!(connection_id = connection_id.0, type_name = type_name; "rpc message received");
|
||||
if let Some(handler) = this.handlers.get(&message.payload_type_id()) {
|
||||
let notifications = this.notifications.clone();
|
||||
let is_background = message.is_background();
|
||||
let handle_message = (handler)(this.clone(), message);
|
||||
let handle_message = async move {
|
||||
if let Err(err) = handle_message.await {
|
||||
log::error!(connection_id = connection_id.0, type = type_name, error = as_display!(err); "rpc message error");
|
||||
let span = tracing::info_span!("receive message", %user_id, %connection_id, %address, type_name);
|
||||
async {
|
||||
if let Some(handler) = this.handlers.get(&message.payload_type_id()) {
|
||||
let notifications = this.notifications.clone();
|
||||
let is_background = message.is_background();
|
||||
let handle_message = (handler)(this.clone(), message);
|
||||
let handle_message = async move {
|
||||
handle_message.await;
|
||||
if let Some(mut notifications) = notifications {
|
||||
let _ = notifications.send(()).await;
|
||||
}
|
||||
};
|
||||
if is_background {
|
||||
executor.spawn_detached(handle_message);
|
||||
} else {
|
||||
log::info!(connection_id = connection_id.0, type = type_name, duration = as_debug!(start_time.elapsed()); "rpc message handled");
|
||||
handle_message.await;
|
||||
}
|
||||
if let Some(mut notifications) = notifications {
|
||||
let _ = notifications.send(()).await;
|
||||
}
|
||||
};
|
||||
if is_background {
|
||||
executor.spawn_detached(handle_message);
|
||||
} else {
|
||||
handle_message.await;
|
||||
tracing::error!("no message handler");
|
||||
}
|
||||
} else {
|
||||
log::warn!("unhandled message: {}", type_name);
|
||||
}
|
||||
}.instrument(span).await;
|
||||
} else {
|
||||
log::info!(address = as_debug!(addr); "rpc connection closed");
|
||||
tracing::info!(%user_id, %connection_id, %address, "connection closed");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Err(err) = this.sign_out(connection_id).await {
|
||||
log::error!("error signing out connection {:?} - {:?}", addr, err);
|
||||
if let Err(error) = this.sign_out(connection_id).await {
|
||||
tracing::error!(%error, "error signing out");
|
||||
}
|
||||
}
|
||||
}.instrument(span)
|
||||
}
|
||||
|
||||
async fn sign_out(self: &mut Arc<Self>, connection_id: ConnectionId) -> Result<()> {
|
||||
|
@ -849,6 +861,7 @@ impl Server {
|
|||
Ok(proto::GetUsersResponse { users })
|
||||
}
|
||||
|
||||
#[instrument(skip(self, state, user_ids))]
|
||||
fn update_contacts_for_users<'a>(
|
||||
self: &Arc<Self>,
|
||||
state: &Store,
|
||||
|
@ -864,7 +877,7 @@ impl Server {
|
|||
contacts: contacts.clone(),
|
||||
},
|
||||
)
|
||||
.log_err();
|
||||
.trace_err();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1084,6 +1097,14 @@ impl<'a> Drop for StoreWriteGuard<'a> {
|
|||
fn drop(&mut self) {
|
||||
#[cfg(test)]
|
||||
self.check_invariants();
|
||||
|
||||
let metrics = self.metrics();
|
||||
tracing::info!(
|
||||
connections = metrics.connections,
|
||||
registered_projects = metrics.registered_projects,
|
||||
shared_projects = metrics.shared_projects,
|
||||
"metrics"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1099,13 +1120,14 @@ impl Executor for RealExecutor {
|
|||
}
|
||||
}
|
||||
|
||||
#[instrument(skip(f))]
|
||||
fn broadcast<F>(sender_id: ConnectionId, receiver_ids: Vec<ConnectionId>, mut f: F)
|
||||
where
|
||||
F: FnMut(ConnectionId) -> anyhow::Result<()>,
|
||||
{
|
||||
for receiver_id in receiver_ids {
|
||||
if receiver_id != sender_id {
|
||||
f(receiver_id).log_err();
|
||||
f(receiver_id).trace_err();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1206,6 +1228,29 @@ fn to_tungstenite_message(message: AxumMessage) -> TungsteniteMessage {
|
|||
}
|
||||
}
|
||||
|
||||
/// Extension trait for turning a fallible value into an `Option`.
/// The `Result` implementation in this file logs the error with `tracing::error!`.
pub trait ResultExt {
|
||||
/// Success type returned inside `Some` by `trace_err`.
type Ok;
|
||||
|
||||
/// Consumes `self` and returns the success value, if any, as an `Option`.
fn trace_err(self) -> Option<Self::Ok>;
|
||||
}
|
||||
|
||||
impl<T, E> ResultExt for Result<T, E>
|
||||
where
|
||||
E: std::fmt::Debug,
|
||||
{
|
||||
type Ok = T;
|
||||
|
||||
fn trace_err(self) -> Option<T> {
|
||||
match self {
|
||||
Ok(value) => Some(value),
|
||||
Err(error) => {
|
||||
tracing::error!("{:?}", error);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
|
@ -3,6 +3,7 @@ use anyhow::{anyhow, Result};
|
|||
use collections::{BTreeMap, HashMap, HashSet};
|
||||
use rpc::{proto, ConnectionId};
|
||||
use std::{collections::hash_map, path::PathBuf};
|
||||
use tracing::instrument;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Store {
|
||||
|
@ -80,7 +81,33 @@ pub struct LeftProject {
|
|||
pub authorized_user_ids: Vec<UserId>,
|
||||
}
|
||||
|
||||
/// Point-in-time counters describing a `Store`, as produced by `Store::metrics`.
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
pub struct Metrics {
    /// Number of entries in the store's connection table.
    pub connections: usize,
    /// Number of projects in the store, shared or not.
    pub registered_projects: usize,
    /// Number of projects whose `share` field is set.
    pub shared_projects: usize,
}
|
||||
|
||||
impl Store {
|
||||
pub fn metrics(&self) -> Metrics {
|
||||
let connections = self.connections.len();
|
||||
let mut registered_projects = 0;
|
||||
let mut shared_projects = 0;
|
||||
for project in self.projects.values() {
|
||||
registered_projects += 1;
|
||||
if project.share.is_some() {
|
||||
shared_projects += 1;
|
||||
}
|
||||
}
|
||||
|
||||
Metrics {
|
||||
connections,
|
||||
registered_projects,
|
||||
shared_projects,
|
||||
}
|
||||
}
|
||||
|
||||
#[instrument(skip(self))]
|
||||
pub fn add_connection(&mut self, connection_id: ConnectionId, user_id: UserId) {
|
||||
self.connections.insert(
|
||||
connection_id,
|
||||
|
@ -96,6 +123,7 @@ impl Store {
|
|||
.insert(connection_id);
|
||||
}
|
||||
|
||||
#[instrument(skip(self))]
|
||||
pub fn remove_connection(
|
||||
&mut self,
|
||||
connection_id: ConnectionId,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue