Merge pull request #940 from zed-industries/telemetry

Instrument the collab server with OpenTelemetry collecting into Honeycomb.io
Antonio Scandurra 2022-04-29 17:50:55 +02:00 committed by GitHub
commit cddafa5fef
14 changed files with 612 additions and 153 deletions
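For orientation, the instrumentation in the diffs below leans on two tracing patterns: the #[instrument] attribute, which opens a span per call and records the non-skipped arguments as fields, and explicit info_span! spans attached to futures with .instrument(). A minimal sketch of both, using illustrative names rather than the committed code:

use tracing::{info_span, instrument, Instrument};

// Sketch only: a hypothetical handler showing the two span patterns that
// rpc.rs and the store module adopt in this commit.
#[instrument] // opens a span named "handle_request" with the arguments recorded as fields
async fn handle_request(user_id: u64, payload_type: &'static str) {
    let span = info_span!("handle message", %user_id, payload_type);
    async {
        // events emitted in here are grouped under the "handle message" span
        tracing::error!("example error recorded as a structured event");
    }
    .instrument(span)
    .await;
}

Once init_tracing (added further down) installs the OpenTelemetry layer, these spans and events are exported to Honeycomb.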


@@ -25,21 +25,26 @@ base64 = "0.13"
envy = "0.4.2"
env_logger = "0.8"
futures = "0.3"
json_env_logger = "0.1"
lazy_static = "1.4"
lipsum = { version = "0.8", optional = true }
log = { version = "0.4.16", features = ["kv_unstable_serde"] }
opentelemetry = { version = "0.17", features = ["rt-tokio"] }
opentelemetry-otlp = { version = "0.10", features = ["tls-roots"] }
parking_lot = "0.11.1"
rand = "0.8"
scrypt = "0.7"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
sha-1 = "0.9"
time = "0.2"
tokio = { version = "1", features = ["full"] }
tokio-tungstenite = "0.17"
tonic = "0.6"
tower = "0.4"
time = "0.2"
toml = "0.5.8"
tracing = "0.1"
tracing-opentelemetry = "0.17"
tracing-subscriber = "0.3"
[dependencies.sqlx]
version = "0.5.2"


@@ -1 +1,3 @@
ZED_ENVIRONMENT=production
RUST_LOG=info
TRACE_LEVEL=debug


@@ -1 +1,3 @@
ZED_ENVIRONMENT=staging
RUST_LOG=info
TRACE_LEVEL=debug


@@ -81,10 +81,17 @@ spec:
secretKeyRef:
name: api
key: token
- name: LOG_JSON
value: "1"
- name: RUST_LOG
value: "trace"
value: ${RUST_LOG}
- name: TRACE_LEVEL
value: ${TRACE_LEVEL}
- name: HONEYCOMB_DATASET
value: "collab"
- name: HONEYCOMB_API_KEY
valueFrom:
secretKeyRef:
name: honeycomb
key: apiKey
securityContext:
capabilities:
# FIXME - Switch to the more restrictive `PERFMON` capability.


@@ -431,6 +431,12 @@ macro_rules! id_type {
self.0 as u64
}
}
impl std::fmt::Display for $name {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
self.0.fmt(f)
}
}
};
}
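The Display impl added to id_type here is what lets id values be interpolated into tracing spans and events with the `%` sigil, as the rpc changes below do with %user_id and %connection_id. A small sketch, assuming the generated id types are in scope:

use tracing::info;

// Sketch: the `%` sigil records a field through its Display impl; this new
// impl is what makes `%user_id` possible for id_type-generated ids.
fn log_connection_opened(user_id: UserId, connection_id: ConnectionId) {
    info!(%user_id, %connection_id, "connection opened");
}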


@@ -6,18 +6,21 @@ mod rpc;
use axum::{body::Body, http::StatusCode, response::IntoResponse, Router};
use db::{Db, PostgresDb};
use serde::Deserialize;
use std::{
net::{SocketAddr, TcpListener},
sync::Arc,
};
use tracing::metadata::LevelFilter;
#[derive(Default, Deserialize)]
pub struct Config {
pub http_port: u16,
pub database_url: String,
pub api_token: String,
pub honeycomb_api_key: Option<String>,
pub honeycomb_dataset: Option<String>,
pub trace_level: Option<String>,
}
pub struct AppState {
@@ -38,11 +41,7 @@ impl AppState {
#[tokio::main]
async fn main() -> Result<()> {
if std::env::var("LOG_JSON").is_ok() {
json_env_logger::init();
} else {
env_logger::init();
}
env_logger::init();
if let Err(error) = env::load_dotenv() {
log::error!(
@@ -52,6 +51,7 @@ async fn main() -> Result<()> {
}
let config = envy::from_env::<Config>().expect("error loading config");
init_tracing(&config);
let state = AppState::new(&config).await?;
let listener = TcpListener::bind(&format!("0.0.0.0:{}", config.http_port))
@@ -112,3 +112,51 @@ impl std::fmt::Display for Error {
}
}
}
pub fn init_tracing(config: &Config) -> Option<()> {
use opentelemetry::KeyValue;
use opentelemetry_otlp::WithExportConfig;
use std::str::FromStr;
use tracing_opentelemetry::OpenTelemetryLayer;
use tracing_subscriber::layer::SubscriberExt;
let (honeycomb_api_key, honeycomb_dataset) = config
.honeycomb_api_key
.clone()
.zip(config.honeycomb_dataset.clone())?;
let mut metadata = tonic::metadata::MetadataMap::new();
metadata.insert("x-honeycomb-team", honeycomb_api_key.parse().unwrap());
let tracer = opentelemetry_otlp::new_pipeline()
.tracing()
.with_exporter(
opentelemetry_otlp::new_exporter()
.tonic()
.with_endpoint("https://api.honeycomb.io")
.with_metadata(metadata),
)
.with_trace_config(opentelemetry::sdk::trace::config().with_resource(
opentelemetry::sdk::Resource::new(vec![KeyValue::new(
"service.name",
honeycomb_dataset,
)]),
))
.install_batch(opentelemetry::runtime::Tokio)
.expect("failed to initialize tracing");
let subscriber = tracing_subscriber::Registry::default()
.with(OpenTelemetryLayer::new(tracer))
.with(tracing_subscriber::fmt::layer())
.with(
config
.trace_level
.as_ref()
.map_or(LevelFilter::INFO, |level| {
LevelFilter::from_str(level).unwrap()
}),
);
tracing::subscriber::set_global_default(subscriber).unwrap();
None
}
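To summarize the configuration contract above: HONEYCOMB_API_KEY and HONEYCOMB_DATASET must both be present, otherwise the `?` on the zipped options returns early and no tracing subscriber is installed at all (leaving only env_logger output for `log` macros); TRACE_LEVEL feeds the subscriber's LevelFilter, defaulting to INFO. A sketch of the same control flow, with install_otlp_pipeline as a hypothetical stand-in for the exporter setup shown above:

use std::str::FromStr;
use tracing::metadata::LevelFilter;

// Sketch only; mirrors init_tracing's early-return shape.
fn init_tracing_sketch(config: &Config) -> Option<()> {
    let api_key = config.honeycomb_api_key.clone()?; // bail out if unset
    let dataset = config.honeycomb_dataset.clone()?; // likewise
    let level = config
        .trace_level
        .as_deref()
        .map(|level| LevelFilter::from_str(level).expect("invalid TRACE_LEVEL"))
        .unwrap_or(LevelFilter::INFO);
    install_otlp_pipeline(&api_key, &dataset, level); // hypothetical helper
    None // the committed function also ends with None; callers ignore the value
}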


@@ -25,7 +25,6 @@ use axum::{
use collections::{HashMap, HashSet};
use futures::{channel::mpsc, future::BoxFuture, FutureExt, SinkExt, StreamExt, TryStreamExt};
use lazy_static::lazy_static;
use log::{as_debug, as_display};
use rpc::{
proto::{self, AnyTypedEnvelope, EntityMessage, EnvelopedMessage, RequestMessage},
Connection, ConnectionId, Peer, TypedEnvelope,
@@ -38,7 +37,7 @@ use std::{
ops::{Deref, DerefMut},
rc::Rc,
sync::Arc,
time::{Duration, Instant},
time::Duration,
};
use store::{Store, Worktree};
use time::OffsetDateTime;
@@ -47,11 +46,10 @@ use tokio::{
time::Sleep,
};
use tower::ServiceBuilder;
use util::ResultExt;
use tracing::{info_span, instrument, Instrument};
type MessageHandler = Box<
dyn Send + Sync + Fn(Arc<Server>, Box<dyn AnyTypedEnvelope>) -> BoxFuture<'static, Result<()>>,
>;
type MessageHandler =
Box<dyn Send + Sync + Fn(Arc<Server>, Box<dyn AnyTypedEnvelope>) -> BoxFuture<'static, ()>>;
pub struct Server {
peer: Arc<Peer>,
@@ -156,7 +154,21 @@ impl Server {
TypeId::of::<M>(),
Box::new(move |server, envelope| {
let envelope = envelope.into_any().downcast::<TypedEnvelope<M>>().unwrap();
(handler)(server, *envelope).boxed()
let span = info_span!(
"handle message",
payload_type = envelope.payload_type_name(),
payload = serde_json::to_string_pretty(&envelope.payload)
.unwrap()
.as_str(),
);
let future = (handler)(server, *envelope);
async move {
if let Err(error) = future.await {
tracing::error!(%error, "error handling message");
}
}
.instrument(span)
.boxed()
}),
);
if prev_handler.is_some() {
@@ -209,7 +221,7 @@ impl Server {
let receipt = envelope.receipt();
let handler = handler.clone();
async move {
let mut store = server.store.write().await;
let mut store = server.state_mut().await;
let response = (handler)(server.clone(), &mut *store, envelope);
match response {
Ok(response) => {
@@ -233,12 +245,13 @@ impl Server {
pub fn handle_connection<E: Executor>(
self: &Arc<Self>,
connection: Connection,
addr: String,
address: String,
user_id: UserId,
mut send_connection_id: Option<mpsc::Sender<ConnectionId>>,
executor: E,
) -> impl Future<Output = ()> {
let mut this = self.clone();
let span = info_span!("handle connection", %user_id, %address);
async move {
let (connection_id, handle_io, mut incoming_rx) = this
.peer
@@ -253,6 +266,8 @@ impl Server {
})
.await;
tracing::info!(%user_id, %connection_id, %address, "connection opened");
if let Some(send_connection_id) = send_connection_id.as_mut() {
let _ = send_connection_id.send(connection_id).await;
}
@@ -270,50 +285,47 @@ impl Server {
futures::pin_mut!(next_message);
futures::select_biased! {
result = handle_io => {
if let Err(err) = result {
log::error!("error handling rpc connection {:?} - {:?}", addr, err);
if let Err(error) = result {
tracing::error!(%error, "error handling I/O");
}
break;
}
message = next_message => {
if let Some(message) = message {
let start_time = Instant::now();
let type_name = message.payload_type_name();
log::info!(connection_id = connection_id.0, type_name = type_name; "rpc message received");
if let Some(handler) = this.handlers.get(&message.payload_type_id()) {
let notifications = this.notifications.clone();
let is_background = message.is_background();
let handle_message = (handler)(this.clone(), message);
let handle_message = async move {
if let Err(err) = handle_message.await {
log::error!(connection_id = connection_id.0, type = type_name, error = as_display!(err); "rpc message error");
let span = tracing::info_span!("receive message", %user_id, %connection_id, %address, type_name);
async {
if let Some(handler) = this.handlers.get(&message.payload_type_id()) {
let notifications = this.notifications.clone();
let is_background = message.is_background();
let handle_message = (handler)(this.clone(), message);
let handle_message = async move {
handle_message.await;
if let Some(mut notifications) = notifications {
let _ = notifications.send(()).await;
}
};
if is_background {
executor.spawn_detached(handle_message);
} else {
log::info!(connection_id = connection_id.0, type = type_name, duration = as_debug!(start_time.elapsed()); "rpc message handled");
handle_message.await;
}
if let Some(mut notifications) = notifications {
let _ = notifications.send(()).await;
}
};
if is_background {
executor.spawn_detached(handle_message);
} else {
handle_message.await;
tracing::error!("no message handler");
}
} else {
log::warn!("unhandled message: {}", type_name);
}
}.instrument(span).await;
} else {
log::info!(address = as_debug!(addr); "rpc connection closed");
tracing::info!(%user_id, %connection_id, %address, "connection closed");
break;
}
}
}
}
if let Err(err) = this.sign_out(connection_id).await {
log::error!("error signing out connection {:?} - {:?}", addr, err);
if let Err(error) = this.sign_out(connection_id).await {
tracing::error!(%error, "error signing out");
}
}
}.instrument(span)
}
async fn sign_out(self: &mut Arc<Self>, connection_id: ConnectionId) -> Result<()> {
@@ -849,6 +861,7 @@ impl Server {
Ok(proto::GetUsersResponse { users })
}
#[instrument(skip(self, state, user_ids))]
fn update_contacts_for_users<'a>(
self: &Arc<Self>,
state: &Store,
@@ -864,7 +877,7 @@ impl Server {
contacts: contacts.clone(),
},
)
.log_err();
.trace_err();
}
}
}
@@ -1084,6 +1097,14 @@ impl<'a> Drop for StoreWriteGuard<'a> {
fn drop(&mut self) {
#[cfg(test)]
self.check_invariants();
let metrics = self.metrics();
tracing::info!(
connections = metrics.connections,
registered_projects = metrics.registered_projects,
shared_projects = metrics.shared_projects,
"metrics"
);
}
}
@@ -1099,13 +1120,14 @@ impl Executor for RealExecutor {
}
}
#[instrument(skip(f))]
fn broadcast<F>(sender_id: ConnectionId, receiver_ids: Vec<ConnectionId>, mut f: F)
where
F: FnMut(ConnectionId) -> anyhow::Result<()>,
{
for receiver_id in receiver_ids {
if receiver_id != sender_id {
f(receiver_id).log_err();
f(receiver_id).trace_err();
}
}
}
@@ -1206,6 +1228,29 @@ fn to_tungstenite_message(message: AxumMessage) -> TungsteniteMessage {
}
}
pub trait ResultExt {
type Ok;
fn trace_err(self) -> Option<Self::Ok>;
}
impl<T, E> ResultExt for Result<T, E>
where
E: std::fmt::Debug,
{
type Ok = T;
fn trace_err(self) -> Option<T> {
match self {
Ok(value) => Some(value),
Err(error) => {
tracing::error!("{:?}", error);
None
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
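The ResultExt::trace_err helper above takes over the role of util::ResultExt::log_err (whose import this file drops) but reports failures through tracing, so they become structured error events on the current span rather than plain log lines. An illustrative use, with a hypothetical function name:

// Sketch: any Result whose error implements Debug can be reported and
// converted to an Option in one call; on Err this emits
// tracing::error!("{:?}", error) and yields None.
fn notify_peer(result: anyhow::Result<()>) -> Option<()> {
    result.trace_err()
}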


@ -3,6 +3,7 @@ use anyhow::{anyhow, Result};
use collections::{BTreeMap, HashMap, HashSet};
use rpc::{proto, ConnectionId};
use std::{collections::hash_map, path::PathBuf};
use tracing::instrument;
#[derive(Default)]
pub struct Store {
@@ -80,7 +81,33 @@ pub struct LeftProject {
pub authorized_user_ids: Vec<UserId>,
}
#[derive(Copy, Clone)]
pub struct Metrics {
pub connections: usize,
pub registered_projects: usize,
pub shared_projects: usize,
}
impl Store {
pub fn metrics(&self) -> Metrics {
let connections = self.connections.len();
let mut registered_projects = 0;
let mut shared_projects = 0;
for project in self.projects.values() {
registered_projects += 1;
if project.share.is_some() {
shared_projects += 1;
}
}
Metrics {
connections,
registered_projects,
shared_projects,
}
}
#[instrument(skip(self))]
pub fn add_connection(&mut self, connection_id: ConnectionId, user_id: UserId) {
self.connections.insert(
connection_id,
@@ -96,6 +123,7 @@ impl Store {
.insert(connection_id);
}
#[instrument(skip(self))]
pub fn remove_connection(
&mut self,
connection_id: ConnectionId,