Add service for Prometheus, shows up in Grafana

This commit is contained in:
Dominik Werder
2022-05-17 16:03:54 +02:00
parent c159b83b8c
commit 2f9a4092c8
5 changed files with 449 additions and 515 deletions

View File

@@ -11,16 +11,31 @@ use log::*;
use netpod::Database;
use scylla::batch::Consistency;
use serde::{Deserialize, Serialize};
use stats::{CaConnStats2, CaConnStats2Agg, CaConnVecStats};
use stats::{CaConnStats2Agg, CaConnStats2AggDiff};
use std::collections::BTreeMap;
use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4};
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::{Arc, Mutex, Once};
use std::time::{Duration, Instant};
use tokio::fs::OpenOptions;
use tokio::io::AsyncReadExt;
use tokio::net::TcpStream;
// Process-wide slot for the most recently stored `CaConnStats2Agg`.
// The outer `Option` is `None` until `get_metrics` has run its one-time
// initialisation; the inner `Option` is `None` until a value is stored.
static mut METRICS: Option<Mutex<Option<CaConnStats2Agg>>> = None;
// Ensures the initialisation of `METRICS` happens exactly once.
static METRICS_ONCE: Once = Once::new();

/// Returns a mutable reference to the global metrics slot, creating the slot
/// (holding `None`) on the first call.
///
/// Panics if the internal mutex is poisoned.
///
/// NOTE(review): the mutex guard is a local and is released when this
/// function returns, while the returned reference is detached from it through
/// a raw pointer. The lock therefore does not cover any use of the returned
/// reference, and nothing here prevents several callers from holding it at
/// the same time. Making this sound needs a different return type (for
/// example handing out the guard), which would change the interface.
fn get_metrics() -> &'static mut Option<CaConnStats2Agg> {
    METRICS_ONCE.call_once(|| {
        // SAFETY: `Once` runs this closure a single time and other callers of
        // `call_once` wait until it has finished before touching `METRICS`.
        unsafe { METRICS = Some(Mutex::new(None)) };
    });
    // SAFETY: the `call_once` above guarantees `METRICS` is `Some` here.
    let mutex = unsafe { METRICS.as_mut().unwrap() };
    let mut guard = mutex.lock().unwrap();
    let slot: *mut Option<CaConnStats2Agg> = &mut *guard;
    // Going through the raw pointer stretches the borrow to `'static`; see the
    // review note above for why this is not protected by the lock.
    unsafe { &mut *slot }
}
#[derive(Debug, Serialize, Deserialize)]
struct ChannelConfig {
channels: Vec<String>,
@@ -214,6 +229,7 @@ pub async fn ca_search(opts: ListenFromFileOpts) -> Result<(), Error> {
}
pub async fn ca_connect(opts: ListenFromFileOpts) -> Result<(), Error> {
tokio::spawn(start_metrics_service());
let facility = "scylla";
let opts = parse_config(opts.config).await?;
let d = Database {
@@ -249,7 +265,7 @@ pub async fn ca_connect(opts: ListenFromFileOpts) -> Result<(), Error> {
let rows = pg_client
.query(&qu_find_addr, &[&facility, ch])
.await
.map_err(|e| Error::with_msg_no_trace(format!("{e:?}")))?;
.map_err(|e| Error::with_msg_no_trace(format!("PG error: {e:?}")))?;
if rows.is_empty() {
error!("can not find address of channel {}", ch);
} else {
@@ -280,8 +296,7 @@ pub async fn ca_connect(opts: ListenFromFileOpts) -> Result<(), Error> {
}
let data_store = Arc::new(DataStore::new(pg_client, scy.clone()).await?);
let mut conn_jhs = vec![];
let mut conn_stats_all = vec![];
let mut conn_stats2 = vec![];
let mut conn_stats = vec![];
for (host, channels) in channels_by_host {
if false && host.ip() != &"172.26.24.76".parse::<Ipv4Addr>().unwrap() {
continue;
@@ -289,10 +304,15 @@ pub async fn ca_connect(opts: ListenFromFileOpts) -> Result<(), Error> {
let data_store = data_store.clone();
debug!("Create TCP connection to {:?}", (host.ip(), host.port()));
let addr = SocketAddrV4::new(host.ip().clone(), host.port());
let tcp = TcpStream::connect(addr).await?;
let tcp = match TcpStream::connect(addr).await {
Ok(k) => k,
Err(e) => {
error!("Can not connect to {addr:?} {e:?}");
continue;
}
};
let mut conn = CaConn::new(tcp, addr, data_store.clone());
conn_stats_all.push(conn.stats());
conn_stats2.push(conn.stats2());
conn_stats.push(conn.stats2());
for c in channels {
conn.channel_add(c);
}
@@ -314,22 +334,21 @@ pub async fn ca_connect(opts: ListenFromFileOpts) -> Result<(), Error> {
let jh = tokio::spawn(conn_block);
conn_jhs.push(jh);
}
let mut agg_last = CaConnVecStats::new(Instant::now());
let mut agg2_last = CaConnStats2Agg::new();
let mut agg_last = CaConnStats2Agg::new();
loop {
tokio::time::sleep(Duration::from_millis(2000)).await;
let mut agg = CaConnVecStats::new(Instant::now());
for st in &conn_stats_all {
agg.push(&st);
let agg = CaConnStats2Agg::new();
for g in &conn_stats {
agg.push(&g);
}
let mut agg2 = CaConnStats2Agg::new();
for st in &conn_stats2 {
agg2.push(&st);
}
let diff = agg.diff_against(&agg_last);
info!("{diff}");
let m = get_metrics();
*m = Some(agg.clone());
let diff = CaConnStats2AggDiff::diff_from(&agg_last, &agg);
info!("{}", diff.display());
agg_last = agg;
agg2_last = agg2;
if false {
break;
}
}
for jh in conn_jhs {
match jh.await {
@@ -346,3 +365,20 @@ pub async fn ca_connect(opts: ListenFromFileOpts) -> Result<(), Error> {
}
Ok(())
}
async fn start_metrics_service() {
let app = axum::Router::new().route(
"/metrics",
axum::routing::get(|| async {
let stats = get_metrics();
match stats {
Some(s) => s.prometheus(),
None => String::new(),
}
}),
);
axum::Server::bind(&"0.0.0.0:3011".parse().unwrap())
.serve(app.into_make_service())
.await
.unwrap()
}

View File

@@ -11,11 +11,11 @@ use libc::c_int;
use log::*;
use netpod::timeunits::SEC;
use netpod::{ScalarType, Shape};
use stats::{CaConnStats, CaConnStats2, IntervalEma};
use stats::{CaConnStats2, IntervalEma};
use std::collections::{BTreeMap, VecDeque};
use std::net::{Ipv4Addr, SocketAddrV4};
use std::pin::Pin;
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::{Duration, Instant, SystemTime};
@@ -105,10 +105,10 @@ macro_rules! insert_scalar_impl {
ts_msp_changed: bool,
st: ScalarType,
sh: Shape,
stats2: Arc<CaConnStats2>,
stats: Arc<CaConnStats2>,
) {
if futs_queue.len() >= INSERT_FUTS_LIM {
stats2.inserts_discard.fetch_add(1, Ordering::Release);
stats.inserts_discard.fetch_add(1, Ordering::AcqRel);
return;
}
let pulse = 0 as u64;
@@ -120,7 +120,7 @@ macro_rules! insert_scalar_impl {
val,
);
let fut3 = ScyInsertFut::new(data_store.scy.clone(), data_store.$qu_insert.clone(), params);
stats2.inserts_val.fetch_add(1, Ordering::Release);
stats.inserts_val.fetch_add(1, Ordering::AcqRel);
let fut = if ts_msp_changed {
let fut1 = ScyInsertFut::new(
data_store.scy.clone(),
@@ -138,7 +138,7 @@ macro_rules! insert_scalar_impl {
data_store.qu_insert_ts_msp.clone(),
(series.id() as i64, ts_msp as i64),
);
stats2.inserts_msp.fetch_add(1, Ordering::Release);
stats.inserts_msp.fetch_add(1, Ordering::AcqRel);
Box::pin(fut1.and_then(move |_| fut2).and_then(move |_| fut3)) as _
} else {
Box::pin(fut3) as _
@@ -162,10 +162,10 @@ macro_rules! insert_array_impl {
ts_msp_changed: bool,
st: ScalarType,
sh: Shape,
stats2: Arc<CaConnStats2>,
stats: Arc<CaConnStats2>,
) {
if futs_queue.len() >= INSERT_FUTS_LIM {
stats2.inserts_discard.fetch_add(1, Ordering::Release);
stats.inserts_discard.fetch_add(1, Ordering::AcqRel);
return;
}
let pulse = 0 as u64;
@@ -177,7 +177,7 @@ macro_rules! insert_array_impl {
val,
);
let fut3 = ScyInsertFut::new(data_store.scy.clone(), data_store.$qu_insert.clone(), params);
stats2.inserts_val.fetch_add(1, Ordering::Release);
stats.inserts_val.fetch_add(1, Ordering::AcqRel);
let fut = if ts_msp_changed {
let fut1 = ScyInsertFut::new(
data_store.scy.clone(),
@@ -195,7 +195,7 @@ macro_rules! insert_array_impl {
data_store.qu_insert_ts_msp.clone(),
(series.id() as i64, ts_msp as i64),
);
stats2.inserts_msp.fetch_add(1, Ordering::Release);
stats.inserts_msp.fetch_add(1, Ordering::AcqRel);
Box::pin(fut1.and_then(move |_| fut2).and_then(move |_| fut3)) as _
} else {
Box::pin(fut3) as _
@@ -298,7 +298,6 @@ pub struct CaConn {
FuturesOrdered<Pin<Box<dyn Future<Output = Result<(u32, u32, u16, u16, Existence<SeriesId>), Error>> + Send>>>,
value_insert_futs: FuturesOrdered<Pin<Box<dyn Future<Output = Result<(), Error>> + Send>>>,
remote_addr_dbg: SocketAddrV4,
stats: Arc<CaConnStats>,
stats2: Arc<CaConnStats2>,
}
@@ -321,15 +320,10 @@ impl CaConn {
fut_get_series: FuturesOrdered::new(),
value_insert_futs: FuturesOrdered::new(),
remote_addr_dbg,
stats: Arc::new(CaConnStats::new()),
stats2: Arc::new(CaConnStats2::new()),
}
}
pub fn stats(&self) -> Arc<CaConnStats> {
self.stats.clone()
}
pub fn stats2(&self) -> Arc<CaConnStats2> {
self.stats2.clone()
}
@@ -522,7 +516,6 @@ impl CaConn {
}
}
}
self.stats.inserts_started.fetch_add(1, Ordering::Release);
Ok(())
}
@@ -669,7 +662,7 @@ impl CaConn {
let mut msgs_tmp = vec![];
self.check_channels_state_init(&mut msgs_tmp)?;
let ts2 = Instant::now();
self.stats
self.stats2
.time_check_channels_state_init
.fetch_add((ts2.duration_since(ts1) * 1000000).as_secs(), Ordering::Release);
ts1 = ts2;
@@ -737,7 +730,7 @@ impl CaConn {
CaMsgTy::EventAddRes(k) => {
let res = Self::handle_event_add_res(self, k);
let ts2 = Instant::now();
self.stats
self.stats2
.time_handle_event_add_res
.fetch_add((ts2.duration_since(ts1) * 1000000).as_secs(), Ordering::Release);
ts1 = ts2;
@@ -781,13 +774,13 @@ impl Stream for CaConn {
let ret = loop {
self.handle_insert_futs(cx)?;
let ts2 = Instant::now();
self.stats
self.stats2
.poll_time_handle_insert_futs
.fetch_add((ts2.duration_since(ts1) * 1000000).as_secs(), Ordering::AcqRel);
ts1 = ts2;
self.handle_get_series_futs(cx)?;
let ts2 = Instant::now();
self.stats
self.stats2
.poll_time_get_series_futs
.fetch_add((ts2.duration_since(ts1) * 1000000).as_secs(), Ordering::AcqRel);
ts1 = ts2;
@@ -812,7 +805,7 @@ impl Stream for CaConn {
CaConnState::Listen => match {
let res = self.handle_conn_listen(cx);
let ts2 = Instant::now();
self.stats
self.stats2
.time_handle_conn_listen
.fetch_add((ts2.duration_since(ts1) * 1000000).as_secs(), Ordering::AcqRel);
ts1 = ts2;
@@ -824,9 +817,7 @@ impl Stream for CaConn {
CaConnState::PeerReady => {
let res = self.handle_peer_ready(cx);
let ts2 = Instant::now();
self.stats
.time_handle_peer_ready
.fetch_add((ts2.duration_since(ts1) * 1000000).as_secs(), Ordering::AcqRel);
self.stats2.time_handle_peer_ready_dur(ts2.duration_since(ts1));
ts1 = ts2;
res
}
@@ -837,12 +828,9 @@ impl Stream for CaConn {
if nn > 1000 {
warn!("insert_queue_len {nn}");
}
self.stats.insert_queue_len.store(nn, Ordering::Release);
self.stats2.inserts_queue_len.store(nn, Ordering::Release);
let ts_outer_2 = Instant::now();
self.stats.poll_time_all.fetch_add(
(ts_outer_2.duration_since(ts_outer_1) * 1000000).as_secs(),
Ordering::AcqRel,
);
self.stats2.poll_time_all_dur(ts_outer_2.duration_since(ts_outer_1));
ret
}
}