Basic ca echo

This commit is contained in:
Dominik Werder
2023-01-12 11:04:31 +01:00
parent a5c927538e
commit 9ad7b79225
8 changed files with 408 additions and 304 deletions
+5
View File
@@ -197,6 +197,11 @@ pub async fn ca_connect(opts: CaIngestOpts, channels: &Vec<String>) -> Result<()
};
let ingest_commons = Arc::new(ingest_commons);
tokio::spawn({
let rx = ingest_commons.ca_conn_set.conn_item_rx();
async move { while let Ok(_item) = rx.recv().await {} }
});
// TODO use a new stats type:
let store_stats = Arc::new(CaConnStats::new());
let ttls = crate::insertworker::Ttls {
+74 -11
View File
@@ -362,6 +362,18 @@ impl ConnCommand {
}
}
/// Payload carried by a [`CaConnEvent`].
///
/// `None` is what the connection's stream yields for an ordinary poll result
/// that carries no further information. `EchoTimeout` is presumably meant to
/// report that the peer did not answer an Echo request in time.
/// NOTE(review): nothing in the code visible here constructs `EchoTimeout`
/// yet; confirm the intended producer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CaConnEventValue {
    None,
    EchoTimeout,
}

/// Event emitted by a `CaConn` when it is polled as a stream.
///
/// The derives beyond `Debug` let consumers duplicate an event (for example
/// to fan it out) and compare events in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CaConnEvent {
    /// Monotonic timestamp taken when the event was created.
    pub ts: Instant,
    /// What happened.
    pub value: CaConnEventValue,
}
pub struct CaConn {
state: CaConnState,
shutdown: bool,
@@ -392,6 +404,8 @@ pub struct CaConn {
conn_backoff_beg: f32,
inserts_counter: u64,
extra_inserts_conf: ExtraInsertsConf,
ioc_ping_last: Instant,
ioc_ping_start: Option<Instant>,
}
impl CaConn {
@@ -434,6 +448,8 @@ impl CaConn {
conn_backoff_beg: 0.02,
inserts_counter: 0,
extra_inserts_conf: ExtraInsertsConf::new(),
ioc_ping_last: Instant::now(),
ioc_ping_start: None,
}
}
@@ -700,6 +716,25 @@ impl CaConn {
fn check_channels_alive(&mut self) -> Result<(), Error> {
let tsnow = Instant::now();
trace!("CheckChannelsAlive {addr:?}", addr = &self.remote_addr_dbg);
if self.ioc_ping_last.elapsed() > Duration::from_millis(20000) {
if let Some(started) = self.ioc_ping_start {
if started.elapsed() > Duration::from_millis(4000) {
warn!("Echo timeout {addr:?}", addr = self.remote_addr_dbg);
self.shutdown = true;
}
} else {
self.ioc_ping_start = Some(Instant::now());
if let Some(proto) = &mut self.proto {
trace!("push echo to {}", self.remote_addr_dbg);
let msg = CaMsg { ty: CaMsgTy::Echo };
proto.push_out(msg);
} else {
warn!("can not push echo, no proto");
self.shutdown = true;
}
}
}
let mut alive_count = 0;
let mut not_alive_count = 0;
for (_, st) in &self.channels {
@@ -1283,12 +1318,27 @@ impl CaConn {
warn!("channel access error message {e:?}");
}
CaMsgTy::AccessRightsRes(_) => {}
k => {
warn!("unexpected ca cmd {k:?}");
CaMsgTy::Echo => {
    // The peer answered (or spontaneously sent) an Echo message.
    let addr = &self.remote_addr_dbg;
    if let Some(started) = self.ioc_ping_start {
        // Round-trip time of our own Echo request, in milliseconds.
        let dt = started.elapsed().as_secs_f32() * 1e3;
        // Test the larger threshold first: with `> 50.` checked first the
        // `> 500.` branch could never be reached, so very slow replies were
        // only ever logged at info level.
        if dt > 500. {
            warn!("Received Echo {dt:10.0}ms {addr:?}");
        } else if dt > 50. {
            info!("Received Echo {dt:10.0}ms {addr:?}");
        }
    } else {
        info!("Received Echo even though we didn't asked for it {addr:?}");
    }
    // Any Echo counts as proof of life: restart the ping interval and clear
    // the pending-request marker.
    self.ioc_ping_last = Instant::now();
    self.ioc_ping_start = None;
}
_ => {
warn!("Received unexpected protocol message {:?}", k);
}
}
}
_ => {}
CaItem::Empty => {}
}
Ready(Some(Ok(())))
}
@@ -1342,7 +1392,7 @@ impl CaConn {
CaConnState::Unconnected => {
let addr = self.remote_addr_dbg.clone();
trace!("create tcp connection to {:?}", (addr.ip(), addr.port()));
let fut = tokio::time::timeout(Duration::from_millis(500), TcpStream::connect(addr));
let fut = tokio::time::timeout(Duration::from_millis(1000), TcpStream::connect(addr));
self.state = CaConnState::Connecting(addr, Box::pin(fut));
None
}
@@ -1469,17 +1519,19 @@ impl CaConn {
}
impl Stream for CaConn {
type Item = Result<(), Error>;
type Item = Result<CaConnEvent, Error>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
use Poll::*;
self.stats.caconn_poll_count_inc();
if self.shutdown {
info!("CaConn poll");
info!("CaConn poll in shutdown");
}
let mut i1 = 0;
let ret = loop {
i1 += 1;
if self.shutdown {
info!("CaConn loop 1");
info!("CaConn in shutdown loop 1");
}
self.stats.caconn_loop1_count_inc();
if !self.shutdown {
@@ -1493,9 +1545,11 @@ impl Stream for CaConn {
if self.shutdown {
if self.insert_item_queue.len() == 0 {
trace!("no more items to flush");
break Ready(Ok(()));
if i1 >= 10 {
break Ready(Ok(()));
}
} else {
info!("more items {}", self.insert_item_queue.len());
//info!("more items {}", self.insert_item_queue.len());
}
}
if self.insert_item_queue.len() >= self.insert_queue_max {
@@ -1503,7 +1557,9 @@ impl Stream for CaConn {
}
if !self.shutdown {
if let Some(v) = self.loop_inner(cx) {
break v;
if i1 >= 10 {
break v;
}
}
}
};
@@ -1515,7 +1571,14 @@ impl Stream for CaConn {
return Ready(None);
}
match ret {
Ready(x) => Ready(Some(x)),
Ready(Ok(())) => {
let item = CaConnEvent {
ts: Instant::now(),
value: CaConnEventValue::None,
};
Ready(Some(Ok(item)))
}
Ready(Err(e)) => Ready(Some(Err(e))),
Pending => Pending,
}
}
+17 -5
View File
@@ -1,11 +1,11 @@
use super::conn::ConnCommand;
use super::conn::{CaConnEvent, ConnCommand};
use super::store::DataStore;
use super::IngestCommons;
use crate::ca::conn::CaConn;
use crate::errconv::ErrConv;
use crate::rt::{JoinHandle, TokMx};
use crate::store::CommonInsertItemQueueSender;
use async_channel::Sender;
use async_channel::{Receiver, Sender};
use err::Error;
use futures_util::{FutureExt, StreamExt};
use netpod::log::*;
@@ -67,15 +67,24 @@ impl CaConnRess {
// There, make spawning part of this function?
/// Set of channel-access connections together with the channel on which
/// their events are collected.
pub struct CaConnSet {
// Per-connection resources, keyed by the remote socket address.
ca_conn_ress: TokMx<BTreeMap<SocketAddr, CaConnRess>>,
// Sending half of the event channel; a clone is handed to each connection task.
conn_item_tx: Sender<CaConnEvent>,
// Receiving half of the event channel; clones are given out by `conn_item_rx()`.
conn_item_rx: Receiver<CaConnEvent>,
}
impl CaConnSet {
/// Build an empty connection set.
///
/// The set starts with no connections and owns both halves of a bounded
/// event channel with room for 10000 queued items.
pub fn new() -> Self {
    let channel = async_channel::bounded(10000);
    Self {
        ca_conn_ress: Default::default(),
        conn_item_tx: channel.0,
        conn_item_rx: channel.1,
    }
}
pub fn conn_item_rx(&self) -> Receiver<CaConnEvent> {
self.conn_item_rx.clone()
}
pub fn ca_conn_ress(&self) -> &TokMx<BTreeMap<SocketAddr, CaConnRess>> {
&self.ca_conn_ress
}
@@ -108,21 +117,23 @@ impl CaConnSet {
let conn = conn;
let conn_tx = conn.conn_command_tx();
let conn_stats = conn.stats();
let conn_item_tx = self.conn_item_tx.clone();
let conn_fut = async move {
let stats = conn.stats();
let mut conn = conn;
while let Some(item) = conn.next().await {
match item {
Ok(_) => {
Ok(item) => {
stats.conn_item_count_inc();
conn_item_tx.send(item).await?;
}
Err(e) => {
error!("CaConn gives error: {e:?}");
break;
return Err(e);
}
}
}
Ok::<_, Error>(())
Ok(())
};
let jh = tokio::spawn(conn_fut);
let ca_conn_ress = CaConnRess {
@@ -234,6 +245,7 @@ impl CaConnSet {
Ok(())
}
/// Add channel, or create a new CaConn and add the channel.
pub async fn add_channel_to_addr(
&self,
backend: String,
+9
View File
@@ -205,6 +205,7 @@ pub enum CaMsgTy {
EventAddRes(EventAddRes),
ReadNotify(ReadNotify),
ReadNotifyRes(ReadNotifyRes),
Echo,
}
impl CaMsgTy {
@@ -227,6 +228,7 @@ impl CaMsgTy {
EventAddRes(_) => 0x01,
ReadNotify(_) => 0x0f,
ReadNotifyRes(_) => 0x0f,
Echo => 0x17,
}
}
@@ -259,6 +261,7 @@ impl CaMsgTy {
error!("should not attempt to serialize the response again");
panic!();
}
Echo => 0,
}
}
@@ -284,6 +287,7 @@ impl CaMsgTy {
EventAddRes(x) => x.data_type,
ReadNotify(x) => x.data_type,
ReadNotifyRes(x) => x.data_type,
Echo => 0,
}
}
@@ -306,6 +310,7 @@ impl CaMsgTy {
EventAddRes(x) => x.data_count,
ReadNotify(x) => x.data_count,
ReadNotifyRes(x) => x.data_count,
Echo => 0,
}
}
@@ -328,6 +333,7 @@ impl CaMsgTy {
EventAddRes(x) => x.status,
ReadNotify(x) => x.sid,
ReadNotifyRes(x) => x.sid,
Echo => 0,
}
}
@@ -350,6 +356,7 @@ impl CaMsgTy {
EventAddRes(x) => x.subid,
ReadNotify(x) => x.ioid,
ReadNotifyRes(x) => x.ioid,
Echo => 0,
}
}
@@ -413,6 +420,7 @@ impl CaMsgTy {
EventAddRes(_) => {}
ReadNotify(_) => {}
ReadNotifyRes(_) => {}
Echo => {}
}
}
}
@@ -708,6 +716,7 @@ impl CaMsg {
}),
}
}
0x17 => CaMsg { ty: CaMsgTy::Echo },
x => return Err(Error::with_msg_no_trace(format!("unsupported ca command {}", x))),
};
Ok(msg)
+9 -9
View File
@@ -43,6 +43,7 @@ async fn back_off_sleep(backoff_dt: &mut Duration) {
tokio::time::sleep(*backoff_dt).await;
}
#[derive(Debug, Clone)]
pub struct Ttls {
pub index: Duration,
pub d0: Duration,
@@ -130,9 +131,6 @@ pub async fn spawn_scylla_insert_workers(
insert_item_queue.receiver()
};
let ingest_commons = ingest_commons.clone();
let ttl_msp = ttls.index;
let ttl_0d = ttls.d0;
let ttl_1d = ttls.d1;
let fut = async move {
let backoff_0 = Duration::from_millis(10);
let mut backoff = backoff_0.clone();
@@ -146,7 +144,7 @@ pub async fn spawn_scylla_insert_workers(
};
match item {
QueryItem::ConnectionStatus(item) => {
match crate::store::insert_connection_status(item, ttl_msp, &data_store, &stats).await {
match crate::store::insert_connection_status(item, ttls.index, &data_store, &stats).await {
Ok(_) => {
stats.connection_status_insert_done_inc();
backoff = backoff_0;
@@ -158,7 +156,7 @@ pub async fn spawn_scylla_insert_workers(
}
}
QueryItem::ChannelStatus(item) => {
match crate::store::insert_channel_status(item, ttl_msp, &data_store, &stats).await {
match crate::store::insert_channel_status(item, ttls.index, &data_store, &stats).await {
Ok(_) => {
stats.channel_status_insert_done_inc();
backoff = backoff_0;
@@ -172,7 +170,9 @@ pub async fn spawn_scylla_insert_workers(
QueryItem::Insert(item) => {
let insert_frac = ingest_commons.insert_frac.load(Ordering::Acquire);
if i1 % 1000 < insert_frac {
match crate::store::insert_item(item, ttl_msp, ttl_0d, ttl_1d, &data_store, &stats).await {
match crate::store::insert_item(item, ttls.index, ttls.d0, ttls.d1, &data_store, &stats)
.await
{
Ok(_) => {
stats.store_worker_insert_done_inc();
backoff = backoff_0;
@@ -194,7 +194,7 @@ pub async fn spawn_scylla_insert_workers(
item.ts as i64,
item.ema,
item.emd,
ttl_msp.as_secs() as i32,
ttls.index.as_secs() as i32,
);
let qres = data_store.scy.execute(&data_store.qu_insert_muted, values).await;
match qres {
@@ -216,7 +216,7 @@ pub async fn spawn_scylla_insert_workers(
item.ts as i64,
item.ema,
item.emd,
ttl_msp.as_secs() as i32,
ttls.index.as_secs() as i32,
);
let qres = data_store
.scy
@@ -242,7 +242,7 @@ pub async fn spawn_scylla_insert_workers(
item.ivl,
item.interest,
item.evsize as i32,
ttl_msp.as_secs() as i32,
ttls.index.as_secs() as i32,
);
let qres = data_store.scy.execute(&data_store.qu_insert_channel_ping, params).await;
match qres {
+13 -2
View File
@@ -3,6 +3,7 @@ use crate::bsread::{ChannelDesc, GlobalTimestamp, HeadA, HeadB};
use crate::channelwriter::{ChannelWriter, ChannelWriterAll};
use crate::errconv::ErrConv;
use crate::netbuf::NetBuf;
use crate::store::CommonInsertItemQueueSender;
use async_channel::{Receiver, Sender};
#[allow(unused)]
use bytes::BufMut;
@@ -133,6 +134,7 @@ struct BsreadClient {
do_pulse_id: bool,
rcvbuf: Option<usize>,
tmp_vals_pulse_map: Vec<(i64, i32, i64, i32)>,
insert_item_sender: CommonInsertItemQueueSender,
scy: Arc<ScySession>,
channel_writers: BTreeMap<u64, Box<dyn ChannelWriter + Send>>,
common_queries: Arc<CommonQueries>,
@@ -144,6 +146,7 @@ impl BsreadClient {
pub async fn new(
opts: ZmtpClientOpts,
source_addr: String,
insert_item_sender: CommonInsertItemQueueSender,
scy: Arc<ScySession>,
common_queries: Arc<CommonQueries>,
) -> Result<Self, Error> {
@@ -152,7 +155,8 @@ impl BsreadClient {
do_pulse_id: opts.do_pulse_id,
rcvbuf: opts.rcvbuf,
opts,
tmp_vals_pulse_map: vec![],
tmp_vals_pulse_map: Vec::new(),
insert_item_sender,
scy,
channel_writers: Default::default(),
common_queries,
@@ -524,7 +528,14 @@ pub async fn zmtp_client(opts: ZmtpClientOpts) -> Result<(), Error> {
let common_queries = Arc::new(common_queries);
let mut jhs = vec![];
for source_addr in &opts.sources {
let client = BsreadClient::new(opts.clone(), source_addr.into(), scy.clone(), common_queries.clone()).await?;
let client = BsreadClient::new(
opts.clone(),
source_addr.into(),
todo!(),
scy.clone(),
common_queries.clone(),
)
.await?;
let fut = ClientRun::new(client);
//clients.push(fut);
let jh = tokio::spawn(fut);