Simplify shutdown
This commit is contained in:
@@ -6,8 +6,6 @@ pub mod proto;
|
||||
pub mod search;
|
||||
pub mod statemap;
|
||||
|
||||
use self::connset::CaConnSetCtrl;
|
||||
use crate::ca::connset::CaConnSet;
|
||||
use crate::metrics::ExtraInsertsConf;
|
||||
use crate::rt::TokMx;
|
||||
use futures_util::Future;
|
||||
@@ -15,7 +13,6 @@ use futures_util::FutureExt;
|
||||
use log::*;
|
||||
use netpod::Database;
|
||||
use scywr::insertworker::InsertWorkerOpts;
|
||||
use scywr::iteminsertqueue::CommonInsertItemQueue;
|
||||
use scywr::store::DataStore;
|
||||
use stats::CaConnStatsAgg;
|
||||
use std::pin::Pin;
|
||||
@@ -39,7 +36,6 @@ pub struct IngestCommons {
|
||||
pub pgconf: Arc<Database>,
|
||||
pub backend: String,
|
||||
pub local_epics_hostname: String,
|
||||
pub insert_item_queue: Arc<CommonInsertItemQueue>,
|
||||
pub data_store: Arc<DataStore>,
|
||||
pub insert_ivl_min: Arc<AtomicU64>,
|
||||
pub extra_inserts_conf: TokMx<ExtraInsertsConf>,
|
||||
|
||||
@@ -262,6 +262,7 @@ enum CaConnState {
|
||||
PeerReady,
|
||||
Wait(Pin<Box<dyn Future<Output = ()> + Send>>),
|
||||
Shutdown,
|
||||
EndOfStream,
|
||||
}
|
||||
|
||||
fn wait_fut(dt: u64) -> Pin<Box<dyn Future<Output = ()> + Send>> {
|
||||
@@ -852,11 +853,11 @@ impl CaConn {
|
||||
|
||||
fn check_channels_alive(&mut self) -> Result<(), Error> {
|
||||
let tsnow = Instant::now();
|
||||
trace!("CheckChannelsAlive {addr:?}", addr = &self.remote_addr_dbg);
|
||||
trace!("check_channels_alive {addr:?}", addr = &self.remote_addr_dbg);
|
||||
if self.ioc_ping_last.elapsed() > Duration::from_millis(20000) {
|
||||
if let Some(started) = self.ioc_ping_start {
|
||||
if started.elapsed() > Duration::from_millis(4000) {
|
||||
warn!("Echo timeout {addr:?}", addr = self.remote_addr_dbg);
|
||||
warn!("pong timeout {addr:?}", addr = self.remote_addr_dbg);
|
||||
let item = CaConnEvent {
|
||||
ts: Instant::now(),
|
||||
value: CaConnEventValue::EchoTimeout,
|
||||
@@ -867,11 +868,11 @@ impl CaConn {
|
||||
} else {
|
||||
self.ioc_ping_start = Some(Instant::now());
|
||||
if let Some(proto) = &mut self.proto {
|
||||
debug!("push echo to {}", self.remote_addr_dbg);
|
||||
debug!("ping to {}", self.remote_addr_dbg);
|
||||
let msg = CaMsg { ty: CaMsgTy::Echo };
|
||||
proto.push_out(msg);
|
||||
} else {
|
||||
warn!("can not push echo, no proto {}", self.remote_addr_dbg);
|
||||
warn!("can not ping {} no proto", self.remote_addr_dbg);
|
||||
self.trigger_shutdown(ChannelStatusClosedReason::NoProtocol);
|
||||
}
|
||||
}
|
||||
@@ -1630,6 +1631,7 @@ impl CaConn {
|
||||
Pending => Ok(Some(Pending)),
|
||||
},
|
||||
CaConnState::Shutdown => Ok(None),
|
||||
CaConnState::EndOfStream => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1725,8 +1727,8 @@ impl CaConn {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn outgoing_queues_empty(&self) -> bool {
|
||||
self.channel_info_query_queue.is_empty() && !self.channel_info_query_sending.is_sending()
|
||||
fn queues_async_out_flushed(&self) -> bool {
|
||||
self.channel_info_query_queue.is_empty() && self.channel_info_query_sending.is_idle()
|
||||
}
|
||||
|
||||
fn attempt_flush_channel_info_query(mut self: Pin<&mut Self>, cx: &mut Context) -> Result<(), Error> {
|
||||
@@ -1741,7 +1743,7 @@ impl CaConn {
|
||||
}
|
||||
} else if let Some(item) = self.channel_info_query_queue.pop_front() {
|
||||
let sd = &mut self.channel_info_query_sending;
|
||||
sd.send2(item);
|
||||
sd.send(item);
|
||||
continue;
|
||||
} else {
|
||||
Ok(())
|
||||
@@ -1758,7 +1760,9 @@ impl Stream for CaConn {
|
||||
self.stats.caconn_poll_count_inc();
|
||||
loop {
|
||||
let mut have_pending = false;
|
||||
break if let Err(e) = self.as_mut().handle_own_ticker(cx) {
|
||||
break if let CaConnState::EndOfStream = self.state {
|
||||
Ready(None)
|
||||
} else if let Err(e) = self.as_mut().handle_own_ticker(cx) {
|
||||
Ready(Some(Err(e)))
|
||||
} else if let Some(item) = self.cmd_res_queue.pop_front() {
|
||||
let item = CaConnEvent {
|
||||
@@ -1779,21 +1783,17 @@ impl Stream for CaConn {
|
||||
} else if let Ready(Some(Err(e))) = self.as_mut().handle_conn_command(cx) {
|
||||
Ready(Some(Err(e)))
|
||||
} else if let Some(item) = {
|
||||
if self.is_shutdown() {
|
||||
None
|
||||
} else {
|
||||
match self.loop_inner(cx) {
|
||||
// TODO what does this mean: should we re-loop or yield something?
|
||||
Ok(Some(Ready(()))) => None,
|
||||
// This is the last step, so we yield Pending.
|
||||
// But in general, this does not compose well when we would add another step.
|
||||
Ok(Some(Pending)) => {
|
||||
have_pending = true;
|
||||
None
|
||||
}
|
||||
Ok(None) => None,
|
||||
Err(e) => Some(Err(e)),
|
||||
match self.loop_inner(cx) {
|
||||
// TODO what does this mean: should we re-loop or yield something?
|
||||
Ok(Some(Ready(()))) => None,
|
||||
// This is the last step, so we yield Pending.
|
||||
// But in general, this does not compose well when we would add another step.
|
||||
Ok(Some(Pending)) => {
|
||||
have_pending = true;
|
||||
None
|
||||
}
|
||||
Ok(None) => None,
|
||||
Err(e) => Some(Err(e)),
|
||||
}
|
||||
} {
|
||||
Ready(Some(item))
|
||||
@@ -1804,7 +1804,10 @@ impl Stream for CaConn {
|
||||
ts: Instant::now(),
|
||||
value: CaConnEventValue::None,
|
||||
};
|
||||
if have_pending {
|
||||
if self.is_shutdown() && self.queues_async_out_flushed() {
|
||||
self.state = CaConnState::EndOfStream;
|
||||
Ready(None)
|
||||
} else if have_pending {
|
||||
Pending
|
||||
} else {
|
||||
continue;
|
||||
|
||||
@@ -109,12 +109,18 @@ pub struct ChannelAdd {
|
||||
local_epics_hostname: String,
|
||||
}
|
||||
|
||||
/// Payload of `ConnSetCmd::ChannelRemove`: identifies the channel that the
/// connection set should stop tracking.
#[derive(Debug, Clone)]
|
||||
pub struct ChannelRemove {
|
||||
// Channel name; the handler turns it into a `Channel` key via `Channel::new`.
name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ConnSetCmd {
|
||||
SeriesLookupResult(Result<ChannelInfoResult, dbpg::seriesbychannel::Error>),
|
||||
ChannelAdd(ChannelAdd),
|
||||
ChannelAddWithStatusId(ChannelAddWithStatusId),
|
||||
ChannelAddWithAddr(ChannelAddWithAddr),
|
||||
ChannelRemove(ChannelRemove),
|
||||
IocAddrQueryResult(VecDeque<FindIocRes>),
|
||||
CheckHealth,
|
||||
Shutdown,
|
||||
@@ -126,18 +132,22 @@ pub enum CaConnSetEvent {
|
||||
CaConnEvent((SocketAddr, CaConnEvent)),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum CaConnSetItem {
|
||||
Healthy,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct CaConnSetCtrl {
|
||||
tx: Sender<CaConnSetEvent>,
|
||||
pub rx: Receiver<CaConnSetItem>,
|
||||
rx: Receiver<CaConnSetItem>,
|
||||
jh: JoinHandle<Result<(), Error>>,
|
||||
}
|
||||
|
||||
impl CaConnSetCtrl {
|
||||
/// Returns a fresh handle to the channel on which the connection set
/// publishes its `CaConnSetItem` output.
pub fn receiver(&self) -> Receiver<CaConnSetItem> {
    let rx = &self.rx;
    rx.clone()
}
|
||||
|
||||
pub async fn add_channel(&self, backend: String, name: String, local_epics_hostname: String) -> Result<(), Error> {
|
||||
let cmd = ChannelAdd {
|
||||
backend,
|
||||
@@ -149,6 +159,13 @@ impl CaConnSetCtrl {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn remove_channel(&self, name: String) -> Result<(), Error> {
|
||||
let cmd = ChannelRemove { name };
|
||||
let cmd = ConnSetCmd::ChannelRemove(cmd);
|
||||
self.tx.send(CaConnSetEvent::ConnSetCmd(cmd)).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn shutdown(&self) -> Result<(), Error> {
|
||||
let cmd = ConnSetCmd::Shutdown;
|
||||
self.tx.send(CaConnSetEvent::ConnSetCmd(cmd)).await?;
|
||||
@@ -160,6 +177,11 @@ impl CaConnSetCtrl {
|
||||
self.tx.send(CaConnSetEvent::ConnSetCmd(cmd)).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn join(self) -> Result<(), Error> {
|
||||
self.jh.await.map_err(|e| Error::with_msg_no_trace(e.to_string()))??;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -198,6 +220,7 @@ pub struct CaConnSet {
|
||||
chan_check_next: Option<Channel>,
|
||||
stats: CaConnSetStats,
|
||||
connset_out_tx: Sender<CaConnSetItem>,
|
||||
ioc_finder_jh: JoinHandle<Result<(), Error>>,
|
||||
}
|
||||
|
||||
impl CaConnSet {
|
||||
@@ -226,12 +249,14 @@ impl CaConnSet {
|
||||
chan_check_next: None,
|
||||
stats: CaConnSetStats::new(),
|
||||
connset_out_tx,
|
||||
ioc_finder_jh,
|
||||
};
|
||||
// TODO await on jh
|
||||
let jh = tokio::spawn(CaConnSet::run(connset));
|
||||
CaConnSetCtrl {
|
||||
tx: connset_tx,
|
||||
rx: connset_out_rx,
|
||||
jh,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -241,15 +266,31 @@ impl CaConnSet {
|
||||
match x {
|
||||
Ok(ev) => this.handle_event(ev).await?,
|
||||
Err(_) => {
|
||||
if this.shutdown_done {
|
||||
if this.shutdown_stopping {
|
||||
// all fine
|
||||
break Ok(());
|
||||
break;
|
||||
} else {
|
||||
error!("channel closed without shutdown_done");
|
||||
error!("channel closed without shutdown_stopping");
|
||||
}
|
||||
}
|
||||
}
|
||||
if this.shutdown_stopping {
|
||||
break;
|
||||
}
|
||||
}
|
||||
debug!(
|
||||
"search_tx sender {} receiver {}",
|
||||
this.search_tx.sender_count(),
|
||||
this.search_tx.receiver_count()
|
||||
);
|
||||
this.ioc_finder_jh
|
||||
.await
|
||||
.map_err(|e| Error::with_msg_no_trace(e.to_string()))??;
|
||||
debug!("joined ioc_finder_jh");
|
||||
this.connset_out_tx.close();
|
||||
this.connset_rx.close();
|
||||
this.shutdown_done = true;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_event(&mut self, ev: CaConnSetEvent) -> Result<(), Error> {
|
||||
@@ -258,6 +299,7 @@ impl CaConnSet {
|
||||
ConnSetCmd::ChannelAdd(x) => self.handle_add_channel(x).await,
|
||||
ConnSetCmd::ChannelAddWithStatusId(x) => self.handle_add_channel_with_status_id(x).await,
|
||||
ConnSetCmd::ChannelAddWithAddr(x) => self.handle_add_channel_with_addr(x).await,
|
||||
ConnSetCmd::ChannelRemove(x) => self.handle_remove_channel(x).await,
|
||||
ConnSetCmd::IocAddrQueryResult(x) => self.handle_ioc_query_result(x).await,
|
||||
ConnSetCmd::SeriesLookupResult(x) => self.handle_series_lookup_result(x).await,
|
||||
ConnSetCmd::CheckHealth => self.handle_check_health().await,
|
||||
@@ -301,6 +343,10 @@ impl CaConnSet {
|
||||
}
|
||||
|
||||
async fn handle_add_channel(&mut self, add: ChannelAdd) -> Result<(), Error> {
|
||||
if self.shutdown_stopping {
|
||||
debug!("handle_add_channel but shutdown_stopping");
|
||||
return Ok(());
|
||||
}
|
||||
// TODO should I add the transition through ActiveChannelState::Init as well?
|
||||
let ch = Channel::new(add.name.clone());
|
||||
let _st = self.channel_states.inner().entry(ch).or_insert_with(|| ChannelState {
|
||||
@@ -322,6 +368,10 @@ impl CaConnSet {
|
||||
}
|
||||
|
||||
async fn handle_add_channel_with_status_id(&mut self, add: ChannelAddWithStatusId) -> Result<(), Error> {
|
||||
if self.shutdown_stopping {
|
||||
debug!("handle_add_channel but shutdown_stopping");
|
||||
return Ok(());
|
||||
}
|
||||
debug!("handle_add_channel_with_status_id {add:?}");
|
||||
let ch = Channel::new(add.name.clone());
|
||||
if let Some(chst) = self.channel_states.inner().get_mut(&ch) {
|
||||
@@ -350,6 +400,10 @@ impl CaConnSet {
|
||||
}
|
||||
|
||||
async fn handle_add_channel_with_addr(&mut self, add: ChannelAddWithAddr) -> Result<(), Error> {
|
||||
if self.shutdown_stopping {
|
||||
debug!("handle_add_channel but shutdown_stopping");
|
||||
return Ok(());
|
||||
}
|
||||
if !self.ca_conn_ress.contains_key(&add.addr) {
|
||||
let c = self.create_ca_conn(add.clone())?;
|
||||
self.ca_conn_ress.insert(add.addr, c);
|
||||
@@ -360,6 +414,43 @@ impl CaConnSet {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_remove_channel(&mut self, add: ChannelRemove) -> Result<(), Error> {
|
||||
let ch = Channel::new(add.name);
|
||||
if let Some(k) = self.channel_states.inner().get_mut(&ch) {
|
||||
match &k.value {
|
||||
ChannelStateValue::Active(j) => match j {
|
||||
ActiveChannelState::Init { .. } => {
|
||||
k.value = ChannelStateValue::ToRemove { addr: None };
|
||||
}
|
||||
ActiveChannelState::WaitForStatusSeriesId { .. } => {
|
||||
k.value = ChannelStateValue::ToRemove { addr: None };
|
||||
}
|
||||
ActiveChannelState::WithStatusSeriesId {
|
||||
status_series_id: _,
|
||||
state,
|
||||
} => match &state.inner {
|
||||
WithStatusSeriesIdStateInner::UnknownAddress { .. } => {
|
||||
k.value = ChannelStateValue::ToRemove { addr: None };
|
||||
}
|
||||
WithStatusSeriesIdStateInner::SearchPending { .. } => {
|
||||
k.value = ChannelStateValue::ToRemove { addr: None };
|
||||
}
|
||||
WithStatusSeriesIdStateInner::WithAddress { addr, state: _ } => {
|
||||
k.value = ChannelStateValue::ToRemove {
|
||||
addr: Some(addr.clone()),
|
||||
};
|
||||
}
|
||||
WithStatusSeriesIdStateInner::NoAddress { .. } => {
|
||||
k.value = ChannelStateValue::ToRemove { addr: None };
|
||||
}
|
||||
},
|
||||
},
|
||||
ChannelStateValue::ToRemove { .. } => {}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_ioc_query_result(&mut self, res: VecDeque<FindIocRes>) -> Result<(), Error> {
|
||||
for e in res {
|
||||
let ch = Channel::new(e.channel.clone());
|
||||
@@ -416,6 +507,7 @@ impl CaConnSet {
|
||||
debug!("TODO handle_shutdown");
|
||||
debug!("shutdown received");
|
||||
self.shutdown_stopping = true;
|
||||
self.search_tx.close();
|
||||
for (addr, res) in self.ca_conn_ress.iter() {
|
||||
let item = ConnCommand::shutdown();
|
||||
res.sender.send(item).await?;
|
||||
@@ -428,16 +520,20 @@ impl CaConnSet {
|
||||
if let Some(e) = self.ca_conn_ress.remove(&addr) {
|
||||
match e.jh.await {
|
||||
Ok(Ok(())) => {
|
||||
self.stats.ca_conn_task_join_done_ok_inc();
|
||||
debug!("CaConn {addr} finished well");
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
self.stats.ca_conn_task_join_done_err_inc();
|
||||
error!("CaConn {addr} task error: {e}");
|
||||
}
|
||||
Err(e) => {
|
||||
self.stats.ca_conn_task_join_err_inc();
|
||||
error!("CaConn {addr} join error: {e}");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
self.stats.ca_conn_task_eos_non_exist_inc();
|
||||
warn!("end-of-stream received for non-existent CaConn {addr}");
|
||||
}
|
||||
Ok(())
|
||||
|
||||
@@ -338,14 +338,9 @@ fn start_finder_ca(tx: Sender<DaemonEvent>, tgts: Vec<SocketAddrV4>) -> (Sender<
|
||||
taskrun::spawn({
|
||||
async move {
|
||||
while let Ok(item) = arx.recv().await {
|
||||
match tx.send(DaemonEvent::SearchDone(item)).await {
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
error!("search res fwd {e}");
|
||||
}
|
||||
}
|
||||
todo!("send the result item");
|
||||
}
|
||||
warn!("search res fwd nput broken");
|
||||
warn!("search res fwd inp closed");
|
||||
}
|
||||
});
|
||||
(qtx, ioc_finder_jh)
|
||||
|
||||
@@ -1,11 +1,6 @@
|
||||
use crate::ca::conn::CaConnEvent;
|
||||
use crate::ca::connset::CaConnSetItem;
|
||||
use crate::ca::findioc::FindIocRes;
|
||||
use async_channel::Sender;
|
||||
use err::Error;
|
||||
use serde::Serialize;
|
||||
use std::collections::VecDeque;
|
||||
use std::net::SocketAddrV4;
|
||||
|
||||
#[derive(Clone, Debug, Serialize, PartialEq, PartialOrd, Eq, Ord)]
|
||||
pub struct Channel {
|
||||
@@ -22,13 +17,11 @@ impl Channel {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum DaemonEvent {
|
||||
TimerTick(u32, Sender<u32>),
|
||||
ChannelAdd(Channel),
|
||||
ChannelRemove(Channel),
|
||||
SearchDone(Result<VecDeque<FindIocRes>, Error>),
|
||||
CaConnEvent(SocketAddrV4, CaConnEvent),
|
||||
CaConnSetItem(CaConnSetItem),
|
||||
Shutdown,
|
||||
}
|
||||
@@ -40,17 +33,6 @@ impl DaemonEvent {
|
||||
TimerTick(_, _) => format!("TimerTick"),
|
||||
ChannelAdd(x) => format!("ChannelAdd {x:?}"),
|
||||
ChannelRemove(x) => format!("ChannelRemove {x:?}"),
|
||||
SearchDone(_x) => format!("SearchDone"),
|
||||
CaConnEvent(_a, b) => {
|
||||
use crate::ca::conn::CaConnEventValue::*;
|
||||
match &b.value {
|
||||
None => format!("CaConnEvent/None"),
|
||||
EchoTimeout => format!("CaConnEvent/EchoTimeout"),
|
||||
ConnCommandResult(_) => format!("CaConnEvent/ConnCommandResult"),
|
||||
QueryItem(_) => format!("CaConnEvent/QueryItem"),
|
||||
EndOfStream => format!("CaConnEvent/EndOfStream"),
|
||||
}
|
||||
}
|
||||
CaConnSetItem(_) => format!("CaConnSetItem"),
|
||||
Shutdown => format!("Shutdown"),
|
||||
}
|
||||
|
||||
@@ -270,11 +270,10 @@ pub async fn metrics_agg_task(
|
||||
}
|
||||
}
|
||||
{
|
||||
let val = ingest_commons
|
||||
.insert_item_queue
|
||||
.receiver()
|
||||
.map_or(0, |x| x.len() as u64);
|
||||
agg.store_worker_recv_queue_len.store(val, Ordering::Release);
|
||||
warn!("TODO provide metrics with a weak ref to the query_item_channel");
|
||||
let nitems = 0;
|
||||
// let nitems = weak.upgrade()..len();
|
||||
agg.store_worker_recv_queue_len.store(nitems, Ordering::Release);
|
||||
}
|
||||
let mut m = METRICS.lock().unwrap();
|
||||
*m = Some(agg.clone());
|
||||
|
||||
@@ -42,11 +42,15 @@ impl<T> SenderPolling<T> {
|
||||
ret
|
||||
}
|
||||
|
||||
pub fn is_idle(&self) -> bool {
|
||||
self.fut.is_none()
|
||||
}
|
||||
|
||||
pub fn is_sending(&self) -> bool {
|
||||
self.fut.is_some()
|
||||
}
|
||||
|
||||
pub fn send(self: Pin<&mut Self>, item: T) {
|
||||
pub fn send_pin(self: Pin<&mut Self>, item: T) {
|
||||
let (tx, fut) = unsafe {
|
||||
let x = Pin::get_unchecked_mut(self);
|
||||
(x.sender_ptr.as_mut(), &mut x.fut)
|
||||
@@ -55,7 +59,7 @@ impl<T> SenderPolling<T> {
|
||||
*fut = Some(s);
|
||||
}
|
||||
|
||||
pub fn send2(&mut self, item: T) {
|
||||
pub fn send(&mut self, item: T) {
|
||||
let sender = unsafe { self.sender_ptr.as_mut() };
|
||||
let s = sender.send(item);
|
||||
self.fut = Some(s);
|
||||
|
||||
Reference in New Issue
Block a user