Simplify shutdown

This commit is contained in:
Dominik Werder
2023-09-08 16:07:08 +02:00
parent 4a31f3f81f
commit 8bbd6c37d1
10 changed files with 374 additions and 488 deletions

View File

@@ -262,6 +262,7 @@ enum CaConnState {
PeerReady,
Wait(Pin<Box<dyn Future<Output = ()> + Send>>),
Shutdown,
EndOfStream,
}
fn wait_fut(dt: u64) -> Pin<Box<dyn Future<Output = ()> + Send>> {
@@ -852,11 +853,11 @@ impl CaConn {
fn check_channels_alive(&mut self) -> Result<(), Error> {
let tsnow = Instant::now();
trace!("CheckChannelsAlive {addr:?}", addr = &self.remote_addr_dbg);
trace!("check_channels_alive {addr:?}", addr = &self.remote_addr_dbg);
if self.ioc_ping_last.elapsed() > Duration::from_millis(20000) {
if let Some(started) = self.ioc_ping_start {
if started.elapsed() > Duration::from_millis(4000) {
warn!("Echo timeout {addr:?}", addr = self.remote_addr_dbg);
warn!("pong timeout {addr:?}", addr = self.remote_addr_dbg);
let item = CaConnEvent {
ts: Instant::now(),
value: CaConnEventValue::EchoTimeout,
@@ -867,11 +868,11 @@ impl CaConn {
} else {
self.ioc_ping_start = Some(Instant::now());
if let Some(proto) = &mut self.proto {
debug!("push echo to {}", self.remote_addr_dbg);
debug!("ping to {}", self.remote_addr_dbg);
let msg = CaMsg { ty: CaMsgTy::Echo };
proto.push_out(msg);
} else {
warn!("can not push echo, no proto {}", self.remote_addr_dbg);
warn!("can not ping {} no proto", self.remote_addr_dbg);
self.trigger_shutdown(ChannelStatusClosedReason::NoProtocol);
}
}
@@ -1630,6 +1631,7 @@ impl CaConn {
Pending => Ok(Some(Pending)),
},
CaConnState::Shutdown => Ok(None),
CaConnState::EndOfStream => Ok(None),
}
}
@@ -1725,8 +1727,8 @@ impl CaConn {
Ok(())
}
fn outgoing_queues_empty(&self) -> bool {
self.channel_info_query_queue.is_empty() && !self.channel_info_query_sending.is_sending()
fn queues_async_out_flushed(&self) -> bool {
self.channel_info_query_queue.is_empty() && self.channel_info_query_sending.is_idle()
}
fn attempt_flush_channel_info_query(mut self: Pin<&mut Self>, cx: &mut Context) -> Result<(), Error> {
@@ -1741,7 +1743,7 @@ impl CaConn {
}
} else if let Some(item) = self.channel_info_query_queue.pop_front() {
let sd = &mut self.channel_info_query_sending;
sd.send2(item);
sd.send(item);
continue;
} else {
Ok(())
@@ -1758,7 +1760,9 @@ impl Stream for CaConn {
self.stats.caconn_poll_count_inc();
loop {
let mut have_pending = false;
break if let Err(e) = self.as_mut().handle_own_ticker(cx) {
break if let CaConnState::EndOfStream = self.state {
Ready(None)
} else if let Err(e) = self.as_mut().handle_own_ticker(cx) {
Ready(Some(Err(e)))
} else if let Some(item) = self.cmd_res_queue.pop_front() {
let item = CaConnEvent {
@@ -1779,21 +1783,17 @@ impl Stream for CaConn {
} else if let Ready(Some(Err(e))) = self.as_mut().handle_conn_command(cx) {
Ready(Some(Err(e)))
} else if let Some(item) = {
if self.is_shutdown() {
None
} else {
match self.loop_inner(cx) {
// TODO what does this mean: should we re-loop or yield something?
Ok(Some(Ready(()))) => None,
// This is the last step, so we yield Pending.
// But in general, this does not compose well when we would add another step.
Ok(Some(Pending)) => {
have_pending = true;
None
}
Ok(None) => None,
Err(e) => Some(Err(e)),
match self.loop_inner(cx) {
// TODO what does this mean: should we re-loop or yield something?
Ok(Some(Ready(()))) => None,
// This is the last step, so we yield Pending.
// But in general, this does not compose well when we would add another step.
Ok(Some(Pending)) => {
have_pending = true;
None
}
Ok(None) => None,
Err(e) => Some(Err(e)),
}
} {
Ready(Some(item))
@@ -1804,7 +1804,10 @@ impl Stream for CaConn {
ts: Instant::now(),
value: CaConnEventValue::None,
};
if have_pending {
if self.is_shutdown() && self.queues_async_out_flushed() {
self.state = CaConnState::EndOfStream;
Ready(None)
} else if have_pending {
Pending
} else {
continue;

View File

@@ -109,12 +109,18 @@ pub struct ChannelAdd {
local_epics_hostname: String,
}
/// Payload of a channel removal request.
///
/// Carried by `ConnSetCmd::ChannelRemove` and built by `CaConnSetCtrl::remove_channel`.
#[derive(Debug, Clone)]
pub struct ChannelRemove {
// Name of the channel to remove; used to build the `Channel` lookup key.
name: String,
}
#[derive(Debug)]
pub enum ConnSetCmd {
SeriesLookupResult(Result<ChannelInfoResult, dbpg::seriesbychannel::Error>),
ChannelAdd(ChannelAdd),
ChannelAddWithStatusId(ChannelAddWithStatusId),
ChannelAddWithAddr(ChannelAddWithAddr),
ChannelRemove(ChannelRemove),
IocAddrQueryResult(VecDeque<FindIocRes>),
CheckHealth,
Shutdown,
@@ -126,18 +132,22 @@ pub enum CaConnSetEvent {
CaConnEvent((SocketAddr, CaConnEvent)),
}
#[derive(Debug)]
#[derive(Debug, Clone)]
/// Item type of the output channel from the connection set to the holder of `CaConnSetCtrl`.
pub enum CaConnSetItem {
// NOTE(review): presumably emitted as the answer to a health check — confirm against the sender.
Healthy,
}
#[derive(Clone)]
pub struct CaConnSetCtrl {
tx: Sender<CaConnSetEvent>,
pub rx: Receiver<CaConnSetItem>,
rx: Receiver<CaConnSetItem>,
jh: JoinHandle<Result<(), Error>>,
}
impl CaConnSetCtrl {
pub fn receiver(&self) -> Receiver<CaConnSetItem> {
self.rx.clone()
}
pub async fn add_channel(&self, backend: String, name: String, local_epics_hostname: String) -> Result<(), Error> {
let cmd = ChannelAdd {
backend,
@@ -149,6 +159,13 @@ impl CaConnSetCtrl {
Ok(())
}
/// Requests removal of the channel with the given name.
///
/// Wraps the name in a `ConnSetCmd::ChannelRemove` command and sends it on the
/// control channel. Returning `Ok(())` only means the command was queued, not
/// that the removal has been processed.
///
/// # Errors
///
/// Returns an error if sending on the control channel fails.
pub async fn remove_channel(&self, name: String) -> Result<(), Error> {
    let event = CaConnSetEvent::ConnSetCmd(ConnSetCmd::ChannelRemove(ChannelRemove { name }));
    self.tx.send(event).await?;
    Ok(())
}
pub async fn shutdown(&self) -> Result<(), Error> {
let cmd = ConnSetCmd::Shutdown;
self.tx.send(CaConnSetEvent::ConnSetCmd(cmd)).await?;
@@ -160,6 +177,11 @@ impl CaConnSetCtrl {
self.tx.send(CaConnSetEvent::ConnSetCmd(cmd)).await?;
Ok(())
}
pub async fn join(self) -> Result<(), Error> {
self.jh.await.map_err(|e| Error::with_msg_no_trace(e.to_string()))??;
Ok(())
}
}
#[derive(Debug)]
@@ -198,6 +220,7 @@ pub struct CaConnSet {
chan_check_next: Option<Channel>,
stats: CaConnSetStats,
connset_out_tx: Sender<CaConnSetItem>,
ioc_finder_jh: JoinHandle<Result<(), Error>>,
}
impl CaConnSet {
@@ -226,12 +249,14 @@ impl CaConnSet {
chan_check_next: None,
stats: CaConnSetStats::new(),
connset_out_tx,
ioc_finder_jh,
};
// TODO await on jh
let jh = tokio::spawn(CaConnSet::run(connset));
CaConnSetCtrl {
tx: connset_tx,
rx: connset_out_rx,
jh,
}
}
@@ -241,15 +266,31 @@ impl CaConnSet {
match x {
Ok(ev) => this.handle_event(ev).await?,
Err(_) => {
if this.shutdown_done {
if this.shutdown_stopping {
// all fine
break Ok(());
break;
} else {
error!("channel closed without shutdown_done");
error!("channel closed without shutdown_stopping");
}
}
}
if this.shutdown_stopping {
break;
}
}
debug!(
"search_tx sender {} receiver {}",
this.search_tx.sender_count(),
this.search_tx.receiver_count()
);
this.ioc_finder_jh
.await
.map_err(|e| Error::with_msg_no_trace(e.to_string()))??;
debug!("joined ioc_finder_jh");
this.connset_out_tx.close();
this.connset_rx.close();
this.shutdown_done = true;
Ok(())
}
async fn handle_event(&mut self, ev: CaConnSetEvent) -> Result<(), Error> {
@@ -258,6 +299,7 @@ impl CaConnSet {
ConnSetCmd::ChannelAdd(x) => self.handle_add_channel(x).await,
ConnSetCmd::ChannelAddWithStatusId(x) => self.handle_add_channel_with_status_id(x).await,
ConnSetCmd::ChannelAddWithAddr(x) => self.handle_add_channel_with_addr(x).await,
ConnSetCmd::ChannelRemove(x) => self.handle_remove_channel(x).await,
ConnSetCmd::IocAddrQueryResult(x) => self.handle_ioc_query_result(x).await,
ConnSetCmd::SeriesLookupResult(x) => self.handle_series_lookup_result(x).await,
ConnSetCmd::CheckHealth => self.handle_check_health().await,
@@ -301,6 +343,10 @@ impl CaConnSet {
}
async fn handle_add_channel(&mut self, add: ChannelAdd) -> Result<(), Error> {
if self.shutdown_stopping {
debug!("handle_add_channel but shutdown_stopping");
return Ok(());
}
// TODO should I add the transition through ActiveChannelState::Init as well?
let ch = Channel::new(add.name.clone());
let _st = self.channel_states.inner().entry(ch).or_insert_with(|| ChannelState {
@@ -322,6 +368,10 @@ impl CaConnSet {
}
async fn handle_add_channel_with_status_id(&mut self, add: ChannelAddWithStatusId) -> Result<(), Error> {
if self.shutdown_stopping {
debug!("handle_add_channel but shutdown_stopping");
return Ok(());
}
debug!("handle_add_channel_with_status_id {add:?}");
let ch = Channel::new(add.name.clone());
if let Some(chst) = self.channel_states.inner().get_mut(&ch) {
@@ -350,6 +400,10 @@ impl CaConnSet {
}
async fn handle_add_channel_with_addr(&mut self, add: ChannelAddWithAddr) -> Result<(), Error> {
if self.shutdown_stopping {
debug!("handle_add_channel but shutdown_stopping");
return Ok(());
}
if !self.ca_conn_ress.contains_key(&add.addr) {
let c = self.create_ca_conn(add.clone())?;
self.ca_conn_ress.insert(add.addr, c);
@@ -360,6 +414,43 @@ impl CaConnSet {
Ok(())
}
/// Marks the named channel for removal.
///
/// Looks the channel up in `channel_states`; an unknown channel is ignored.
/// An active channel is switched to `ChannelStateValue::ToRemove`. The address
/// is carried over only when the channel is currently in the `WithAddress`
/// state, otherwise `addr` is `None`. A channel that is already `ToRemove`
/// is left untouched.
///
/// Always returns `Ok(())`.
async fn handle_remove_channel(&mut self, add: ChannelRemove) -> Result<(), Error> {
    let ch = Channel::new(add.name);
    if let Some(entry) = self.channel_states.inner().get_mut(&ch) {
        // Decide on the new state first, assign afterwards.
        // An outer `None` means the entry stays as it is.
        let replacement = match &entry.value {
            ChannelStateValue::ToRemove { .. } => None,
            ChannelStateValue::Active(active) => {
                let addr = match active {
                    ActiveChannelState::Init { .. }
                    | ActiveChannelState::WaitForStatusSeriesId { .. } => None,
                    ActiveChannelState::WithStatusSeriesId { state, .. } => match &state.inner {
                        // Only a channel with a known address keeps that address.
                        WithStatusSeriesIdStateInner::WithAddress { addr, .. } => Some(addr.clone()),
                        WithStatusSeriesIdStateInner::UnknownAddress { .. }
                        | WithStatusSeriesIdStateInner::SearchPending { .. }
                        | WithStatusSeriesIdStateInner::NoAddress { .. } => None,
                    },
                };
                Some(ChannelStateValue::ToRemove { addr })
            }
        };
        if let Some(value) = replacement {
            entry.value = value;
        }
    }
    Ok(())
}
async fn handle_ioc_query_result(&mut self, res: VecDeque<FindIocRes>) -> Result<(), Error> {
for e in res {
let ch = Channel::new(e.channel.clone());
@@ -416,6 +507,7 @@ impl CaConnSet {
debug!("TODO handle_shutdown");
debug!("shutdown received");
self.shutdown_stopping = true;
self.search_tx.close();
for (addr, res) in self.ca_conn_ress.iter() {
let item = ConnCommand::shutdown();
res.sender.send(item).await?;
@@ -428,16 +520,20 @@ impl CaConnSet {
if let Some(e) = self.ca_conn_ress.remove(&addr) {
match e.jh.await {
Ok(Ok(())) => {
self.stats.ca_conn_task_join_done_ok_inc();
debug!("CaConn {addr} finished well");
}
Ok(Err(e)) => {
self.stats.ca_conn_task_join_done_err_inc();
error!("CaConn {addr} task error: {e}");
}
Err(e) => {
self.stats.ca_conn_task_join_err_inc();
error!("CaConn {addr} join error: {e}");
}
}
} else {
self.stats.ca_conn_task_eos_non_exist_inc();
warn!("end-of-stream received for non-existent CaConn {addr}");
}
Ok(())

View File

@@ -338,14 +338,9 @@ fn start_finder_ca(tx: Sender<DaemonEvent>, tgts: Vec<SocketAddrV4>) -> (Sender<
taskrun::spawn({
async move {
while let Ok(item) = arx.recv().await {
match tx.send(DaemonEvent::SearchDone(item)).await {
Ok(_) => {}
Err(e) => {
error!("search res fwd {e}");
}
}
todo!("send the result item");
}
warn!("search res fwd nput broken");
warn!("search res fwd inp closed");
}
});
(qtx, ioc_finder_jh)