Fall back to read-notify

This commit is contained in:
Dominik Werder
2024-01-19 16:23:56 +01:00
parent 05de9938c8
commit 403f0b37a3
11 changed files with 713 additions and 340 deletions

View File

@@ -51,7 +51,7 @@ async fn run_batcher<T>(rx: Receiver<T>, batch_tx: Sender<Vec<T>>, batch_limit:
},
Err(e) => {
let _: Elapsed = e;
trace!("-------------------------- batcher timeout rx len {}", rx.len());
// trace!("-------------------------- batcher timeout rx len {}", rx.len());
if all.len() > 0 {
do_emit = true;
}

View File

@@ -23,6 +23,7 @@ use scywriiq::QueryItem;
use stats::DaemonStats;
use stats::InsertWorkerStats;
use stats::SeriesByChannelStats;
use stats::SeriesWriterEstablishStats;
use std::collections::VecDeque;
use std::sync::atomic;
use std::sync::atomic::AtomicU64;
@@ -97,8 +98,10 @@ impl Daemon {
// Insert queue hook
// let query_item_rx = inserthook::active_channel_insert_hook(query_item_rx);
let (writer_establis_tx,) = serieswriter::writer::start_writer_establish_worker(channel_info_query_tx.clone())
.map_err(|e| Error::with_msg_no_trace(e.to_string()))?;
let wrest_stats = Arc::new(SeriesWriterEstablishStats::new());
let (writer_establis_tx,) =
serieswriter::writer::start_writer_establish_worker(channel_info_query_tx.clone(), wrest_stats.clone())
.map_err(|e| Error::with_msg_no_trace(e.to_string()))?;
let local_epics_hostname = ingest_linux::net::local_hostname();
let conn_set_ctrl = CaConnSet::start(

View File

@@ -20,7 +20,6 @@ pub fn local_hostname() -> String {
let hostname = CStr::from_ptr(&buf[0] as *const _ as _);
hostname.to_str().unwrap()
};
log::info!("---------------------- found hostname {hostname:?}");
hostname.into()
}

File diff suppressed because it is too large Load Diff

View File

@@ -85,7 +85,7 @@ const MAYBE_WRONG_ADDRESS_STAY: Duration = Duration::from_millis(4000);
const SEARCH_PENDING_TIMEOUT: Duration = Duration::from_millis(30000);
const CHANNEL_HEALTH_TIMEOUT: Duration = Duration::from_millis(30000);
const CHANNEL_UNASSIGNED_TIMEOUT: Duration = Duration::from_millis(0);
const CHANNEL_MAX_WITHOUT_HEALTH_UPDATE: usize = 10000;
const CHANNEL_MAX_WITHOUT_HEALTH_UPDATE: usize = 3000000;
#[allow(unused)]
macro_rules! trace2 {
@@ -1398,7 +1398,7 @@ impl CaConnSet {
}
for (addr, ch) in cmd_remove_channel {
if let Some(g) = self.ca_conn_ress.get_mut(&addr) {
let cmd = ConnCommand::channel_remove(ch.id().into());
let cmd = ConnCommand::channel_close(ch.id().into());
g.cmd_queue.push_back(cmd);
}
let cmd = ChannelRemove { name: ch.id().into() };
@@ -1756,13 +1756,7 @@ impl Stream for CaConnSet {
trace4!("CaConnSet poll done");
let poll_ts2 = Instant::now();
let dt = poll_ts2.saturating_duration_since(poll_ts1);
if dt > Duration::from_millis(80) {
warn!("long poll duration {:.0} ms", dt.as_secs_f32() * 1e3)
} else if dt > Duration::from_millis(40) {
info!("long poll duration {:.0} ms", dt.as_secs_f32() * 1e3)
} else if dt > Duration::from_millis(5) {
debug!("long poll duration {:.0} ms", dt.as_secs_f32() * 1e3)
}
self.stats.poll_all_dt().ingest((1e3 * dt.as_secs_f32()) as u32);
ret
}
}

View File

@@ -369,6 +369,7 @@ impl FindIocStream {
} else {
stats.ca_udp_first_msg_not_version().inc();
}
// trace2!("recv {:?} {:?}", src_addr, msgs);
let mut res = Vec::new();
if good {
for msg in &msgs[1..] {
@@ -616,7 +617,7 @@ impl Stream for FindIocStream {
match batch.tgts.pop_front() {
Some(tgtix) => {
Self::serialize_batch(buf1, batch);
debug!("serialized for search {:?}", batch.channels);
trace!("serialized for search {:?}", batch.channels);
match self.tgts.get(tgtix) {
Some(tgt) => {
let tgt = tgt.clone();

View File

@@ -111,6 +111,21 @@ pub struct EventAdd {
pub subid: u32,
}
/// Header fields of an event-cancel message (`CaMsgTy::EventCancel`).
///
/// In the `CaMsgTy` match tables visible alongside this type, the variant maps to
/// command id `0x02` and a payload length of `0`, so everything it carries lives
/// in the fields below.
#[derive(Debug)]
pub struct EventCancel {
/// Data type code reported for this message.
pub data_type: u16,
/// Data count reported for this message.
pub data_count: u16,
/// NOTE(review): presumably the server-assigned channel id, sent in header
/// parameter 1 (same slot as `ReadNotify::sid`) — confirm against the protocol spec.
pub sid: u32,
/// NOTE(review): presumably the id of the subscription to cancel, sent in header
/// parameter 2 (same slot as `EventAdd::subid`) — confirm against the protocol spec.
pub subid: u32,
}
/// Header fields of the response to an event-cancel (`CaMsgTy::EventCancelRes`).
///
/// The command id table maps this variant to `0x01`, i.e. the same id as an
/// event-add response, and its payload length to `0`.
///
/// NOTE(review): the parser code visible in this change turns an empty `0x01`
/// message into `EventAddResEmpty`; whether this type is constructed anywhere
/// is not visible here — confirm.
#[derive(Debug)]
pub struct EventCancelRes {
/// Data type code reported for this message.
pub data_type: u16,
/// NOTE(review): presumably the server-assigned channel id — confirm.
pub sid: u32,
/// NOTE(review): presumably the id of the cancelled subscription — confirm.
pub subid: u32,
}
// TODO Clone is only used for testing purposes and should get removed later.
#[derive(Debug, Clone)]
pub struct EventAddRes {
@@ -118,8 +133,15 @@ pub struct EventAddRes {
pub data_count: u32,
pub status: u32,
pub subid: u32,
pub value: CaEventValue,
pub payload_len: u32,
pub value: CaEventValue,
}
/// A command-`0x01` (event-add) response that arrived without any payload.
///
/// The message parser builds this when a `0x01` message has a zero-length
/// payload, instead of trying to decode a value from it. All fields are copied
/// straight from the message header.
#[derive(Debug, Clone)]
pub struct EventAddResEmpty {
/// Header data type code.
pub data_type: u16,
/// Header parameter 1.
pub sid: u32,
/// Header parameter 2.
pub subid: u32,
}
#[derive(Debug)]
@@ -136,6 +158,8 @@ pub struct ReadNotifyRes {
pub data_count: u32,
pub sid: u32,
pub ioid: u32,
pub payload_len: u32,
pub value: CaEventValue,
}
#[derive(Debug)]
@@ -283,6 +307,9 @@ pub enum CaMsgTy {
AccessRightsRes(AccessRightsRes),
EventAdd(EventAdd),
EventAddRes(EventAddRes),
EventAddResEmpty(EventAddResEmpty),
EventCancel(EventCancel),
EventCancelRes(EventCancelRes),
ReadNotify(ReadNotify),
ReadNotifyRes(ReadNotifyRes),
Echo,
@@ -306,6 +333,11 @@ impl CaMsgTy {
AccessRightsRes(_) => 0x16,
EventAdd(_) => 0x01,
EventAddRes(_) => 0x01,
// sic: the response to event-cancel is an event-add:
EventAddResEmpty(_) => 0x01,
EventCancel(_) => 0x02,
// sic: the response to event-cancel is an event-add:
EventCancelRes(_) => 0x01,
ReadNotify(_) => 0x0f,
ReadNotifyRes(_) => 0x0f,
Echo => 0x17,
@@ -318,7 +350,6 @@ impl CaMsgTy {
fn payload_len(&self) -> usize {
use CaMsgTy::*;
trace!("payload_len for {self:?}");
match self {
Version => 0,
VersionRes(_) => 0,
@@ -337,6 +368,9 @@ impl CaMsgTy {
error!("should not attempt to serialize the response again");
panic!();
}
EventAddResEmpty(_) => 0,
EventCancel(_) => 0,
EventCancelRes(_) => 0,
ReadNotify(_) => 0,
ReadNotifyRes(_) => {
error!("should not attempt to serialize the response again");
@@ -366,6 +400,9 @@ impl CaMsgTy {
AccessRightsRes(_) => 0,
EventAdd(x) => x.data_type,
EventAddRes(x) => x.data_type,
EventAddResEmpty(x) => x.data_type,
EventCancel(x) => x.data_type,
EventCancelRes(x) => x.data_type,
ReadNotify(x) => x.data_type,
ReadNotifyRes(x) => x.data_type,
Echo => 0,
@@ -395,6 +432,9 @@ impl CaMsgTy {
panic!();
x.data_count as _
}
EventAddResEmpty(_) => 0,
EventCancel(x) => x.data_count,
EventCancelRes(x) => 0,
ReadNotify(x) => x.data_count,
ReadNotifyRes(x) => {
panic!();
@@ -421,6 +461,9 @@ impl CaMsgTy {
AccessRightsRes(x) => x.cid,
EventAdd(x) => x.sid,
EventAddRes(x) => x.status,
EventAddResEmpty(x) => x.sid,
EventCancel(x) => x.sid,
EventCancelRes(x) => x.sid,
ReadNotify(x) => x.sid,
ReadNotifyRes(x) => x.sid,
Echo => 0,
@@ -444,6 +487,9 @@ impl CaMsgTy {
AccessRightsRes(x) => x.rights,
EventAdd(x) => x.subid,
EventAddRes(x) => x.subid,
EventAddResEmpty(x) => x.subid,
EventCancel(x) => x.subid,
EventCancelRes(x) => x.subid,
ReadNotify(x) => x.ioid,
ReadNotifyRes(x) => x.ioid,
Echo => 0,
@@ -508,6 +554,9 @@ impl CaMsgTy {
buf.copy_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0e, 0, 0]);
}
EventAddRes(_) => {}
EventAddResEmpty(_) => {}
EventCancel(_) => {}
EventCancelRes(_) => {}
ReadNotify(_) => {}
ReadNotifyRes(_) => {}
Echo => {}
@@ -704,127 +753,115 @@ impl CaMsg {
let ty = CaMsgTy::CreateChanFail(CreateChanFail { cid: hi.param1 });
CaMsg::from_ty_ts(ty, tsnow)
}
1 => {
use netpod::Shape;
let ca_dbr_ty = CaDbrType::from_ca_u16(hi.data_type)?;
if let CaDbrMetaType::Time = ca_dbr_ty.meta {
} else {
return Err(Error::MismatchDbrTimeType);
}
0x01 => {
if payload.len() < 12 {
return Err(Error::NotEnoughPayloadTimeMetadata(payload.len()));
if payload.len() == 0 {
if hi.data_count() != 0 {
// TODO according to protocol, this should not happen. Count for metrics.
}
let ty = CaMsgTy::EventAddResEmpty(EventAddResEmpty {
data_type: hi.data_type,
sid: hi.param1,
subid: hi.param2,
});
return Ok(CaMsg::from_ty_ts(ty, tsnow));
} else {
error!("EventAddRes but bad header {hi:?}");
return Err(Error::NotEnoughPayloadTimeMetadata(payload.len()));
}
}
let ca_status = u16::from_be_bytes(payload[0..2].try_into().map_err(|_| Error::BadSlice)?);
let ca_severity = u16::from_be_bytes(payload[2..4].try_into().map_err(|_| Error::BadSlice)?);
let ca_secs = u32::from_be_bytes(payload[4..8].try_into().map_err(|_| Error::BadSlice)?);
let ca_nanos = u32::from_be_bytes(payload[8..12].try_into().map_err(|_| Error::BadSlice)?);
let ca_sh = Shape::from_ca_count(hi.data_count() as _).map_err(|_| {
error!("BadCaCount {hi:?}");
Error::BadCaCount
})?;
let meta_padding = match ca_dbr_ty.meta {
CaDbrMetaType::Plain => 0,
CaDbrMetaType::Status => match ca_dbr_ty.scalar_type {
CaScalarType::I8 => 1,
CaScalarType::I16 => 0,
CaScalarType::I32 => 0,
CaScalarType::F32 => 0,
CaScalarType::F64 => 4,
CaScalarType::Enum => 0,
CaScalarType::String => 0,
},
CaDbrMetaType::Time => match ca_dbr_ty.scalar_type {
CaScalarType::I8 => 3,
CaScalarType::I16 => 2,
CaScalarType::I32 => 0,
CaScalarType::F32 => 0,
CaScalarType::F64 => 4,
CaScalarType::Enum => 2,
CaScalarType::String => 0,
},
};
let valbuf = &payload[12 + meta_padding..];
let value = match ca_sh {
Shape::Scalar => Self::ca_scalar_value(&ca_dbr_ty.scalar_type, valbuf)?,
Shape::Wave(n) => {
Self::ca_wave_value(&ca_dbr_ty.scalar_type, (n as usize).min(array_truncate), valbuf)?
}
Shape::Image(_, _) => {
error!("Can not handle image from channel access");
err::todoval()
}
};
let ts = SEC * (ca_secs as u64 + EPICS_EPOCH_OFFSET) + ca_nanos as u64;
let value = CaEventValue {
ts,
status: ca_status,
severity: ca_severity,
data: value,
};
let value = Self::extract_ca_data_value(hi, payload, array_truncate)?;
let d = EventAddRes {
data_type: hi.data_type,
data_count: hi.data_count() as _,
status: hi.param1,
subid: hi.param2,
value,
payload_len: hi.payload_len() as u32,
value,
};
// TODO quick test only
if false {
let nn = 4;
let mut blob = vec![0; nn];
for (i, x) in blob.iter_mut().enumerate() {
*x = i as _;
}
let d = EventAddRes {
// i32 with time and status
data_type: 19,
data_count: nn as u32,
status: hi.param1,
subid: hi.param2,
value: CaEventValue {
ts,
status: ca_status,
severity: ca_severity,
data: CaDataValue::Array(CaDataArrayValue::I32(blob)),
},
payload_len: hi.payload_len() as u32,
};
let ty = CaMsgTy::EventAddRes(d);
return Ok(CaMsg::from_ty_ts(ty, tsnow));
}
let ty = CaMsgTy::EventAddRes(d);
CaMsg::from_ty_ts(ty, tsnow)
}
15 => {
0x0f => {
if payload.len() == 8 {
let v = u64::from_be_bytes(payload.try_into().map_err(|_| Error::BadSlice)?);
info!("Payload as u64: {v}");
debug!("Payload as u64: {v}");
let v = i64::from_be_bytes(payload.try_into().map_err(|_| Error::BadSlice)?);
info!("Payload as i64: {v}");
debug!("Payload as i64: {v}");
let v = f64::from_be_bytes(payload.try_into().map_err(|_| Error::BadSlice)?);
info!("Payload as f64: {v}");
} else {
info!(
"payload string {:?} payload {:?}",
String::from_utf8_lossy(&payload[..payload.len().min(12)]),
&payload[..payload.len().min(12)],
);
debug!("Payload as f64: {v}");
}
// TODO use different structs for request and response:
let value = Self::extract_ca_data_value(hi, payload, array_truncate)?;
let ty = CaMsgTy::ReadNotifyRes(ReadNotifyRes {
data_type: hi.data_type,
data_count: hi.data_count() as _,
sid: hi.param1,
ioid: hi.param2,
payload_len: hi.payload_len() as u32,
value,
});
CaMsg::from_ty_ts(ty, tsnow)
}
0x17 => CaMsg::from_ty_ts(CaMsgTy::Echo, tsnow),
0x11 => CaMsg::from_ty_ts(CaMsgTy::Echo, tsnow),
x => return Err(Error::CaCommandNotSupported(x)),
};
Ok(msg)
}
fn extract_ca_data_value(hi: &HeadInfo, payload: &[u8], array_truncate: usize) -> Result<CaEventValue, Error> {
use netpod::Shape;
let ca_dbr_ty = CaDbrType::from_ca_u16(hi.data_type)?;
if let CaDbrMetaType::Time = ca_dbr_ty.meta {
} else {
return Err(Error::MismatchDbrTimeType);
}
let ca_status = u16::from_be_bytes(payload[0..2].try_into().map_err(|_| Error::BadSlice)?);
let ca_severity = u16::from_be_bytes(payload[2..4].try_into().map_err(|_| Error::BadSlice)?);
let ca_secs = u32::from_be_bytes(payload[4..8].try_into().map_err(|_| Error::BadSlice)?);
let ca_nanos = u32::from_be_bytes(payload[8..12].try_into().map_err(|_| Error::BadSlice)?);
let ca_sh = Shape::from_ca_count(hi.data_count() as _).map_err(|_| {
error!("BadCaCount {hi:?}");
Error::BadCaCount
})?;
let meta_padding = match ca_dbr_ty.meta {
CaDbrMetaType::Plain => 0,
CaDbrMetaType::Status => match ca_dbr_ty.scalar_type {
CaScalarType::I8 => 1,
CaScalarType::I16 => 0,
CaScalarType::I32 => 0,
CaScalarType::F32 => 0,
CaScalarType::F64 => 4,
CaScalarType::Enum => 0,
CaScalarType::String => 0,
},
CaDbrMetaType::Time => match ca_dbr_ty.scalar_type {
CaScalarType::I8 => 3,
CaScalarType::I16 => 2,
CaScalarType::I32 => 0,
CaScalarType::F32 => 0,
CaScalarType::F64 => 4,
CaScalarType::Enum => 2,
CaScalarType::String => 0,
},
};
let valbuf = &payload[12 + meta_padding..];
let value = match ca_sh {
Shape::Scalar => Self::ca_scalar_value(&ca_dbr_ty.scalar_type, valbuf)?,
Shape::Wave(n) => Self::ca_wave_value(&ca_dbr_ty.scalar_type, (n as usize).min(array_truncate), valbuf)?,
Shape::Image(_, _) => {
error!("Can not handle image from channel access");
err::todoval()
}
};
let ts = SEC * (ca_secs as u64 + EPICS_EPOCH_OFFSET) + ca_nanos as u64;
let value = CaEventValue {
ts,
status: ca_status,
severity: ca_severity,
data: value,
};
Ok(value)
}
}
#[derive(Debug)]
@@ -833,12 +870,6 @@ pub enum CaItem {
Msg(CaMsg),
}
impl CaItem {
fn empty() -> Self {
CaItem::Empty
}
}
#[derive(Clone, Debug)]
pub struct HeadInfo {
cmdid: u16,
@@ -953,6 +984,10 @@ impl CaProto {
}
}
/// Returns the number of items currently held in the `out` queue,
/// i.e. the queue that `push_out` appends to.
pub fn proto_out_len(&self) -> usize {
self.out.len()
}
/// Appends `item` to the back of the `out` queue.
pub fn push_out(&mut self, item: CaMsg) {
self.out.push_back(item);
}

View File

@@ -29,6 +29,13 @@ where
_pin: PhantomPinned,
}
/// Compile-time helper: a call only type-checks when `T: Unpin`.
/// The body is empty, so it does nothing at runtime.
fn _require_unpin<T: Unpin>(_: &T) {}
/// Scaffold for checking at compile time whether `SenderPolling<String>` is `Unpin`.
///
/// The actual check is commented out, so this function currently asserts nothing.
/// NOTE(review): presumably the call is disabled because the type holds a
/// `PhantomPinned` marker and would fail the `Unpin` bound — confirm.
fn _check_unpin() {
// Only the static type of `_r` matters here.
// NOTE(review): `err::todoval()` looks like a placeholder value generator — confirm.
let _r: &SenderPolling<String> = err::todoval();
// _require_unpin(_r);
}
unsafe impl<T> core::marker::Send for SenderPolling<T> where T: core::marker::Send {}
impl<T> SenderPolling<T> {

View File

@@ -299,7 +299,7 @@ where
c.reset();
let nbins = tb.bins_ready_count();
if nbins >= 1 {
info!("store bins len {} {:?}", nbins, params.series);
trace!("store bins len {} {:?}", nbins, params.series);
store_bins(params.series.clone(), tb, iiq, next)?;
// if let Some(mut bins) = tb.bins_ready() {
// //info!("store bins {bins:?}");
@@ -363,7 +363,7 @@ fn store_bins(
}
// TODO this must depend on the data type: waveforms need smaller batches
let bins_per_msp = 10000;
let bins_per_msp = 82000;
let ts1ms = ts1 / MS;
let ts2ms = ts2 / MS;
@@ -382,15 +382,15 @@ fn store_bins(
avg,
};
let item = QueryItem::TimeBinSimpleF32(item);
debug!("push item B ts1ms {ts1ms} bin_len_ms {bin_len_ms} ts_msp {ts_msp} off {off}");
trace!("push item B ts1ms {ts1ms} bin_len_ms {bin_len_ms} ts_msp {ts_msp} off {off}");
iiq.push_back(item);
}
}
Ok(())
} else {
error!("unexpected container!");
return Err(Error::PatchUnexpectedContainer);
Err(Error::PatchUnexpectedContainer)
}
// TODO feed also the next patch collector for the next coarse resolution.
// pc.ingest(bins.as_mut())?;
// let noutq = pc.outq_len();
@@ -402,8 +402,6 @@ fn store_bins(
// warn!("pc outq len zero");
// Ok(())
// }
Ok(())
} else {
error!("have bins but none returned");
Err(Error::HaveBinsButNoneReturned)

View File

@@ -21,8 +21,12 @@ use scywr::iteminsertqueue::QueryItem;
use series::series::CHANNEL_STATUS_DUMMY_SCALAR_TYPE;
use series::ChannelStatusSeriesId;
use series::SeriesId;
use stats::SeriesByChannelStats;
use stats::SeriesWriterEstablishStats;
use std::collections::VecDeque;
use std::sync::atomic;
use std::sync::atomic::AtomicU64;
use std::sync::Arc;
use std::time::Duration;
use std::time::SystemTime;
#[derive(Debug, ThisError)]
@@ -213,20 +217,42 @@ pub struct JobId(pub u64);
/// Background worker that turns `EstablishWorkerJob`s into established series writers.
///
/// `work` consumes the jobs arriving on `jobrx`, calls `SeriesWriter::establish`
/// for each of them and sends the result back on the job's own result channel.
pub struct EstablishWriterWorker {
/// Channel for `ChannelInfoQuery` items; a clone is handed to every
/// `SeriesWriter::establish` call.
worker_tx: Sender<ChannelInfoQuery>,
/// Incoming jobs.
jobrx: Receiver<EstablishWorkerJob>,
/// Shared statistics handle passed in at construction.
/// NOTE(review): not read in the part of `work` visible here — confirm where it is updated.
stats: Arc<SeriesWriterEstablishStats>,
}
impl EstablishWriterWorker {
fn new(worker_tx: Sender<ChannelInfoQuery>, jobrx: Receiver<EstablishWorkerJob>) -> Self {
Self { worker_tx, jobrx }
fn new(
worker_tx: Sender<ChannelInfoQuery>,
jobrx: Receiver<EstablishWorkerJob>,
stats: Arc<SeriesWriterEstablishStats>,
) -> Self {
Self {
worker_tx,
jobrx,
stats,
}
}
async fn work(self) {
let cnt = Arc::new(AtomicU64::new(0));
taskrun::spawn({
let cnt = cnt.clone();
async move {
if true {
return Ok::<_, Error>(());
}
loop {
taskrun::tokio::time::sleep(Duration::from_millis(10000)).await;
debug!("EstablishWriterWorker cnt {}", cnt.load(atomic::Ordering::SeqCst));
}
Ok::<_, Error>(())
}
});
self.jobrx
.map(move |item| {
let wtx = self.worker_tx.clone();
let cnt = cnt.clone();
async move {
// TODO
debug!("got job");
let res = SeriesWriter::establish(
wtx.clone(),
item.backend,
@@ -236,6 +262,7 @@ impl EstablishWriterWorker {
item.tsnow,
)
.await;
cnt.fetch_add(1, atomic::Ordering::SeqCst);
if item.restx.send((item.job_id, res)).await.is_err() {
warn!("can not send writer establish result");
}
@@ -281,9 +308,10 @@ impl EstablishWorkerJob {
pub fn start_writer_establish_worker(
worker_tx: Sender<ChannelInfoQuery>,
stats: Arc<SeriesWriterEstablishStats>,
) -> Result<(Sender<EstablishWorkerJob>,), Error> {
let (tx, rx) = async_channel::bounded(256);
let worker = EstablishWriterWorker::new(worker_tx, rx);
let worker = EstablishWriterWorker::new(worker_tx, rx, stats);
taskrun::spawn(worker.work());
Ok((tx,))
}
@@ -292,6 +320,7 @@ pub fn start_writer_establish_worker(
fn write_00() {
use netpod::Database;
use scywr::session::ScyllaConfig;
use stats::SeriesByChannelStats;
use std::sync::Arc;
let fut = async {
let dbconf = &Database {

View File

@@ -306,6 +306,7 @@ stats_proc::stats_struct!((
channel_assigned_without_health_update,
channel_rogue,
),
histolog2s(poll_all_dt,),
),
// agg(name(CaConnSetStatsAgg), parent(CaConnSetStats)),
// diff(name(CaConnSetStatsDiff), input(CaConnSetStats)),
@@ -366,6 +367,7 @@ stats_proc::stats_struct!((
),
values(db_lookup_workers,)
),
stats_struct(name(SeriesWriterEstablishStats), prefix(wrest), counters(job_recv,),),
));
stats_proc::stats_struct!((
@@ -428,9 +430,6 @@ stats_proc::stats_struct!((
poll_reloop,
poll_pending,
poll_no_progress_no_pending,
poll_reloops_8,
poll_reloops_64,
poll_reloops_512,
poll_wake_break,
storage_queue_send,
storage_queue_pending,
@@ -438,9 +437,18 @@ stats_proc::stats_struct!((
storage_queue_above_32,
storage_queue_above_128,
event_add_res_recv,
caget_timeout,
),
values(inter_ivl_ema, read_ioids_len, proto_out_len,),
histolog2s(
poll_all_dt,
poll_op3_dt,
poll_reloops,
pong_recv_lat,
ca_ts_off,
iiq_batch_len,
caget_lat,
),
values(inter_ivl_ema),
histolog2s(pong_recv_lat, ca_ts_off,),
),
agg(name(CaConnStatsAgg), parent(CaConnStats)),
diff(name(CaConnStatsAggDiff), input(CaConnStatsAgg)),