From bc1818e496e3f737c8cb16eede1be13135f54879 Mon Sep 17 00:00:00 2001 From: Dominik Werder Date: Fri, 8 Nov 2024 09:24:55 +0100 Subject: [PATCH] Factored into separate crate --- .gitignore | 2 + Cargo.toml | 42 +++ src/boxed.rs | 57 ++++ src/cbor_stream.rs | 366 ++++++++++++++++++++++ src/collect.rs | 220 +++++++++++++ src/collect_adapter.rs | 124 ++++++++ src/dtflags.rs | 4 + src/events.rs | 1 + src/events/convertforbinning.rs | 166 ++++++++++ src/eventsplainreader.rs | 123 ++++++++ src/filechunkread.rs | 55 ++++ src/firsterr.rs | 46 +++ src/framed_bytes.rs | 135 ++++++++ src/frames.rs | 2 + src/frames/eventsfromframes.rs | 119 +++++++ src/frames/inmem.rs | 242 ++++++++++++++ src/generators.rs | 540 ++++++++++++++++++++++++++++++++ src/instrument.rs | 33 ++ src/itemclone.rs | 92 ++++++ src/json_stream.rs | 158 ++++++++++ src/lenframed.rs | 27 ++ src/lib.rs | 32 ++ src/needminbuffer.rs | 111 +++++++ src/plaineventscbor.rs | 30 ++ src/plaineventsjson.rs | 120 +++++++ src/plaineventsstream.rs | 226 +++++++++++++ src/print_on_done.rs | 41 +++ src/rangefilter2.rs | 276 ++++++++++++++++ src/rangefilter2/test.rs | 267 ++++++++++++++++ src/slidebuf.rs | 441 ++++++++++++++++++++++++++ src/streamtimeout.rs | 85 +++++ src/tcprawclient.rs | 205 ++++++++++++ src/test.rs | 62 ++++ src/test/collect.rs | 127 ++++++++ src/test/events.rs | 97 ++++++ src/test/timebin.rs | 445 ++++++++++++++++++++++++++ src/teststream.rs | 36 +++ src/timebin.rs | 13 + src/timebin/basic.rs | 278 ++++++++++++++++ src/timebin/cached.rs | 4 + src/timebin/cached/reader.rs | 192 ++++++++++++ src/timebin/fromevents.rs | 80 +++++ src/timebin/fromlayers.rs | 149 +++++++++ src/timebin/gapfill.rs | 489 +++++++++++++++++++++++++++++ src/timebin/grid.rs | 12 + src/timebin/timebin.rs | 0 src/timebinnedjson.rs | 472 ++++++++++++++++++++++++++++ src/transform.rs | 100 ++++++ 48 files changed, 6944 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 src/boxed.rs create mode 100644 src/cbor_stream.rs create mode 100644 src/collect.rs create mode 100644 src/collect_adapter.rs create mode 100644 src/dtflags.rs create mode 100644 src/events.rs create mode 100644 src/events/convertforbinning.rs create mode 100644 src/eventsplainreader.rs create mode 100644 src/filechunkread.rs create mode 100644 src/firsterr.rs create mode 100644 src/framed_bytes.rs create mode 100644 src/frames.rs create mode 100644 src/frames/eventsfromframes.rs create mode 100644 src/frames/inmem.rs create mode 100644 src/generators.rs create mode 100644 src/instrument.rs create mode 100644 src/itemclone.rs create mode 100644 src/json_stream.rs create mode 100644 src/lenframed.rs create mode 100644 src/lib.rs create mode 100644 src/needminbuffer.rs create mode 100644 src/plaineventscbor.rs create mode 100644 src/plaineventsjson.rs create mode 100644 src/plaineventsstream.rs create mode 100644 src/print_on_done.rs create mode 100644 src/rangefilter2.rs create mode 100644 src/rangefilter2/test.rs create mode 100644 src/slidebuf.rs create mode 100644 src/streamtimeout.rs create mode 100644 src/tcprawclient.rs create mode 100644 src/test.rs create mode 100644 src/test/collect.rs create mode 100644 src/test/events.rs create mode 100644 src/test/timebin.rs create mode 100644 src/teststream.rs create mode 100644 src/timebin.rs create mode 100644 src/timebin/basic.rs create mode 100644 src/timebin/cached.rs create mode 100644 src/timebin/cached/reader.rs create mode 100644 src/timebin/fromevents.rs create mode 100644 src/timebin/fromlayers.rs create mode 100644 src/timebin/gapfill.rs create mode 100644 src/timebin/grid.rs create mode 100644 src/timebin/timebin.rs create mode 100644 src/timebinnedjson.rs create mode 100644 src/transform.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1b72444 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/Cargo.lock +/target diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..725225f --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,42 @@ +[package] +name = "daqbuf-streams" +version = "0.0.3" +authors = ["Dominik Werder "] +edition = "2021" + +[dependencies] +futures-util = "0.3.15" +pin-project = "1.0.12" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +serde_cbor = "0.11.1" +typetag = "0.2.18" +ciborium = "0.2.1" +bytes = "1.8" +arrayref = "0.3.6" +crc32fast = "1.3.2" +byteorder = "1.4.3" +async-channel = "1.9.0" +rand_xoshiro = "0.6.0" +http = "1" +http-body = "1" +http-body-util = "0.1.0" +thiserror = "=0.0.1" +chrono = { version = "0.4.38", features = ["serde"] } +wasmer = { version = "5.0.1", default-features = false, features = ["sys", "cranelift"], optional = true } +netpod = { path = "../daqbuf-netpod", package = "daqbuf-netpod" } +query = { path = "../daqbuf-query", package = "daqbuf-query" } +items_0 = { path = "../daqbuf-items-0", package = "daqbuf-items-0" } +items_2 = { path = "../daqbuf-items-2", package = "daqbuf-items-2" } +parse = { path = "../daqbuf-parse", package = "daqbuf-parse" } + +#[dev-dependencies] +#taskrun = + +[features] +wasm_transform = ["wasmer"] +indev = [] +tests-runtime = [] + +[patch.crates-io] +thiserror = { git = "https://github.com/dominikwerder/thiserror.git", branch = "cstm" } diff --git a/src/boxed.rs b/src/boxed.rs new file mode 100644 index 0000000..2e6f178 --- /dev/null +++ b/src/boxed.rs @@ -0,0 +1,57 @@ +use futures_util::stream::StreamExt; +use futures_util::Stream; +use items_0::streamitem::RangeCompletableItem; +use items_0::streamitem::Sitemty; +use items_0::streamitem::StreamItem; +use items_0::transform::TransformProperties; +use items_0::transform::WithTransformProperties; +use items_0::Events; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; + +pub struct IntoBoxedEventStream +where + T: Events, + INP: Stream> + WithTransformProperties, +{ + //inp: Pin>>>, + inp: Pin>, +} + +impl Stream for IntoBoxedEventStream +where + T: Events, + INP: Stream> + WithTransformProperties, +{ + type Item = Sitemty>; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + match self.inp.poll_next_unpin(cx) { + Ready(Some(item)) => Ready(Some(match item { + Ok(item) => Ok(match item { + StreamItem::DataItem(item) => StreamItem::DataItem(match item { + RangeCompletableItem::RangeComplete => RangeCompletableItem::RangeComplete, + RangeCompletableItem::Data(item) => RangeCompletableItem::Data(Box::new(item)), + }), + StreamItem::Log(item) => StreamItem::Log(item), + StreamItem::Stats(item) => StreamItem::Stats(item), + }), + Err(e) => Err(e), + })), + Ready(None) => Ready(None), + Pending => Pending, + } + } +} + +impl WithTransformProperties for IntoBoxedEventStream +where + T: Events, + INP: Stream> + WithTransformProperties, +{ + fn query_transform_properties(&self) -> TransformProperties { + self.inp.query_transform_properties() + } +} diff --git a/src/cbor_stream.rs b/src/cbor_stream.rs new file mode 100644 index 0000000..0a1bf10 --- /dev/null +++ b/src/cbor_stream.rs @@ -0,0 +1,366 @@ +use crate::streamtimeout::StreamTimeout2; +use crate::streamtimeout::TimeoutableStream; +use bytes::Buf; +use bytes::BufMut; +use bytes::Bytes; +use bytes::BytesMut; +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::streamitem::sitem_err2_from_string; +use items_0::streamitem::sitem_err_from_string; +use items_0::streamitem::LogItem; +use items_0::streamitem::RangeCompletableItem; +use items_0::streamitem::Sitemty; +use items_0::streamitem::StreamItem; +use items_0::Events; +use items_0::WithLen; +use items_2::eventsdim0::EventsDim0; +use items_2::eventsdim1::EventsDim1; +use netpod::log::Level; +use netpod::log::*; +use netpod::ScalarType; +use netpod::Shape; +use std::io::Cursor; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; +use std::time::Duration; + +const FRAME_HEAD_LEN: usize = 16; +const FRAME_PAYLOAD_MAX: u32 = 1024 * 1024 * 80; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "CborStream")] +pub enum Error { + FromSlice(#[from] std::array::TryFromSliceError), + Msg(String), + Ciborium(#[from] ciborium::de::Error), +} + +struct ErrMsg(E) +where + E: ToString; + +impl From> for Error +where + E: ToString, +{ + fn from(value: ErrMsg) -> Self { + Self::Msg(value.0.to_string()) + } +} + +pub struct CborBytes(Bytes); + +impl CborBytes { + pub fn into_inner(self) -> Bytes { + self.0 + } + + pub fn len(&self) -> u32 { + self.0.len() as _ + } +} + +impl WithLen for CborBytes { + fn len(&self) -> usize { + self.len() as usize + } +} + +impl From for Bytes { + fn from(value: CborBytes) -> Self { + value.0 + } +} + +pub type CborStream = Pin> + Send>>; + +// TODO move this type decl because it is not specific to cbor +pub type SitemtyDynEventsStream = Pin>> + Send>>; + +pub fn events_stream_to_cbor_stream( + stream: SitemtyDynEventsStream, + timeout_provider: Box, +) -> impl Stream> { + let ivl = Duration::from_millis(4000); + let stream = TimeoutableStream::new(ivl, timeout_provider, stream); + let stream = stream.map(|x| match x { + Some(x) => map_events(x), + None => make_keepalive(), + }); + let prepend = { + let item = make_keepalive(); + futures_util::stream::iter([item]) + }; + prepend.chain(stream) +} + +fn map_events(x: Sitemty>) -> Result { + match x { + Ok(x) => match x { + StreamItem::DataItem(x) => match x { + RangeCompletableItem::Data(evs) => { + if false { + use items_0::AsAnyRef; + // TODO impl generically on EventsDim0 ? + if let Some(evs) = evs.as_any_ref().downcast_ref::>() { + let mut buf = Vec::new(); + ciborium::into_writer(evs, &mut buf).map_err(|e| Error::Msg(e.to_string()))?; + let bytes = Bytes::from(buf); + let _item = CborBytes(bytes); + // Ok(StreamItem::DataItem(RangeCompletableItem::Data(item))) + } else { + let _item = LogItem::from_node(0, Level::DEBUG, format!("cbor stream discarded item")); + // Ok(StreamItem::Log(item)) + }; + } + let mut k = evs; + let evs = if let Some(j) = k.as_any_mut().downcast_mut::() { + use items_0::AsAnyMut; + match j { + items_2::channelevents::ChannelEvents::Events(m) => { + if let Some(g) = m + .as_any_mut() + .downcast_mut::>() + { + trace!("consider container EnumVariant"); + let mut out = items_2::eventsdim0enum::EventsDim0Enum::new(); + for (&ts, val) in g.tss.iter().zip(g.values.iter()) { + out.push_back(ts, val.ix(), val.name_string()); + } + Box::new(items_2::channelevents::ChannelEvents::Events(Box::new(out))) + } else { + trace!("consider container channel events other events {}", k.type_name()); + k + } + } + items_2::channelevents::ChannelEvents::Status(_) => { + trace!("consider container channel events status {}", k.type_name()); + k + } + } + } else { + trace!("consider container else {}", k.type_name()); + k + }; + let buf = evs.to_cbor_vec_u8(); + let bytes = Bytes::from(buf); + let item = CborBytes(bytes); + Ok(item) + } + RangeCompletableItem::RangeComplete => { + use ciborium::cbor; + let item = cbor!({ + "rangeFinal" => true, + }) + .map_err(|e| Error::Msg(e.to_string()))?; + let mut buf = Vec::with_capacity(64); + ciborium::into_writer(&item, &mut buf).map_err(|e| Error::Msg(e.to_string()))?; + let bytes = Bytes::from(buf); + let item = CborBytes(bytes); + Ok(item) + } + }, + StreamItem::Log(item) => { + info!("{item:?}"); + let item = CborBytes(Bytes::new()); + Ok(item) + } + StreamItem::Stats(item) => { + info!("{item:?}"); + let item = CborBytes(Bytes::new()); + Ok(item) + } + }, + Err(e) => { + use ciborium::cbor; + let item = cbor!({ + "error" => e.to_string(), + }) + .map_err(|e| Error::Msg(e.to_string()))?; + let mut buf = Vec::with_capacity(64); + ciborium::into_writer(&item, &mut buf).map_err(|e| Error::Msg(e.to_string()))?; + let bytes = Bytes::from(buf); + let item = CborBytes(bytes); + Ok(item) + } + } +} + +fn make_keepalive() -> Result { + use ciborium::cbor; + let item = cbor!({ + "type" => "keepalive", + }) + .map_err(ErrMsg)?; + let mut buf = Vec::with_capacity(64); + ciborium::into_writer(&item, &mut buf).map_err(ErrMsg)?; + let bytes = Bytes::from(buf); + let item = Ok(CborBytes(bytes)); + item +} + +pub struct FramedBytesToSitemtyDynEventsStream { + inp: S, + scalar_type: ScalarType, + shape: Shape, + buf: BytesMut, +} + +impl FramedBytesToSitemtyDynEventsStream { + pub fn new(inp: S, scalar_type: ScalarType, shape: Shape) -> Self { + Self { + inp, + scalar_type, + shape, + buf: BytesMut::with_capacity(1024 * 256), + } + } + + fn try_parse(&mut self) -> Result>>, Error> { + // debug!("try_parse {}", self.buf.len()); + if self.buf.len() < FRAME_HEAD_LEN { + return Ok(None); + } + let n = u32::from_le_bytes(self.buf[..4].try_into()?); + if n > FRAME_PAYLOAD_MAX { + let e = ErrMsg(format!("frame too large {n}")).into(); + error!("{e}"); + return Err(e); + } + let frame_len = FRAME_HEAD_LEN + n as usize; + let adv = (frame_len + 7) / 8 * 8; + assert!(adv % 8 == 0); + assert!(adv >= frame_len); + assert!(adv < 8 + frame_len); + if self.buf.len() < adv { + // debug!("not enough {} {}", n, self.buf.len()); + return Ok(None); + } + let buf = &self.buf[FRAME_HEAD_LEN..frame_len]; + let val: ciborium::Value = ciborium::from_reader(std::io::Cursor::new(buf)).map_err(ErrMsg)?; + // debug!("decoded ciborium value {val:?}"); + let item = if let Some(map) = val.as_map() { + let keys: Vec<&str> = map.iter().map(|k| k.0.as_text().unwrap_or("(none)")).collect(); + debug!("keys {keys:?}"); + if let Some(x) = map.get(0) { + if let Some(y) = x.0.as_text() { + if y == "rangeFinal" { + if let Some(y) = x.1.as_bool() { + if y { + Some(StreamItem::DataItem( + RangeCompletableItem::>::RangeComplete, + )) + } else { + None + } + } else { + None + } + } else { + None + } + } else { + None + } + } else { + None + } + } else { + None + }; + let item = if let Some(x) = item { + Some(x) + } else { + let item = decode_cbor_to_box_events(buf, &self.scalar_type, &self.shape)?; + debug!("decoded boxed events len {}", item.len()); + Some(StreamItem::DataItem(RangeCompletableItem::Data(item))) + }; + self.buf.advance(adv); + if let Some(x) = item { + Ok(Some(Ok(x))) + } else { + let item = LogItem::from_node(0, Level::DEBUG, format!("decoded ciborium Value")); + Ok(Some(Ok(StreamItem::Log(item)))) + } + } +} + +impl Stream for FramedBytesToSitemtyDynEventsStream +where + S: Stream> + Unpin, + E: std::error::Error, +{ + type Item = ::Item; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + loop { + break match self.try_parse() { + Ok(Some(x)) => Ready(Some(x.map_err(|e| sitem_err2_from_string(e)))), + Ok(None) => match self.inp.poll_next_unpin(cx) { + Ready(Some(x)) => match x { + Ok(x) => { + self.buf.put_slice(&x); + continue; + } + Err(e) => Ready(Some(sitem_err_from_string(e))), + }, + Ready(None) => { + if self.buf.len() > 0 { + warn!("remaining bytes in input buffer, input closed len {}", self.buf.len()); + } + Ready(None) + } + Pending => Pending, + }, + Err(e) => Ready(Some(sitem_err_from_string(e))), + }; + } + } +} + +macro_rules! cbor_scalar { + ($ty:ident, $buf:expr) => {{ + type T = $ty; + type C = EventsDim0; + let item: C = ciborium::from_reader(Cursor::new($buf))?; + Box::new(item) + }}; +} + +macro_rules! cbor_wave { + ($ty:ident, $buf:expr) => {{ + type T = $ty; + type C = EventsDim1; + let item: C = ciborium::from_reader(Cursor::new($buf))?; + Box::new(item) + }}; +} + +fn decode_cbor_to_box_events(buf: &[u8], scalar_type: &ScalarType, shape: &Shape) -> Result, Error> { + let item: Box = match shape { + Shape::Scalar => match scalar_type { + ScalarType::U8 => cbor_scalar!(u8, buf), + ScalarType::U16 => cbor_scalar!(u16, buf), + ScalarType::U32 => cbor_scalar!(u32, buf), + ScalarType::U64 => cbor_scalar!(u64, buf), + ScalarType::I8 => cbor_scalar!(i8, buf), + ScalarType::I16 => cbor_scalar!(i16, buf), + ScalarType::I32 => cbor_scalar!(i32, buf), + ScalarType::I64 => cbor_scalar!(i64, buf), + ScalarType::F32 => cbor_scalar!(f32, buf), + ScalarType::F64 => cbor_scalar!(f64, buf), + _ => return Err(ErrMsg(format!("decode_cbor_to_box_events {:?} {:?}", scalar_type, shape)).into()), + }, + Shape::Wave(_) => match scalar_type { + ScalarType::U8 => cbor_wave!(u8, buf), + ScalarType::U16 => cbor_wave!(u16, buf), + ScalarType::I64 => cbor_wave!(i64, buf), + _ => return Err(ErrMsg(format!("decode_cbor_to_box_events {:?} {:?}", scalar_type, shape)).into()), + }, + Shape::Image(_, _) => todo!(), + }; + Ok(item) +} diff --git a/src/collect.rs b/src/collect.rs new file mode 100644 index 0000000..4f9e99e --- /dev/null +++ b/src/collect.rs @@ -0,0 +1,220 @@ +use crate::streamtimeout::StreamTimeout2; +use futures_util::Future; +use futures_util::FutureExt; +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::collect_s::CollectableDyn; +use items_0::collect_s::CollectedDyn; +use items_0::collect_s::CollectorDyn; +use items_0::streamitem::RangeCompletableItem; +use items_0::streamitem::Sitemty; +use items_0::streamitem::StatsItem; +use items_0::streamitem::StreamItem; +use items_0::WithLen; +use netpod::log::*; +use netpod::range::evrange::SeriesRange; +use netpod::BinnedRangeEnum; +use netpod::DiskStats; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; +use std::time::Instant; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "CollectDyn")] +pub enum Error { + Msg(String), + NoResultNoCollector, +} + +struct ErrMsg(E) +where + E: ToString; + +impl From> for Error +where + E: ToString, +{ + fn from(value: ErrMsg) -> Self { + Self::Msg(value.0.to_string()) + } +} + +pub enum CollectResult { + Timeout, + Some(T), +} + +pub struct Collect { + inp: Pin>> + Send>>, + events_max: u64, + bytes_max: u64, + range: Option, + binrange: Option, + collector: Option>, + range_final: bool, + timeout: bool, + timer: Pin + Send>>, + done_input: bool, +} + +impl Collect { + pub fn new( + inp: Pin>> + Send>>, + deadline: Instant, + events_max: u64, + bytes_max: u64, + range: Option, + binrange: Option, + timeout_provider: Box, + ) -> Self { + Self { + inp, + events_max, + bytes_max, + range, + binrange, + collector: None, + range_final: false, + timeout: false, + timer: timeout_provider.timeout_intervals(deadline.saturating_duration_since(Instant::now())), + done_input: false, + } + } + + fn handle_item(&mut self, item: Sitemty>) -> Result<(), Error> { + match item { + Ok(item) => match item { + StreamItem::DataItem(item) => match item { + RangeCompletableItem::RangeComplete => { + self.range_final = true; + if let Some(coll) = self.collector.as_mut() { + coll.set_range_complete(); + } else { + warn!("collect received RangeComplete but no collector yet"); + } + Ok(()) + } + RangeCompletableItem::Data(mut item) => { + trace!("collect sees len {}", item.len()); + let coll = self.collector.get_or_insert_with(|| item.new_collector()); + coll.ingest(&mut item); + if coll.len() as u64 >= self.events_max { + info!("reached events_max {} / {}", coll.len(), self.events_max); + coll.set_continue_at_here(); + self.done_input = true; + } + if coll.byte_estimate() >= self.bytes_max { + info!("reached bytes_max {} / {}", coll.byte_estimate(), self.events_max); + coll.set_continue_at_here(); + self.done_input = true; + } + Ok(()) + } + }, + StreamItem::Log(item) => { + if item.level == Level::ERROR { + error!("node {} msg {}", item.node_ix, item.msg); + } else if item.level == Level::WARN { + warn!("node {} msg {}", item.node_ix, item.msg); + } else if item.level == Level::INFO { + info!("node {} msg {}", item.node_ix, item.msg); + } else if item.level == Level::DEBUG { + debug!("node {} msg {}", item.node_ix, item.msg); + } else if item.level == Level::TRACE { + trace!("node {} msg {}", item.node_ix, item.msg); + } + Ok(()) + } + StreamItem::Stats(item) => { + trace!("collect stats {:?}", item); + match item { + // TODO factor and simplify the stats collection: + StatsItem::EventDataReadStats(_) => {} + StatsItem::RangeFilterStats(_) => {} + StatsItem::DiskStats(item) => match item { + DiskStats::OpenStats(_) => { + //total_duration += k.duration; + } + DiskStats::SeekStats(_) => { + //total_duration += k.duration; + } + DiskStats::ReadStats(_) => { + //total_duration += k.duration; + } + DiskStats::ReadExactStats(_) => { + //total_duration += k.duration; + } + }, + _ => {} + } + Ok(()) + } + }, + Err(e) => { + // TODO Need to use some flags to get good enough error message for remote user. + Err(ErrMsg(e).into()) + } + } + } +} + +impl Future for Collect { + type Output = Result>, Error>; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll { + use Poll::*; + let span = tracing::span!(Level::INFO, "Collect"); + let _spg = span.enter(); + loop { + break if self.done_input { + if self.timeout { + if let Some(coll) = self.collector.as_mut() { + info!("Collect call set_timed_out"); + coll.set_timed_out(); + } else { + warn!("collect timeout but no collector yet"); + } + } + // TODO use range_final and timeout in result. + match self.collector.take() { + Some(mut coll) => match coll.result(self.range.clone(), self.binrange.clone()) { + Ok(res) => { + //info!("collect stats total duration: {:?}", total_duration); + Ready(Ok(CollectResult::Some(res))) + } + Err(e) => Ready(Err(ErrMsg(e).into())), + }, + None => { + debug!("no result because no collector was created"); + Ready(Ok(CollectResult::Timeout)) + } + } + } else { + match self.timer.poll_unpin(cx) { + Ready(()) => { + self.timeout = true; + self.done_input = true; + continue; + } + Pending => match self.inp.poll_next_unpin(cx) { + Ready(Some(item)) => match self.handle_item(item) { + Ok(()) => { + continue; + } + Err(e) => { + error!("Collect {e}"); + Ready(Err(e)) + } + }, + Ready(None) => { + self.done_input = true; + continue; + } + Pending => Pending, + }, + } + }; + } + } +} diff --git a/src/collect_adapter.rs b/src/collect_adapter.rs new file mode 100644 index 0000000..064c3ac --- /dev/null +++ b/src/collect_adapter.rs @@ -0,0 +1,124 @@ +use std::fmt; +use std::time::Duration; +use tracing::Instrument; + +async fn collect_in_span( + stream: S, + deadline: Instant, + events_max: u64, + range: Option, + binrange: Option, +) -> Result, Error> +where + S: Stream> + Unpin, + T: CollectableDyn, +{ + info!("collect events_max {events_max} deadline {deadline:?}"); + let mut collector: Option> = None; + let mut stream = stream; + let deadline = deadline.into(); + let mut range_complete = false; + let mut timed_out = false; + let mut total_duration = Duration::ZERO; + loop { + let item = match tokio::time::timeout_at(deadline, stream.next()).await { + Ok(Some(k)) => k, + Ok(None) => break, + Err(_e) => { + warn!("collect timeout"); + timed_out = true; + if let Some(coll) = collector.as_mut() { + info!("collect_in_span call set_timed_out"); + coll.set_timed_out(); + } else { + warn!("collect_in_span collect timeout but no collector yet"); + } + break; + } + }; + match item { + Ok(item) => match item { + StreamItem::DataItem(item) => match item { + RangeCompletableItem::RangeComplete => { + range_complete = true; + if let Some(coll) = collector.as_mut() { + coll.set_range_complete(); + } else { + warn!("collect_in_span received RangeComplete but no collector yet"); + } + } + RangeCompletableItem::Data(mut item) => { + trace!("collect_in_span sees len {}", item.len()); + if collector.is_none() { + let c = item.new_collector(); + collector = Some(c); + } + let coll = collector.as_mut().unwrap(); + coll.ingest(&mut item); + if coll.len() as u64 >= events_max { + warn!("span reached events_max {}", events_max); + info!("collect_in_span call set_continue_at_here"); + coll.set_continue_at_here(); + break; + } + } + }, + StreamItem::Log(item) => { + trace!("collect_in_span log {:?}", item); + } + StreamItem::Stats(item) => { + trace!("collect_in_span stats {:?}", item); + match item { + // TODO factor and simplify the stats collection: + StatsItem::EventDataReadStats(_) => {} + StatsItem::RangeFilterStats(_) => {} + StatsItem::DiskStats(item) => match item { + DiskStats::OpenStats(k) => { + total_duration += k.duration; + } + DiskStats::SeekStats(k) => { + total_duration += k.duration; + } + DiskStats::ReadStats(k) => { + total_duration += k.duration; + } + DiskStats::ReadExactStats(k) => { + total_duration += k.duration; + } + }, + _ => {} + } + } + }, + Err(e) => { + // TODO Need to use some flags to get good enough error message for remote user. + return Err(ErrMsg(e).into()); + } + } + } + let _ = range_complete; + let _ = timed_out; + let res = collector + .ok_or_else(|| Error::NoResultNoCollector)? + .result(range, binrange) + .map_err(ErrMsg)?; + info!("collect_in_span stats total duration: {:?}", total_duration); + Ok(res) +} + +async fn collect( + stream: S, + deadline: Instant, + events_max: u64, + range: Option, + binrange: Option, +) -> Result, Error> +where + S: Stream> + Unpin, + T: CollectableDyn + WithLen + fmt::Debug, +{ + let span = span!(Level::INFO, "collect"); + collect_in_span(stream, deadline, events_max, range, binrange) + .instrument(span) + .await +} diff --git a/src/dtflags.rs b/src/dtflags.rs new file mode 100644 index 0000000..c8ff5fd --- /dev/null +++ b/src/dtflags.rs @@ -0,0 +1,4 @@ +pub const COMPRESSION: u8 = 0x80; +pub const ARRAY: u8 = 0x40; +pub const BIG_ENDIAN: u8 = 0x20; +pub const SHAPE: u8 = 0x10; diff --git a/src/events.rs b/src/events.rs new file mode 100644 index 0000000..7d0f1ac --- /dev/null +++ b/src/events.rs @@ -0,0 +1 @@ +pub mod convertforbinning; diff --git a/src/events/convertforbinning.rs b/src/events/convertforbinning.rs new file mode 100644 index 0000000..f4fae64 --- /dev/null +++ b/src/events/convertforbinning.rs @@ -0,0 +1,166 @@ +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::streamitem::RangeCompletableItem::*; +use items_0::streamitem::Sitemty; +use items_0::streamitem::StreamItem::*; +use items_0::Empty; +use items_2::channelevents::ChannelEvents; +use items_2::eventsdim0::EventsDim0; +use netpod::EnumVariant; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; + +pub struct ConvertForBinning { + inp: Pin> + Send>>, +} + +impl ConvertForBinning { + pub fn new(inp: Pin> + Send>>) -> Self { + Self { inp } + } +} + +impl Stream for ConvertForBinning { + type Item = Sitemty; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + match self.inp.poll_next_unpin(cx) { + Ready(Some(item)) => match &item { + Ok(DataItem(Data(cevs))) => match cevs { + ChannelEvents::Events(evs) => { + if let Some(evs) = evs.as_any_ref().downcast_ref::>() { + let mut dst = EventsDim0::::empty(); + for ((&ts, &pulse), val) in evs + .tss() + .iter() + .zip(evs.pulses.iter()) + .zip(evs.private_values_ref().iter()) + { + dst.push_back(ts, pulse, val.ix()); + } + let item = Ok(DataItem(Data(ChannelEvents::Events(Box::new(dst))))); + Ready(Some(item)) + } else if let Some(evs) = evs.as_any_ref().downcast_ref::>() { + let mut dst = EventsDim0::::empty(); + for ((&ts, &pulse), &val) in evs + .tss() + .iter() + .zip(evs.pulses.iter()) + .zip(evs.private_values_ref().iter()) + { + dst.push_back(ts, pulse, val as u8); + } + let item = Ok(DataItem(Data(ChannelEvents::Events(Box::new(dst))))); + Ready(Some(item)) + } else if let Some(evs) = evs.as_any_ref().downcast_ref::>() { + let mut dst = EventsDim0::::empty(); + for ((&ts, &pulse), _) in evs + .tss() + .iter() + .zip(evs.pulses.iter()) + .zip(evs.private_values_ref().iter()) + { + dst.push_back(ts, pulse, 1); + } + let item = Ok(DataItem(Data(ChannelEvents::Events(Box::new(dst))))); + Ready(Some(item)) + } else { + Ready(Some(item)) + } + } + ChannelEvents::Status(_) => Ready(Some(item)), + }, + _ => Ready(Some(item)), + }, + Ready(None) => Ready(None), + Pending => Pending, + } + } +} + +pub struct ConvertForTesting { + inp: Pin> + Send>>, +} + +impl ConvertForTesting { + pub fn new(inp: Pin> + Send>>) -> Self { + Self { inp } + } +} + +impl Stream for ConvertForTesting { + type Item = Sitemty; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + match self.inp.poll_next_unpin(cx) { + Ready(Some(item)) => match &item { + Ok(DataItem(Data(cevs))) => match cevs { + ChannelEvents::Events(evs) => { + if let Some(evs) = evs.as_any_ref().downcast_ref::>() { + let buf = std::fs::read("evmod").unwrap_or(Vec::new()); + let s = String::from_utf8_lossy(&buf); + if s.contains("u8") { + use items_0::Empty; + let mut dst = EventsDim0::::empty(); + for (ts, val) in evs.tss().iter().zip(evs.private_values_ref().iter()) { + let v = (val * 1e6) as u8; + dst.push_back(*ts, 0, v); + } + let item = Ok(DataItem(Data(ChannelEvents::Events(Box::new(dst))))); + Ready(Some(item)) + } else if s.contains("i16") { + use items_0::Empty; + let mut dst = EventsDim0::::empty(); + for (ts, val) in evs.tss().iter().zip(evs.private_values_ref().iter()) { + let v = (val * 1e6) as i16 - 50; + dst.push_back(*ts, 0, v); + } + let item = Ok(DataItem(Data(ChannelEvents::Events(Box::new(dst))))); + Ready(Some(item)) + } else if s.contains("bool") { + use items_0::Empty; + let mut dst = EventsDim0::::empty(); + for (ts, val) in evs.tss().iter().zip(evs.private_values_ref().iter()) { + let g = u64::from_ne_bytes(val.to_ne_bytes()); + let val = g % 2 == 0; + dst.push_back(*ts, 0, val); + } + let item = Ok(DataItem(Data(ChannelEvents::Events(Box::new(dst))))); + Ready(Some(item)) + } else if s.contains("enum") { + use items_0::Empty; + let mut dst = EventsDim0::::empty(); + for (ts, val) in evs.tss().iter().zip(evs.private_values_ref().iter()) { + let buf = val.to_ne_bytes(); + let h = buf[0] ^ buf[1] ^ buf[2] ^ buf[3] ^ buf[4] ^ buf[5] ^ buf[6] ^ buf[7]; + dst.push_back(*ts, 0, EnumVariant::new(h as u16, h.to_string())); + } + let item = Ok(DataItem(Data(ChannelEvents::Events(Box::new(dst))))); + Ready(Some(item)) + } else if s.contains("string") { + use items_0::Empty; + let mut dst = EventsDim0::::empty(); + for (ts, val) in evs.tss().iter().zip(evs.private_values_ref().iter()) { + dst.push_back(*ts, 0, val.to_string()); + } + let item = Ok(DataItem(Data(ChannelEvents::Events(Box::new(dst))))); + Ready(Some(item)) + } else { + Ready(Some(item)) + } + } else { + Ready(Some(item)) + } + } + ChannelEvents::Status(_) => Ready(Some(item)), + }, + _ => Ready(Some(item)), + }, + Ready(None) => Ready(None), + Pending => Pending, + } + } +} diff --git a/src/eventsplainreader.rs b/src/eventsplainreader.rs new file mode 100644 index 0000000..8805865 --- /dev/null +++ b/src/eventsplainreader.rs @@ -0,0 +1,123 @@ +use crate::tcprawclient::OpenBoxedBytesStreamsBox; +use crate::timebin::cached::reader::EventsReadProvider; +use crate::timebin::cached::reader::EventsReading; +use crate::timebin::CacheReadProvider; +use futures_util::Future; +use futures_util::FutureExt; +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::streamitem::sitem_err_from_string; +use items_0::streamitem::Sitemty; +use items_2::channelevents::ChannelEvents; +use netpod::ReqCtx; +use query::api4::events::EventsSubQuery; +use std::pin::Pin; +use std::sync::Arc; +use std::task::Context; +use std::task::Poll; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "EventsPlainReader")] +pub enum Error { + Timebinned(#[from] crate::timebinnedjson::Error), +} + +type ChEvsBox = Pin> + Send>>; + +enum StreamState { + Opening(Pin> + Send>>), + Reading(ChEvsBox), +} + +struct InnerStream { + state: StreamState, +} + +impl Stream for InnerStream { + type Item = Sitemty; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + loop { + break match &mut self.state { + StreamState::Opening(fut) => match fut.poll_unpin(cx) { + Ready(Ok(x)) => { + self.state = StreamState::Reading(x); + continue; + } + Ready(Err(e)) => Ready(Some(sitem_err_from_string(e))), + Pending => Pending, + }, + StreamState::Reading(fut) => match fut.poll_next_unpin(cx) { + Ready(Some(x)) => Ready(Some(x)), + Ready(None) => Ready(None), + Pending => Pending, + }, + }; + } + } +} + +pub struct SfDatabufferEventReadProvider { + ctx: Arc, + open_bytes: OpenBoxedBytesStreamsBox, +} + +impl SfDatabufferEventReadProvider { + pub fn new(ctx: Arc, open_bytes: OpenBoxedBytesStreamsBox) -> Self { + Self { ctx, open_bytes } + } +} + +impl EventsReadProvider for SfDatabufferEventReadProvider { + fn read(&self, evq: EventsSubQuery) -> EventsReading { + let range = match evq.range() { + netpod::range::evrange::SeriesRange::TimeRange(x) => x.clone(), + netpod::range::evrange::SeriesRange::PulseRange(_) => panic!("not available for pulse range"), + }; + let ctx = self.ctx.clone(); + let open_bytes = self.open_bytes.clone(); + let state = StreamState::Opening(Box::pin(async move { + let ret = crate::timebinnedjson::timebinnable_stream_sf_databuffer_channelevents( + range, + evq.need_one_before_range(), + evq.ch_conf().clone(), + evq.transform().clone(), + evq.settings().clone(), + evq.log_level().into(), + ctx, + open_bytes, + ) + .await; + ret.map_err(|e| e.into()).map(|x| Box::pin(x) as _) + })); + let stream = InnerStream { state }; + EventsReading::new(Box::pin(stream)) + } +} + +pub struct DummyCacheReadProvider {} + +impl DummyCacheReadProvider { + pub fn new() -> Self { + Self {} + } +} + +impl CacheReadProvider for DummyCacheReadProvider { + fn read( + &self, + series: u64, + bin_len: netpod::DtMs, + msp: u64, + offs: std::ops::Range, + ) -> crate::timebin::cached::reader::CacheReading { + let stream = futures_util::future::ready(Ok(None)); + crate::timebin::cached::reader::CacheReading::new(Box::pin(stream)) + } + + fn write(&self, series: u64, bins: items_0::timebin::BinsBoxed) -> crate::timebin::cached::reader::CacheWriting { + let fut = futures_util::future::ready(Ok(())); + crate::timebin::cached::reader::CacheWriting::new(Box::pin(fut)) + } +} diff --git a/src/filechunkread.rs b/src/filechunkread.rs new file mode 100644 index 0000000..2bdc9c7 --- /dev/null +++ b/src/filechunkread.rs @@ -0,0 +1,55 @@ +use bytes::BytesMut; +use std::fmt; +use std::time::Duration; + +pub struct FileChunkRead { + buf: BytesMut, + duration: Duration, +} + +impl FileChunkRead { + pub fn with_buf(buf: BytesMut) -> Self { + Self { + buf, + duration: Duration::from_millis(0), + } + } + + pub fn with_buf_dur(buf: BytesMut, duration: Duration) -> Self { + Self { buf, duration } + } + + pub fn into_buf(self) -> BytesMut { + self.buf + } + + pub fn buf(&self) -> &BytesMut { + &self.buf + } + + pub fn buf_mut(&mut self) -> &mut BytesMut { + &mut self.buf + } + + pub fn buf_take(&mut self) -> BytesMut { + core::mem::replace(&mut self.buf, BytesMut::new()) + } + + pub fn duration(&self) -> &Duration { + &self.duration + } + + pub fn duration_mut(&mut self) -> &mut Duration { + &mut self.duration + } +} + +impl fmt::Debug for FileChunkRead { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("FileChunkRead") + .field("buf.len", &self.buf.len()) + .field("buf.cap", &self.buf.capacity()) + .field("duration", &self.duration) + .finish() + } +} diff --git a/src/firsterr.rs b/src/firsterr.rs new file mode 100644 index 0000000..a002b9a --- /dev/null +++ b/src/firsterr.rs @@ -0,0 +1,46 @@ +use crate::cbor_stream::CborBytes; +use futures_util::future; +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::WithLen; + +pub fn non_empty(inp: S) -> impl Stream> +where + S: Stream>, + T: WithLen, +{ + inp.filter(|x| { + future::ready(match x { + Ok(x) => x.len() > 0, + Err(_) => true, + }) + }) +} + +pub fn non_empty_nongen(inp: S) -> impl Stream> +where + S: Stream>, +{ + inp.filter(|x| { + future::ready(match x { + Ok(x) => x.len() > 0, + Err(_) => true, + }) + }) +} + +pub fn only_first_err(inp: S) -> impl Stream> +where + S: Stream>, +{ + inp.take_while({ + let mut state = true; + move |x| { + let ret = state; + if x.is_err() { + state = false; + } + future::ready(ret) + } + }) +} diff --git a/src/framed_bytes.rs b/src/framed_bytes.rs new file mode 100644 index 0000000..b4fe883 --- /dev/null +++ b/src/framed_bytes.rs @@ -0,0 +1,135 @@ +use bytes::Buf; +use bytes::BufMut; +use bytes::Bytes; +use bytes::BytesMut; +use futures_util::Stream; +use futures_util::StreamExt; +use netpod::log::*; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; + +const FRAME_HEAD_LEN: usize = 16; +const FRAME_PAYLOAD_MAX: u32 = 1024 * 1024 * 8; +const BUF_MAX: usize = (FRAME_HEAD_LEN + FRAME_PAYLOAD_MAX as usize) * 2; + +#[allow(unused)] +macro_rules! trace_parse { + ($($arg:tt)*) => { + if false { + trace!($($arg)*); + } + }; +} + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "StreamFramedBytes")] +pub enum Error { + FrameTooLarge, + Logic, +} + +pub type BoxedFramedBytesStream = Pin> + Send>>; + +// TODO move this type decl because it is not specific to cbor +pub type SitemtyFramedBytesStream = Pin> + Send>>; + +pub enum State { + Reading, + Done, +} + +pub struct FramedBytesStream { + inp: S, + buf: BytesMut, + state: State, +} + +impl FramedBytesStream +where + S: Stream> + Unpin, + E: Into, +{ + pub fn new(inp: S) -> Self { + Self { + inp, + buf: BytesMut::with_capacity(1024 * 256), + state: State::Reading, + } + } + + fn try_parse(&mut self) -> Result, Error> { + trace_parse!("try_parse self.buf.len() {}", self.buf.len()); + if self.buf.len() < FRAME_HEAD_LEN { + return Ok(None); + } + let n = u32::from_le_bytes(self.buf[..4].try_into().map_err(|_| Error::Logic)?); + trace_parse!("try_parse n {}", n); + if n > FRAME_PAYLOAD_MAX { + let e = Error::FrameTooLarge; + return Err(e); + } + let frame_len = FRAME_HEAD_LEN + n as usize; + trace_parse!("try_parse frame_len {}", frame_len); + assert!(self.buf.len() <= self.buf.capacity()); + if self.buf.capacity() < frame_len { + let add_max = BUF_MAX - self.buf.capacity().min(BUF_MAX); + let nadd = ((frame_len.min(FRAME_PAYLOAD_MAX as usize) - self.buf.len()) * 2).min(add_max); + self.buf.reserve(nadd); + } + let adv = (frame_len + 7) / 8 * 8; + trace_parse!("try_parse adv {}", adv); + if self.buf.len() < adv { + Ok(None) + } else { + self.buf.advance(FRAME_HEAD_LEN); + let buf = self.buf.split_to(n as usize); + self.buf.advance(adv - frame_len); + Ok(Some(buf.freeze())) + } + } +} + +impl Stream for FramedBytesStream +where + S: Stream> + Unpin, + E: Into, +{ + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + loop { + break match &self.state { + State::Reading => match self.try_parse() { + Ok(Some(x)) => Ready(Some(Ok(x))), + Ok(None) => match self.inp.poll_next_unpin(cx) { + Ready(Some(x)) => match x { + Ok(x) => { + self.buf.put_slice(&x); + continue; + } + Err(e) => { + self.state = State::Done; + Ready(Some(Err(e.into()))) + } + }, + Ready(None) => { + if self.buf.len() > 0 { + warn!("remaining bytes in input buffer, input closed len {}", self.buf.len()); + } + self.state = State::Done; + Ready(None) + } + Pending => Pending, + }, + Err(e) => { + self.state = State::Done; + Ready(Some(Err(e))) + } + }, + State::Done => Ready(None), + }; + } + } +} diff --git a/src/frames.rs b/src/frames.rs new file mode 100644 index 0000000..dbb234f --- /dev/null +++ b/src/frames.rs @@ -0,0 +1,2 @@ +pub mod eventsfromframes; +pub mod inmem; diff --git a/src/frames/eventsfromframes.rs b/src/frames/eventsfromframes.rs new file mode 100644 index 0000000..d87e189 --- /dev/null +++ b/src/frames/eventsfromframes.rs @@ -0,0 +1,119 @@ +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::framable::FrameTypeInnerStatic; +use items_0::streamitem::sitem_err_from_string; +use items_0::streamitem::RangeCompletableItem; +use items_0::streamitem::SitemErrTy; +use items_0::streamitem::Sitemty; +use items_0::streamitem::StreamItem; +use items_2::frame::decode_frame; +use items_2::inmem::InMemoryFrame; +use netpod::log::*; +use serde::de::DeserializeOwned; +use std::marker::PhantomData; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "FromFrames")] +pub enum Error {} + +pub struct EventsFromFrames { + inp: Pin, SitemErrTy>> + Send>>, + dbgdesc: String, + errored: bool, + completed: bool, + _m1: PhantomData, +} + +impl EventsFromFrames { + pub fn new( + inp: Pin, SitemErrTy>> + Send>>, + dbgdesc: String, + ) -> Self { + Self { + inp, + dbgdesc, + errored: false, + completed: false, + _m1: PhantomData, + } + } +} + +impl Stream for EventsFromFrames +where + O: FrameTypeInnerStatic + DeserializeOwned + Unpin, +{ + type Item = Sitemty; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + let span = span!(Level::INFO, "EvFrFr", id = tracing::field::Empty); + span.record("id", &self.dbgdesc); + let _spg = span.enter(); + loop { + break if self.completed { + panic!("poll_next on completed"); + } else if self.errored { + self.completed = true; + Ready(None) + } else { + match self.inp.poll_next_unpin(cx) { + Ready(Some(Ok(item))) => match item { + StreamItem::Log(item) => { + //info!("{} {:?} {}", item.node_ix, item.level, item.msg); + Ready(Some(Ok(StreamItem::Log(item)))) + } + StreamItem::Stats(item) => Ready(Some(Ok(StreamItem::Stats(item)))), + StreamItem::DataItem(frame) => match decode_frame::>(&frame) { + Ok(item) => match item { + Ok(item) => match item { + StreamItem::DataItem(item2) => match item2 { + RangeCompletableItem::Data(item3) => { + Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(item3))))) + } + RangeCompletableItem::RangeComplete => { + debug!("EventsFromFrames RangeComplete"); + Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)))) + } + }, + StreamItem::Log(k) => { + //info!("rcvd log: {} {:?} {}", k.node_ix, k.level, k.msg); + Ready(Some(Ok(StreamItem::Log(k)))) + } + StreamItem::Stats(k) => Ready(Some(Ok(StreamItem::Stats(k)))), + }, + Err(e) => { + error!("rcvd err: {}", e); + self.errored = true; + Ready(Some(Err(e))) + } + }, + Err(e) => { + error!( + "frame payload len {} tyid {:04x} {}", + frame.buf().len(), + frame.tyid(), + e + ); + self.errored = true; + Ready(Some(sitem_err_from_string(e))) + } + }, + }, + Ready(Some(Err(e))) => { + self.errored = true; + Ready(Some(sitem_err_from_string(e))) + } + Ready(None) => { + self.completed = true; + Ready(None) + } + Pending => Pending, + } + }; + } + } +} diff --git a/src/frames/inmem.rs b/src/frames/inmem.rs new file mode 100644 index 0000000..5cfda2e --- /dev/null +++ b/src/frames/inmem.rs @@ -0,0 +1,242 @@ +use crate::slidebuf::SlideBuf; +use bytes::Bytes; +use futures_util::pin_mut; +use futures_util::Stream; +use items_0::streamitem::SitemErrTy; +use items_0::streamitem::StreamItem; +use items_0::streamitem::TERM_FRAME_TYPE_ID; +use items_2::framable::INMEM_FRAME_FOOT; +use items_2::framable::INMEM_FRAME_HEAD; +use items_2::framable::INMEM_FRAME_MAGIC; +use items_2::inmem::InMemoryFrame; +use netpod::log::*; +use netpod::ByteSize; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; +// use tokio::io::AsyncRead; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "InMem")] +pub enum Error { + Input, + Slidebuf(#[from] crate::slidebuf::Error), + IO(#[from] std::io::Error), + LessThanNeedMin, + LessThanHeader, + HugeFrame(u32), + BadMagic(u32), + TryFromSlice(#[from] std::array::TryFromSliceError), + BadCrc, + EnoughInputNothingParsed, +} + +pub type BoxedBytesStream = Pin> + Send>>; + +macro_rules! trace2 { ($($arg:tt)*) => ( if false { trace!($($arg)*); } ); } + +/// Interprets a byte stream as length-delimited frames. +/// +/// Emits each frame as a single item. Therefore, each item must fit easily into memory. +pub struct InMemoryFrameStream +where + T: Stream> + Unpin, +{ + inp: T, + // TODO since we moved to input stream of Bytes, we have the danger that the ring buffer + // is not large enough. Actually, this should rather use a RopeBuf with incoming owned bufs. + buf: SlideBuf, + need_min: usize, + done: bool, + complete: bool, + inp_bytes_consumed: u64, +} + +impl InMemoryFrameStream +where + T: Stream> + Unpin, +{ + pub fn type_name() -> &'static str { + std::any::type_name::() + } + + pub fn new(inp: T, bufcap: ByteSize) -> Self { + Self { + inp, + buf: SlideBuf::new(bufcap.bytes() as usize), + need_min: INMEM_FRAME_HEAD, + done: false, + complete: false, + inp_bytes_consumed: 0, + } + } + + fn poll_upstream(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + trace2!("poll_upstream"); + use Poll::*; + // use tokio::io::AsyncRead; + // use tokio::io::ReadBuf; + // let mut buf = ReadBuf::new(self.buf.available_writable_area(self.need_min.saturating_sub(self.buf.len()))?); + let inp = &mut self.inp; + pin_mut!(inp); + match inp.poll_next(cx) { + Ready(Some(Ok(x))) => match self.buf.available_writable_area(x.len()) { + Ok(dst) => { + dst[..x.len()].copy_from_slice(&x); + self.buf.wadv(x.len())?; + Ready(Ok(x.len())) + } + Err(e) => Ready(Err(e.into())), + }, + Ready(Some(Err(_e))) => Ready(Err(Error::Input)), + Ready(None) => Ready(Ok(0)), + Pending => Pending, + } + // match AsyncRead::poll_read(inp, cx, &mut buf) { + // Ready(Ok(())) => { + // let n = buf.filled().len(); + // self.buf.wadv(n)?; + // trace2!("recv bytes {}", n); + // Ready(Ok(n)) + // } + // Ready(Err(e)) => Ready(Err(e.into())), + // Pending => Pending, + // } + } + + // Try to consume bytes to parse a frame. + // Update the need_min to the most current state. + // Must only be called when at least `need_min` bytes are available. + fn parse(&mut self) -> Result, Error> { + let buf = self.buf.data(); + if buf.len() < self.need_min { + return Err(Error::LessThanNeedMin); + } + if buf.len() < INMEM_FRAME_HEAD { + return Err(Error::LessThanHeader); + } + let magic = u32::from_le_bytes(buf[0..4].try_into()?); + let encid = u32::from_le_bytes(buf[4..8].try_into()?); + let tyid = u32::from_le_bytes(buf[8..12].try_into()?); + let len = u32::from_le_bytes(buf[12..16].try_into()?); + let payload_crc_exp = u32::from_le_bytes(buf[16..20].try_into()?); + if magic != INMEM_FRAME_MAGIC { + let n = buf.len().min(64); + let u = String::from_utf8_lossy(&buf[0..n]); + let msg = format!( + "InMemoryFrameAsyncReadStream tryparse incorrect magic: {} buf as utf8: {:?}", + magic, u + ); + error!("{msg}"); + return Err(Error::BadMagic(magic)); + } + if len > 1024 * 1024 * 50 { + let msg = format!( + "InMemoryFrameAsyncReadStream tryparse huge buffer len {} self.inp_bytes_consumed {}", + len, self.inp_bytes_consumed + ); + error!("{msg}"); + return Err(Error::HugeFrame(len)); + } + let lentot = INMEM_FRAME_HEAD + INMEM_FRAME_FOOT + len as usize; + if buf.len() < lentot { + // TODO count cases in production + self.need_min = lentot; + return Ok(None); + } + let p1 = INMEM_FRAME_HEAD + len as usize; + let mut h = crc32fast::Hasher::new(); + h.update(&buf[..p1]); + let frame_crc = h.finalize(); + let mut h = crc32fast::Hasher::new(); + h.update(&buf[INMEM_FRAME_HEAD..p1]); + let payload_crc = h.finalize(); + let frame_crc_ind = u32::from_le_bytes(buf[p1..p1 + 4].try_into()?); + let payload_crc_match = payload_crc_exp == payload_crc; + let frame_crc_match = frame_crc_ind == frame_crc; + if !frame_crc_match || !payload_crc_match { + let _ss = String::from_utf8_lossy(&buf[..buf.len().min(256)]); + let msg = format!( + "InMemoryFrameAsyncReadStream tryparse crc mismatch A {} {}", + payload_crc_match, frame_crc_match, + ); + error!("{msg}"); + let e = Error::BadCrc; + return Err(e); + } + self.inp_bytes_consumed += lentot as u64; + // TODO metrics + //trace!("parsed frame well len {}", len); + let ret = InMemoryFrame { + len, + tyid, + encid, + buf: Bytes::from(buf[INMEM_FRAME_HEAD..p1].to_vec()), + }; + self.buf.adv(lentot)?; + self.need_min = INMEM_FRAME_HEAD; + Ok(Some(ret)) + } +} + +impl Stream for InMemoryFrameStream +where + T: Stream> + Unpin, +{ + type Item = Result, Error>; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + let span = span!(Level::INFO, "InMemRd"); + let _spanguard = span.enter(); + loop { + break if self.complete { + panic!("{} poll_next on complete", Self::type_name()) + } else if self.done { + self.complete = true; + Ready(None) + } else if self.buf.len() >= self.need_min { + match self.parse() { + Ok(None) => { + if self.buf.len() >= self.need_min { + self.done = true; + let e = Error::EnoughInputNothingParsed; + Ready(Some(Err(e))) + } else { + continue; + } + } + Ok(Some(item)) => { + if item.tyid() == TERM_FRAME_TYPE_ID { + self.done = true; + continue; + } else { + Ready(Some(Ok(StreamItem::DataItem(item)))) + } + } + Err(e) => { + self.done = true; + Ready(Some(Err(e))) + } + } + } else { + match self.as_mut().poll_upstream(cx) { + Ready(Ok(n1)) => { + if n1 == 0 { + self.done = true; + continue; + } else { + continue; + } + } + Ready(Err(e)) => { + error!("poll_upstream need_min {} buf {:?} {:?}", self.need_min, self.buf, e); + self.done = true; + Ready(Some(Err(e))) + } + Pending => Pending, + } + }; + } + } +} diff --git a/src/generators.rs b/src/generators.rs new file mode 100644 index 0000000..286bcad --- /dev/null +++ b/src/generators.rs @@ -0,0 +1,540 @@ +use crate::frames::inmem::BoxedBytesStream; +use crate::transform::build_event_transform; +use futures_util::Future; +use futures_util::FutureExt; +use futures_util::Stream; +use futures_util::StreamExt; +use futures_util::TryStreamExt; +use items_0::container::ByteEstimate; +use items_0::on_sitemty_data; +use items_0::streamitem::sitem_data; +use items_0::streamitem::sitem_err2_from_string; +use items_0::streamitem::RangeCompletableItem; +use items_0::streamitem::Sitemty; +use items_0::streamitem::StreamItem; +use items_0::Appendable; +use items_0::Empty; +use items_0::WithLen; +use items_2::channelevents::ChannelEvents; +use items_2::empty::empty_events_dyn_ev; +use items_2::eventsdim0::EventsDim0; +use items_2::eventsdim1::EventsDim1; +use items_2::framable::Framable; +use netpod::log::*; +use netpod::range::evrange::SeriesRange; +use netpod::timeunits::DAY; +use netpod::timeunits::MS; +use query::api4::events::EventsSubQuery; +use std::f64::consts::PI; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "Generator")] +pub enum Error { + UnsupportedIsEventBlobs, + Transform(#[from] crate::transform::Error), + Items2(#[from] items_2::Error), + BadChannelName, +} + +fn make_sleep_fut() -> Pin + Send>> { + todo!() +} + +pub fn make_test_channel_events_bytes_stream( + subq: EventsSubQuery, + node_count: u64, + node_ix: u64, +) -> Result { + if subq.is_event_blobs() { + let e = Error::UnsupportedIsEventBlobs; + error!("{e}"); + Err(e) + } else { + let mut tr = build_event_transform(subq.transform())?; + let stream = make_test_channel_events_stream_data(subq, node_count, node_ix)?; + let stream = stream.map(move |x| { + on_sitemty_data!(x, |x: ChannelEvents| { + match x { + ChannelEvents::Events(evs) => { + let evs = tr.0.transform(evs); + Ok(StreamItem::DataItem(RangeCompletableItem::Data(ChannelEvents::Events( + evs, + )))) + } + ChannelEvents::Status(x) => Ok(StreamItem::DataItem(RangeCompletableItem::Data( + ChannelEvents::Status(x), + ))), + } + }) + }); + let stream = stream + .map_err(sitem_err2_from_string) + .map(|x| x.make_frame_dyn().map(|x| x.freeze()).map_err(sitem_err2_from_string)); + let ret = Box::pin(stream); + Ok(ret) + } +} + +// is also used from nodenet::conn +pub fn make_test_channel_events_stream_data( + subq: EventsSubQuery, + node_count: u64, + node_ix: u64, +) -> Result> + Send>>, Error> { + let empty = empty_events_dyn_ev(subq.ch_conf().scalar_type(), subq.ch_conf().shape())?; + let empty = sitem_data(ChannelEvents::Events(empty)); + let stream = make_test_channel_events_stream_data_inner(subq, node_count, node_ix)?; + let ret = futures_util::stream::iter([empty]).chain(stream); + let ret = Box::pin(ret); + Ok(ret) +} + +fn make_test_channel_events_stream_data_inner( + subq: EventsSubQuery, + node_count: u64, + node_ix: u64, +) -> Result> + Send>>, Error> { + debug!("use test backend data"); + let chn = subq.name(); + let range = subq.range().clone(); + let one_before = subq.need_one_before_range(); + if chn == "test-gen-i32-dim0-v00" { + Ok(Box::pin(GenerateI32V00::new(node_ix, node_count, range, one_before))) + } else if chn == "test-gen-i32-dim0-v01" { + Ok(Box::pin(GenerateI32V01::new(node_ix, node_count, range, one_before))) + } else if chn == "test-gen-f64-dim1-v00" { + Ok(Box::pin(GenerateF64V00::new(node_ix, node_count, range, one_before))) + } else { + let na: Vec<_> = chn.split("-").collect(); + if na.len() != 3 { + Err(Error::BadChannelName) + } else { + if na[0] != "inmem" { + Err(Error::BadChannelName) + } else { + let _range = subq.range().clone(); + if na[1] == "d0" { + if na[2] == "i32" { + //generator::generate_i32(node_ix, node_count, range) + panic!() + } else if na[2] == "f32" { + //generator::generate_f32(node_ix, node_count, range) + panic!() + } else { + Err(Error::BadChannelName) + } + } else { + Err(Error::BadChannelName) + } + } + } + } +} + +pub struct GenerateI32V00 { + ts: u64, + dts: u64, + tsend: u64, + #[allow(unused)] + c1: u64, + timeout: Option + Send>>>, + do_throttle: bool, + done: bool, + done_range_final: bool, +} + +impl GenerateI32V00 { + pub fn new(node_ix: u64, node_count: u64, range: SeriesRange, one_before_range: bool) -> Self { + let range = match range { + SeriesRange::TimeRange(k) => k, + SeriesRange::PulseRange(_) => todo!(), + }; + let ivl = MS * 1000; + let dts = ivl * node_count as u64; + let ts = (range.beg / ivl + node_ix - if one_before_range { 1 } else { 0 }) * ivl; + let tsend = range.end; + Self { + ts, + dts, + tsend, + c1: 0, + timeout: None, + do_throttle: false, + done: false, + done_range_final: false, + } + } + + fn make_batch(&mut self) -> Sitemty { + type T = i32; + let mut item = EventsDim0::empty(); + let mut ts = self.ts; + loop { + if self.ts >= self.tsend || item.byte_estimate() > 100 { + break; + } + let pulse = ts; + let value = (ts / (MS * 100) % 1000) as T; + item.push(ts, pulse, value); + ts += self.dts; + } + self.ts = ts; + let w = ChannelEvents::Events(Box::new(item) as _); + let w = sitem_data(w); + w + } +} + +impl Stream for GenerateI32V00 { + type Item = Sitemty; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + loop { + break if self.done { + Ready(None) + } else if self.ts >= self.tsend { + self.done = true; + self.done_range_final = true; + Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)))) + } else if !self.do_throttle { + // To use the generator without throttling, use this scope + Ready(Some(self.make_batch())) + } else if let Some(fut) = self.timeout.as_mut() { + match fut.poll_unpin(cx) { + Ready(()) => { + self.timeout = None; + Ready(Some(self.make_batch())) + } + Pending => Pending, + } + } else { + self.timeout = Some(make_sleep_fut()); + continue; + }; + } + } +} + +pub struct GenerateI32V01 { + ivl: u64, + ts: u64, + dts: u64, + tsend: u64, + #[allow(unused)] + c1: u64, + node_ix: u64, + timeout: Option + Send>>>, + do_throttle: bool, + have_range_final: bool, + done: bool, + done_range_final: bool, +} + +impl GenerateI32V01 { + pub fn new(node_ix: u64, node_count: u64, range: SeriesRange, one_before_range: bool) -> Self { + let range = match range { + SeriesRange::TimeRange(k) => k, + SeriesRange::PulseRange(_) => todo!(), + }; + let ivl = MS * 500; + let dts = ivl * node_count as u64; + let ts = (range.beg / ivl + node_ix - if one_before_range { 1 } else { 0 }) * ivl; + let tsend = range.end.min(DAY); + let have_range_final = range.end < (DAY - ivl); + debug!( + "GenerateI32V01::new ivl {} dts {} ts {} one_before_range {}", + ivl, dts, ts, one_before_range + ); + Self { + ivl, + ts, + dts, + tsend, + c1: 0, + node_ix, + timeout: None, + do_throttle: false, + have_range_final, + done: false, + done_range_final: false, + } + } + + fn make_batch(&mut self) -> Sitemty { + type T = i32; + let mut item = EventsDim0::empty(); + let mut ts = self.ts; + loop { + if self.ts >= self.tsend || item.byte_estimate() > 100 { + break; + } + let pulse = ts; + let value = (ts / self.ivl) as T; + if false { + info!( + "v01 node {} made event ts {} pulse {} value {}", + self.node_ix, ts, pulse, value + ); + } + item.push(ts, pulse, value); + ts += self.dts; + } + self.ts = ts; + let w = ChannelEvents::Events(Box::new(item) as _); + let w = sitem_data(w); + w + } +} + +impl Stream for GenerateI32V01 { + type Item = Sitemty; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + loop { + break if self.done { + Ready(None) + } else if self.ts >= self.tsend { + self.done = true; + self.done_range_final = true; + if self.have_range_final { + Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)))) + } else { + continue; + } + } else if !self.do_throttle { + // To use the generator without throttling, use this scope + Ready(Some(self.make_batch())) + } else if let Some(fut) = self.timeout.as_mut() { + match fut.poll_unpin(cx) { + Ready(()) => { + self.timeout = None; + Ready(Some(self.make_batch())) + } + Pending => Pending, + } + } else { + self.timeout = Some(make_sleep_fut()); + continue; + }; + } + } +} + +pub struct GenerateF64V00 { + ivl: u64, + ts: u64, + dts: u64, + tsend: u64, + node_ix: u64, + timeout: Option + Send>>>, + do_throttle: bool, + done: bool, + done_range_final: bool, +} + +impl GenerateF64V00 { + pub fn new(node_ix: u64, node_count: u64, range: SeriesRange, one_before_range: bool) -> Self { + let range = match range { + SeriesRange::TimeRange(k) => k, + SeriesRange::PulseRange(_) => todo!(), + }; + let ivl = MS * 100; + let dts = ivl * node_count as u64; + let ts = (range.beg / ivl + node_ix - if one_before_range { 1 } else { 0 }) * ivl; + let tsend = range.end; + debug!( + "GenerateF64V00::new ivl {} dts {} ts {} one_before_range {}", + ivl, dts, ts, one_before_range + ); + Self { + ivl, + ts, + dts, + tsend, + node_ix, + timeout: None, + do_throttle: false, + done: false, + done_range_final: false, + } + } + + fn make_batch(&mut self) -> Sitemty { + type T = f64; + let mut item = EventsDim1::empty(); + let mut ts = self.ts; + loop { + if self.ts >= self.tsend || item.byte_estimate() > 400 { + break; + } + let pulse = ts; + let ampl = ((ts / self.ivl) as T).sin() + 2.; + let mut value = Vec::new(); + let pi = PI; + for i in 0..21 { + let x = ((-pi + (2. * pi / 20.) * i as f64).cos() + 1.1) * ampl; + value.push(x); + } + if false { + info!( + "v01 node {} made event ts {} pulse {} value {:?}", + self.node_ix, ts, pulse, value + ); + } + item.push(ts, pulse, value); + ts += self.dts; + } + self.ts = ts; + trace!("generated len {}", item.len()); + let w = ChannelEvents::Events(Box::new(item) as _); + let w = sitem_data(w); + w + } +} + +impl Stream for GenerateF64V00 { + type Item = Sitemty; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + loop { + break if self.done { + Ready(None) + } else if self.ts >= self.tsend { + self.done = true; + self.done_range_final = true; + Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)))) + } else if !self.do_throttle { + // To use the generator without throttling, use this scope + Ready(Some(self.make_batch())) + } else if let Some(fut) = self.timeout.as_mut() { + match fut.poll_unpin(cx) { + Ready(()) => { + self.timeout = None; + Ready(Some(self.make_batch())) + } + Pending => Pending, + } + } else { + self.timeout = Some(make_sleep_fut()); + continue; + }; + } + } +} + +pub struct GenerateWaveI16V00 { + ivl: u64, + ts: u64, + dts: u64, + tsend: u64, + node_ix: u64, + timeout: Option + Send>>>, + do_throttle: bool, + done: bool, + done_range_final: bool, +} + +impl GenerateWaveI16V00 { + pub fn self_name() -> &'static str { + std::any::type_name::() + } + + pub fn new(node_ix: u64, node_count: u64, range: SeriesRange, one_before_range: bool) -> Self { + let range = match range { + SeriesRange::TimeRange(k) => k, + SeriesRange::PulseRange(_) => todo!(), + }; + let ivl = MS * 100; + let dts = ivl * node_count as u64; + let ts = (range.beg / ivl + node_ix - if one_before_range { 1 } else { 0 }) * ivl; + let tsend = range.end; + debug!( + "{}::new ivl {} dts {} ts {} one_before_range {}", + Self::self_name(), + ivl, + dts, + ts, + one_before_range + ); + Self { + ivl, + ts, + dts, + tsend, + node_ix, + timeout: None, + do_throttle: false, + done: false, + done_range_final: false, + } + } + + fn make_batch(&mut self) -> Sitemty { + type T = i16; + let mut item = EventsDim1::empty(); + let mut ts = self.ts; + loop { + if self.ts >= self.tsend || item.byte_estimate() > 1024 * 20 { + break; + } + let pulse = ts; + let ampl = ((ts / self.ivl) as f32).sin() + 2.; + let mut value = Vec::new(); + let pi = std::f32::consts::PI; + for i in 0..21 { + let x = ((-pi + (2. * pi / 20.) * i as f32).cos() + 1.1) * ampl; + value.push(x as T); + } + if false { + info!( + "v01 node {} made event ts {} pulse {} value {:?}", + self.node_ix, ts, pulse, value + ); + } + item.push(ts, pulse, value); + ts += self.dts; + } + self.ts = ts; + trace!("generated len {}", item.len()); + let w = ChannelEvents::Events(Box::new(item) as _); + let w = sitem_data(w); + w + } +} + +impl Stream for GenerateWaveI16V00 { + type Item = Sitemty; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + loop { + break if self.done { + Ready(None) + } else if self.ts >= self.tsend { + self.done = true; + self.done_range_final = true; + Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)))) + } else if !self.do_throttle { + // To use the generator without throttling, use this scope + Ready(Some(self.make_batch())) + } else if let Some(fut) = self.timeout.as_mut() { + match fut.poll_unpin(cx) { + Ready(()) => { + self.timeout = None; + Ready(Some(self.make_batch())) + } + Pending => Pending, + } + } else { + self.timeout = Some(make_sleep_fut()); + continue; + }; + } + } +} diff --git a/src/instrument.rs b/src/instrument.rs new file mode 100644 index 0000000..28c7bf6 --- /dev/null +++ b/src/instrument.rs @@ -0,0 +1,33 @@ +use futures_util::Stream; +use futures_util::StreamExt; +use netpod::log::tracing; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; + +#[pin_project::pin_project] +pub struct InstrumentStream { + #[pin] + inp: S, + #[pin] + span: tracing::Span, +} + +impl InstrumentStream { + pub fn new(inp: S, span: tracing::Span) -> Self { + Self { inp, span } + } +} + +impl Stream for InstrumentStream +where + S: Stream, +{ + type Item = ::Item; + + fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + let mut this = self.project(); + let _spg = this.span.enter(); + this.inp.poll_next_unpin(cx) + } +} diff --git a/src/itemclone.rs b/src/itemclone.rs new file mode 100644 index 0000000..db5ceff --- /dev/null +++ b/src/itemclone.rs @@ -0,0 +1,92 @@ +use async_channel::Send; +use async_channel::Sender; +use futures_util::pin_mut; +use futures_util::Future; +use futures_util::Stream; +use futures_util::StreamExt; +use std::pin::Pin; +use std::ptr::NonNull; +use std::task::Context; +use std::task::Poll; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "ItemClone")] +pub enum Error {} + +pub struct Itemclone<'a, T, INP> +where + T: 'static, +{ + sender: Pin>>, + inp: INP, + send_fut: Option>, +} + +impl<'a, T, INP> Itemclone<'a, T, INP> { + pub fn new(inp: INP, sender: Sender) -> Self + where + INP: Stream + Unpin, + T: Clone + Unpin, + { + let sender = Box::pin(sender); + Self { + sender, + inp, + send_fut: None, + } + } +} + +impl<'a, T, INP> Itemclone<'a, T, INP> +where + INP: Stream + Unpin, + T: Clone + Unpin, +{ + fn poll_fresh(&mut self, cx: &mut Context) -> Poll>> { + use Poll::*; + match self.inp.poll_next_unpin(cx) { + Ready(Some(item)) => { + let sender = self.sender.as_mut().get_mut(); + let mut ptr1 = NonNull::from(sender); + let sender = unsafe { ptr1.as_mut() }; + self.send_fut = Some(sender.send(item.clone())); + Ready(Some(Ok(item))) + } + Ready(None) => { + self.sender.close(); + Ready(None) + } + Pending => Pending, + } + } + + fn send_copy(fut: &mut Send, cx: &mut Context) -> Poll> { + use Poll::*; + pin_mut!(fut); + match fut.poll(cx) { + Ready(Ok(())) => Ready(Ok(())), + Ready(Err(_)) => todo!("can not send copy"), + Pending => Pending, + } + } +} + +impl<'a, T, INP> Stream for Itemclone<'a, T, INP> +where + INP: Stream + Unpin, + T: Clone + Unpin, +{ + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + match self.send_fut.as_mut() { + Some(fut) => match Self::send_copy(fut, cx) { + Ready(Ok(())) => self.poll_fresh(cx), + Ready(Err(e)) => Ready(Some(Err(e))), + Pending => Pending, + }, + None => self.poll_fresh(cx), + } + } +} diff --git a/src/json_stream.rs b/src/json_stream.rs new file mode 100644 index 0000000..0f922c1 --- /dev/null +++ b/src/json_stream.rs @@ -0,0 +1,158 @@ +use crate::cbor_stream::SitemtyDynEventsStream; +use crate::streamtimeout::StreamTimeout2; +use crate::streamtimeout::TimeoutableStream; +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::streamitem::RangeCompletableItem; +use items_0::streamitem::Sitemty; +use items_0::streamitem::StreamItem; +use items_0::Events; +use items_0::WithLen; +use netpod::log::*; +use std::pin::Pin; +use std::time::Duration; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "JsonStream")] +pub enum Error { + Json(#[from] serde_json::Error), + Msg(String), +} + +pub struct ErrMsg(pub E) +where + E: ToString; + +impl From> for Error +where + E: ToString, +{ + fn from(value: ErrMsg) -> Self { + Self::Msg(value.0.to_string()) + } +} + +pub struct JsonBytes(String); + +impl JsonBytes { + pub fn new>(s: S) -> Self { + Self(s.into()) + } + + pub fn into_inner(self) -> String { + self.0 + } + + pub fn len(&self) -> u32 { + self.0.len() as _ + } +} + +impl WithLen for JsonBytes { + fn len(&self) -> usize { + self.len() as usize + } +} + +impl From for String { + fn from(value: JsonBytes) -> Self { + value.0 + } +} + +pub type JsonStream = Pin> + Send>>; + +pub fn events_stream_to_json_stream( + stream: SitemtyDynEventsStream, + timeout_provider: Box, +) -> impl Stream> { + let ivl = Duration::from_millis(4000); + let stream = TimeoutableStream::new(ivl, timeout_provider, stream); + let stream = stream.map(|x| match x { + Some(x) => map_events(x), + None => make_keepalive(), + }); + let prepend = { + let item = make_keepalive(); + futures_util::stream::iter([item]) + }; + prepend.chain(stream) +} + +fn map_events(x: Sitemty>) -> Result { + match x { + Ok(x) => match x { + StreamItem::DataItem(x) => match x { + RangeCompletableItem::Data(evs) => { + let mut k = evs; + let evs = if let Some(j) = k.as_any_mut().downcast_mut::() { + use items_0::AsAnyMut; + match j { + items_2::channelevents::ChannelEvents::Events(m) => { + if let Some(g) = m + .as_any_mut() + .downcast_mut::>() + { + trace!("consider container EnumVariant"); + let mut out = items_2::eventsdim0enum::EventsDim0Enum::new(); + for (&ts, val) in g.tss.iter().zip(g.values.iter()) { + out.push_back(ts, val.ix(), val.name_string()); + } + Box::new(items_2::channelevents::ChannelEvents::Events(Box::new(out))) + } else { + trace!("consider container channel events other events {}", k.type_name()); + k + } + } + items_2::channelevents::ChannelEvents::Status(_) => { + trace!("consider container channel events status {}", k.type_name()); + k + } + } + } else { + trace!("consider container else {}", k.type_name()); + k + }; + let s = evs.to_json_string(); + let item = JsonBytes::new(s); + Ok(item) + } + RangeCompletableItem::RangeComplete => { + let item = serde_json::json!({ + "rangeFinal": true, + }); + let s = serde_json::to_string(&item)?; + let item = JsonBytes::new(s); + Ok(item) + } + }, + StreamItem::Log(item) => { + debug!("{item:?}"); + let item = JsonBytes::new(String::new()); + Ok(item) + } + StreamItem::Stats(item) => { + debug!("{item:?}"); + let item = JsonBytes::new(String::new()); + Ok(item) + } + }, + Err(e) => { + let item = serde_json::json!({ + "error": e.to_string(), + }); + let s = serde_json::to_string(&item)?; + let item = JsonBytes::new(s); + Ok(item) + } + } +} + +fn make_keepalive() -> Result { + let item = serde_json::json!({ + "type": "keepalive", + }); + let s = serde_json::to_string(&item).unwrap(); + let item = Ok(JsonBytes::new(s)); + item +} diff --git a/src/lenframed.rs b/src/lenframed.rs new file mode 100644 index 0000000..e13ff34 --- /dev/null +++ b/src/lenframed.rs @@ -0,0 +1,27 @@ +use bytes::BufMut; +use bytes::Bytes; +use bytes::BytesMut; +use futures_util::future; +use futures_util::stream; +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::WithLen; + +pub fn length_framed(inp: S) -> impl Stream> +where + S: Stream>, + T: WithLen + Into, +{ + inp.map(|x| match x { + Ok(x) => { + let n = x.len() as u32; + let mut buf1 = BytesMut::with_capacity(8); + buf1.put_u32_le(n); + [Some(Ok(buf1.freeze())), Some(Ok(x.into()))] + } + Err(e) => [Some(Err(e)), None], + }) + .map(|x| stream::iter(x)) + .flatten() + .filter_map(|x| future::ready(x)) +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..2ce9cb4 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,32 @@ +pub mod boxed; +pub mod cbor_stream; +pub mod collect; +#[cfg(feature = "indev")] +pub mod collect_adapter; +pub mod dtflags; +pub mod events; +pub mod eventsplainreader; +pub mod filechunkread; +pub mod firsterr; +pub mod framed_bytes; +pub mod frames; +pub mod generators; +pub mod instrument; +pub mod itemclone; +pub mod json_stream; +pub mod lenframed; +pub mod needminbuffer; +pub mod plaineventscbor; +pub mod plaineventsjson; +pub mod plaineventsstream; +pub mod print_on_done; +pub mod rangefilter2; +pub mod slidebuf; +pub mod streamtimeout; +pub mod tcprawclient; +#[cfg(test)] +pub mod test; +pub mod teststream; +pub mod timebin; +pub mod timebinnedjson; +pub mod transform; diff --git a/src/needminbuffer.rs b/src/needminbuffer.rs new file mode 100644 index 0000000..37689b6 --- /dev/null +++ b/src/needminbuffer.rs @@ -0,0 +1,111 @@ +use crate::filechunkread::FileChunkRead; +use futures_util::Stream; +use futures_util::StreamExt; +use netpod::histo::HistoLog2; +use netpod::log::*; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "NeedMinBuffer")] +pub enum Error {} + +pub struct NeedMinBuffer { + inp: Pin> + Send>>, + need_min: u32, + left: Option, + buf_len_histo: HistoLog2, + errored: bool, + completed: bool, +} + +impl NeedMinBuffer { + pub fn new( + inp: Pin> + Send>>, + ) -> Self { + Self { + inp, + need_min: 1, + left: None, + buf_len_histo: HistoLog2::new(8), + errored: false, + completed: false, + } + } + + pub fn put_back(&mut self, buf: FileChunkRead) { + assert!(self.left.is_none()); + self.left = Some(buf); + } + + pub fn set_need_min(&mut self, need_min: u32) { + self.need_min = need_min; + } +} + +// TODO collect somewhere else +impl Drop for NeedMinBuffer { + fn drop(&mut self) { + debug!("NeedMinBuffer-drop {{ buf_len_histo: {:?} }}", self.buf_len_histo); + } +} + +impl Stream for NeedMinBuffer { + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + loop { + break if self.completed { + panic!("NeedMinBuffer poll_next on completed"); + } else if self.errored { + self.completed = true; + return Ready(None); + } else { + match self.inp.poll_next_unpin(cx) { + Ready(Some(Ok(mut fcr))) => { + self.buf_len_histo.ingest(fcr.buf().len() as u32); + //info!("NeedMinBuffer got buf len {}", fcr.buf.len()); + match self.left.take() { + Some(mut lfcr) => { + // TODO measure: + lfcr.buf_mut().unsplit(fcr.buf_take()); + *lfcr.duration_mut() += *fcr.duration(); + let fcr = lfcr; + if fcr.buf().len() as u32 >= self.need_min { + //info!("with left ready len {} need_min {}", buf.len(), self.need_min); + Ready(Some(Ok(fcr))) + } else { + //info!("with left not enough len {} need_min {}", buf.len(), self.need_min); + self.left.replace(fcr); + continue; + } + } + None => { + if fcr.buf().len() as u32 >= self.need_min { + //info!("simply ready len {} need_min {}", buf.len(), self.need_min); + Ready(Some(Ok(fcr))) + } else { + //info!("no previous leftover, need more len {} need_min {}", buf.len(), self.need_min); + self.left.replace(fcr); + continue; + } + } + } + } + Ready(Some(Err(e))) => { + self.errored = true; + Ready(Some(Err(e.into()))) + } + Ready(None) => { + // TODO collect somewhere + debug!("NeedMinBuffer histo: {:?}", self.buf_len_histo); + Ready(None) + } + Pending => Pending, + } + }; + } + } +} diff --git a/src/plaineventscbor.rs b/src/plaineventscbor.rs new file mode 100644 index 0000000..599d528 --- /dev/null +++ b/src/plaineventscbor.rs @@ -0,0 +1,30 @@ +use crate::cbor_stream::events_stream_to_cbor_stream; +use crate::cbor_stream::CborStream; +use crate::firsterr::non_empty; +use crate::firsterr::only_first_err; +use crate::plaineventsstream::dyn_events_stream; +use crate::streamtimeout::StreamTimeout2; +use crate::tcprawclient::OpenBoxedBytesStreamsBox; +use netpod::ChannelTypeConfigGen; +use netpod::ReqCtx; +use query::api4::events::PlainEventsQuery; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "PlainEventsCbor")] +pub enum Error { + Stream(#[from] crate::plaineventsstream::Error), +} + +pub async fn plain_events_cbor_stream( + evq: &PlainEventsQuery, + ch_conf: ChannelTypeConfigGen, + ctx: &ReqCtx, + open_bytes: OpenBoxedBytesStreamsBox, + timeout_provider: Box, +) -> Result { + let stream = dyn_events_stream(evq, ch_conf, ctx, open_bytes).await?; + let stream = events_stream_to_cbor_stream(stream, timeout_provider); + let stream = non_empty(stream); + let stream = only_first_err(stream); + Ok(Box::pin(stream)) +} diff --git a/src/plaineventsjson.rs b/src/plaineventsjson.rs new file mode 100644 index 0000000..53a587d --- /dev/null +++ b/src/plaineventsjson.rs @@ -0,0 +1,120 @@ +use crate::collect::Collect; +use crate::collect::CollectResult; +use crate::firsterr::non_empty; +use crate::firsterr::only_first_err; +use crate::json_stream::events_stream_to_json_stream; +use crate::json_stream::JsonStream; +use crate::plaineventsstream::dyn_events_stream; +use crate::streamtimeout::StreamTimeout2; +use crate::tcprawclient::OpenBoxedBytesStreamsBox; +use futures_util::StreamExt; +use items_0::collect_s::CollectableDyn; +use items_0::on_sitemty_data; +use netpod::log::*; +use netpod::ChannelTypeConfigGen; +use netpod::Cluster; +use netpod::HasTimeout; +use netpod::ReqCtx; +use query::api4::events::PlainEventsQuery; +use serde_json::Value as JsonValue; +use std::time::Duration; +use std::time::Instant; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "PlainEventsJson")] +pub enum Error { + Stream(#[from] crate::plaineventsstream::Error), + Json(#[from] serde_json::Error), + Collect(#[from] crate::collect::Error), +} + +pub async fn plain_events_json( + evq: &PlainEventsQuery, + ch_conf: ChannelTypeConfigGen, + ctx: &ReqCtx, + _cluster: &Cluster, + open_bytes: OpenBoxedBytesStreamsBox, + timeout_provider: Box, +) -> Result, Error> { + debug!("plain_events_json evquery {:?}", evq); + let deadline = Instant::now() + evq.timeout().unwrap_or(Duration::from_millis(4000)); + + let stream = dyn_events_stream(evq, ch_conf, ctx, open_bytes).await?; + + let stream = stream.map(move |k| { + on_sitemty_data!(k, |mut k: Box| { + if let Some(j) = k.as_any_mut().downcast_mut::() { + use items_0::AsAnyMut; + match j { + items_2::channelevents::ChannelEvents::Events(m) => { + if let Some(g) = m + .as_any_mut() + .downcast_mut::>() + { + trace!("consider container EnumVariant"); + let mut out = items_2::eventsdim0enum::EventsDim0Enum::new(); + for (&ts, val) in g.tss.iter().zip(g.values.iter()) { + out.push_back(ts, val.ix(), val.name_string()); + } + let k: Box = Box::new(out); + Ok(StreamItem::DataItem(RangeCompletableItem::Data(k))) + } else { + trace!("consider container channel events other events {}", k.type_name()); + let k: Box = Box::new(k); + Ok(StreamItem::DataItem(RangeCompletableItem::Data(k))) + } + } + items_2::channelevents::ChannelEvents::Status(_) => { + trace!("consider container channel events status {}", k.type_name()); + let k: Box = Box::new(k); + Ok(StreamItem::DataItem(RangeCompletableItem::Data(k))) + } + } + } else { + trace!("consider container else {}", k.type_name()); + let k: Box = Box::new(k); + Ok(StreamItem::DataItem(RangeCompletableItem::Data(k))) + } + }) + }); + + //let stream = PlainEventStream::new(stream); + //let stream = EventsToTimeBinnable::new(stream); + //let stream = TimeBinnableToCollectable::new(stream); + let stream = Box::pin(stream); + debug!("plain_events_json boxed stream created"); + let collected = Collect::new( + stream, + deadline, + evq.events_max(), + evq.bytes_max(), + Some(evq.range().clone()), + None, + timeout_provider, + ) + .await?; + debug!("plain_events_json collected"); + if let CollectResult::Some(x) = collected { + let jsval = x.to_json_value()?; + debug!("plain_events_json json serialized"); + Ok(CollectResult::Some(jsval)) + } else { + debug!("plain_events_json timeout"); + Ok(CollectResult::Timeout) + } +} + +pub async fn plain_events_json_stream( + evq: &PlainEventsQuery, + ch_conf: ChannelTypeConfigGen, + ctx: &ReqCtx, + open_bytes: OpenBoxedBytesStreamsBox, + timeout_provider: Box, +) -> Result { + trace!("plain_events_json_stream"); + let stream = dyn_events_stream(evq, ch_conf, ctx, open_bytes).await?; + let stream = events_stream_to_json_stream(stream, timeout_provider); + let stream = non_empty(stream); + let stream = only_first_err(stream); + Ok(Box::pin(stream)) +} diff --git a/src/plaineventsstream.rs b/src/plaineventsstream.rs new file mode 100644 index 0000000..c4e3a04 --- /dev/null +++ b/src/plaineventsstream.rs @@ -0,0 +1,226 @@ +use crate::tcprawclient::container_stream_from_bytes_stream; +use crate::tcprawclient::make_sub_query; +use crate::tcprawclient::OpenBoxedBytesStreamsBox; +use crate::transform::build_merged_event_transform; +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::on_sitemty_data; +use items_0::streamitem::Sitemty; +use items_0::Events; +use items_2::channelevents::ChannelEvents; +use items_2::merger::Merger; +use netpod::log::*; +use netpod::ChannelTypeConfigGen; +use netpod::ReqCtx; +use query::api4::events::PlainEventsQuery; +use std::pin::Pin; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "PlainEventsStream")] +pub enum Error { + Netpod(#[from] netpod::NetpodError), + Transform(#[from] crate::transform::Error), + TcpRawClient(#[from] crate::tcprawclient::Error), +} + +pub type DynEventsStream = Pin>> + Send>>; + +pub async fn dyn_events_stream( + evq: &PlainEventsQuery, + ch_conf: ChannelTypeConfigGen, + ctx: &ReqCtx, + open_bytes: OpenBoxedBytesStreamsBox, +) -> Result { + trace!("dyn_events_stream {}", evq.summary_short()); + use query::api4::events::EventsSubQuerySettings; + let subq = make_sub_query( + ch_conf, + evq.range().clone(), + evq.one_before_range(), + evq.transform().clone(), + EventsSubQuerySettings::from(evq), + evq.log_level().into(), + ctx, + ); + let inmem_bufcap = subq.inmem_bufcap(); + let mut tr = build_merged_event_transform(evq.transform())?; + let bytes_streams = open_bytes.open(subq, ctx.clone()).await?; + let mut inps = Vec::new(); + for s in bytes_streams { + let s = container_stream_from_bytes_stream::(s, inmem_bufcap.clone(), "TODOdbgdesc".into())?; + let s = Box::pin(s) as Pin> + Send>>; + inps.push(s); + } + // TODO make sure the empty container arrives over the network. + // TODO propagate also the max-buf-len for the first stage event reader. + // TODO use a mixture of count and byte-size as threshold. + let stream = Merger::new(inps, evq.merger_out_len_max()); + + // let stream = stream.map(|item| { + // info!("item after merge: {item:?}"); + // item + // }); + + let stream = crate::rangefilter2::RangeFilter2::new(stream, evq.range().try_into()?, evq.one_before_range()); + + // let stream = stream.map(|item| { + // info!("item after rangefilter: {item:?}"); + // item + // }); + + let stream = stream.map(move |k| { + on_sitemty_data!(k, |k| { + let k: Box = Box::new(k); + // trace!("got len {}", k.len()); + let k = tr.0.transform(k); + Ok(StreamItem::DataItem(RangeCompletableItem::Data(k))) + }) + }); + + if let Some(wasmname) = evq.test_do_wasm() { + let stream = transform_wasm::<_, items_0::streamitem::SitemErrTy>(stream, wasmname, ctx).await?; + Ok(Box::pin(stream)) + } else { + Ok(Box::pin(stream)) + } +} + +#[cfg(not(feature = "wasm_transform"))] +async fn transform_wasm( + stream: INP, + _wasmname: &str, + _ctx: &ReqCtx, +) -> Result>> + Send, Error> +where + INP: Stream>> + Send + 'static, +{ + let ret: Pin>> + Send>> = Box::pin(stream); + Ok(ret) +} + +#[cfg(feature = "wasm_transform")] +async fn transform_wasm( + stream: INP, + wasmname: &str, + ctx: &ReqCtx, +) -> Result>> + Send, Error> +where + INP: Stream>> + Send + 'static, +{ + debug!("make wasm transform"); + use httpclient::url::Url; + use wasmer::Value; + use wasmer::WasmSlice; + let t = httpclient::http_get( + Url::parse(&format!("http://data-api.psi.ch/distri/{}", wasmname)).unwrap(), + "*/*", + ctx, + ) + .await + .unwrap(); + let wasm = t.body; + // let wasm = include_bytes!("dummy.wasm"); + let mut store = wasmer::Store::default(); + let module = wasmer::Module::new(&store, wasm).unwrap(); + // TODO assert that memory is large enough + let memory = wasmer::Memory::new(&mut store, wasmer::MemoryType::new(10, Some(30), false)).unwrap(); + let import_object = wasmer::imports! { + "env" => { + "memory" => memory.clone(), + } + }; + let instance = wasmer::Instance::new(&mut store, &module, &import_object).unwrap(); + let get_buffer_ptr = instance.exports.get_function("get_buffer_ptr").unwrap(); + let buffer_ptr = get_buffer_ptr.call(&mut store, &[]).unwrap(); + let buffer_ptr = buffer_ptr[0].i32().unwrap(); + let stream = stream.map(move |x| { + let memory = memory.clone(); + let item = on_sitemty_data!(x, |mut evs: Box| { + let x = { + use items_0::AsAnyMut; + if true { + let r1 = evs + .as_any_mut() + .downcast_mut::>() + .is_some(); + let r2 = evs + .as_mut() + .as_any_mut() + .downcast_mut::>() + .is_some(); + let r3 = evs + .as_any_mut() + .downcast_mut::>>() + .is_some(); + let r4 = evs + .as_mut() + .as_any_mut() + .downcast_mut::>>() + .is_some(); + let r5 = evs.as_mut().as_any_mut().downcast_mut::().is_some(); + let r6 = evs.as_mut().as_any_mut().downcast_mut::>().is_some(); + debug!("wasm castings: {r1} {r2} {r3} {r4} {r5} {r6}"); + } + if let Some(evs) = evs.as_any_mut().downcast_mut::() { + match evs { + ChannelEvents::Events(evs) => { + if let Some(evs) = evs.as_any_mut().downcast_mut::>() { + use items_0::WithLen; + if evs.len() == 0 { + debug!("wasm empty EventsDim0"); + } else { + debug!("wasm see EventsDim0"); + let max_len_needed = 16000; + let dummy1 = instance.exports.get_function("dummy1").unwrap(); + let s = evs.values.as_mut_slices(); + for sl in [s.0, s.1] { + if sl.len() > max_len_needed as _ { + // TODO cause error + panic!(); + } + let wmemoff = buffer_ptr as u64; + let view = memory.view(&store); + // TODO is the offset bytes or elements? + let wsl = WasmSlice::::new(&view, wmemoff, sl.len() as _).unwrap(); + // debug!("wasm pages {:?} data size {:?}", view.size(), view.data_size()); + wsl.write_slice(&sl).unwrap(); + let ptr = wsl.as_ptr32(); + debug!("ptr {:?} offset {}", ptr, ptr.offset()); + let params = [Value::I32(ptr.offset() as _), Value::I32(sl.len() as _)]; + let res = dummy1.call(&mut store, ¶ms).unwrap(); + match res[0] { + Value::I32(x) => { + debug!("wasm dummy1 returned: {x:?}"); + if x != 1 { + error!("unexpected return value {res:?}"); + } + } + _ => { + error!("unexpected return type {res:?}"); + } + } + // Init the slice again because we need to drop ownership for the function call. + let view = memory.view(&store); + let wsl = WasmSlice::::new(&view, wmemoff, sl.len() as _).unwrap(); + wsl.read_slice(sl).unwrap(); + } + } + } else { + debug!("wasm not EventsDim0"); + } + } + ChannelEvents::Status(_) => {} + } + } else { + debug!("wasm not ChannelEvents"); + } + evs + }; + Ok(StreamItem::DataItem(RangeCompletableItem::Data(x))) + }); + // Box::new(item) as Box + item + }); + let ret: Pin>> + Send>> = Box::pin(stream); + Ok(ret) +} diff --git a/src/print_on_done.rs b/src/print_on_done.rs new file mode 100644 index 0000000..54280d8 --- /dev/null +++ b/src/print_on_done.rs @@ -0,0 +1,41 @@ +use futures_util::Stream; +use futures_util::StreamExt; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; +use std::time::Instant; + +pub struct PrintOnDone { + ts_ctor: Instant, + inp: INP, + on_done: Pin () + Send>>, +} + +impl PrintOnDone { + pub fn new(inp: INP, on_done: Pin () + Send>>) -> Self { + Self { + ts_ctor: Instant::now(), + inp, + on_done, + } + } +} + +impl Stream for PrintOnDone +where + INP: Stream + Unpin, +{ + type Item = ::Item; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + match self.inp.poll_next_unpin(cx) { + Ready(Some(x)) => Ready(Some(x)), + Ready(None) => { + (self.on_done)(self.ts_ctor); + Ready(None) + } + Pending => Pending, + } + } +} diff --git a/src/rangefilter2.rs b/src/rangefilter2.rs new file mode 100644 index 0000000..e3f2ec9 --- /dev/null +++ b/src/rangefilter2.rs @@ -0,0 +1,276 @@ +#[cfg(feature = "tests-runtime")] +mod test; + +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::streamitem::sitem_err_from_string; +use items_0::streamitem::RangeCompletableItem; +use items_0::streamitem::Sitemty; +use items_0::streamitem::StatsItem; +use items_0::streamitem::StreamItem; +use items_0::MergeError; +use items_2::merger::Mergeable; +use netpod::log::*; +use netpod::range::evrange::NanoRange; +use netpod::RangeFilterStats; +use netpod::TsMsVecFmt; +use netpod::TsNano; +use std::fmt; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; + +macro_rules! trace_emit { ($det:expr, $($arg:tt)*) => ( if false && $det { trace!($($arg)*); } ) } + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "Rangefilter")] +pub enum Error { + Merge(#[from] MergeError), +} + +pub struct RangeFilter2 +where + S: Stream> + Unpin, + ITY: Mergeable, +{ + inp: S, + range: NanoRange, + range_str: String, + one_before: bool, + stats: RangeFilterStats, + slot1: Option, + have_range_complete: bool, + inp_done: bool, + raco_done: bool, + done: bool, + complete: bool, + trdet: bool, +} + +impl RangeFilter2 +where + S: Stream> + Unpin, + ITY: Mergeable, +{ + pub fn type_name() -> &'static str { + std::any::type_name::() + } + + pub fn new(inp: S, range: NanoRange, one_before: bool) -> Self { + let trdet = false; + trace_emit!( + trdet, + "{}::new range: {:?} one_before {:?}", + Self::type_name(), + range, + one_before + ); + Self { + inp, + range_str: format!("{:?}", range), + range, + one_before, + stats: RangeFilterStats::new(), + slot1: None, + have_range_complete: false, + inp_done: false, + raco_done: false, + done: false, + complete: false, + trdet, + } + } + + fn prune_high(&mut self, mut item: ITY, ts: u64) -> Result { + let ret = match item.find_highest_index_lt(ts) { + Some(ihlt) => { + let n = item.len(); + if ihlt + 1 == n { + // TODO gather stats, this should be the most common case. + self.stats.items_no_prune_high += 1; + item + } else { + self.stats.items_part_prune_high += 1; + let mut dummy = item.new_empty(); + match item.drain_into(&mut dummy, (ihlt + 1, n)) { + Ok(_) => {} + Err(e) => match e { + MergeError::NotCompatible => { + error!("logic error") + } + MergeError::Full => error!("full, logic error"), + }, + } + item + } + } + None => { + self.stats.items_all_prune_high += 1; + item.new_empty() + } + }; + Ok(ret) + } + + fn handle_item(&mut self, item: ITY) -> Result { + if let Some(ts_min) = item.ts_min() { + if ts_min < self.range.beg() { + debug!("ITEM BEFORE RANGE (how many?)"); + } + } + let min = item.ts_min().map(|x| TsNano::from_ns(x).fmt()); + let max = item.ts_max().map(|x| TsNano::from_ns(x).fmt()); + trace_emit!( + self.trdet, + "see event len {} min {:?} max {:?}", + item.len(), + min, + max + ); + let mut item = self.prune_high(item, self.range.end)?; + let ret = if self.one_before { + let lige = item.find_lowest_index_ge(self.range.beg); + trace_emit!(self.trdet, "YES one_before_range ilge {:?}", lige); + match lige { + Some(lige) => { + if lige == 0 { + if let Some(sl1) = self.slot1.take() { + self.slot1 = Some(item); + sl1 + } else { + item + } + } else { + trace_emit!(self.trdet, "discarding events len {:?}", lige - 1); + let mut dummy = item.new_empty(); + item.drain_into(&mut dummy, (0, lige - 1))?; + self.slot1 = None; + item + } + } + None => { + // TODO keep stats about this case + trace_emit!(self.trdet, "drain into to keep one before"); + let n = item.len(); + let mut keep = item.new_empty(); + item.drain_into(&mut keep, (n.max(1) - 1, n))?; + self.slot1 = Some(keep); + item.new_empty() + } + } + } else { + let lige = item.find_lowest_index_ge(self.range.beg); + trace_emit!(self.trdet, "NOT one_before_range ilge {:?}", lige); + match lige { + Some(lige) => { + let mut dummy = item.new_empty(); + item.drain_into(&mut dummy, (0, lige))?; + item + } + None => { + // TODO count case for stats + item.new_empty() + } + } + }; + Ok(ret) + } +} + +impl RangeFilter2 +where + S: Stream> + Unpin, + ITY: Mergeable, +{ + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll::Item>> { + use Poll::*; + loop { + break if self.complete { + error!("{} poll_next on complete", Self::type_name()); + Ready(Some(sitem_err_from_string("poll next on complete"))) + } else if self.done { + self.complete = true; + Ready(None) + } else if self.raco_done { + self.done = true; + let k = std::mem::replace(&mut self.stats, RangeFilterStats::new()); + let k = StatsItem::RangeFilterStats(k); + Ready(Some(Ok(StreamItem::Stats(k)))) + } else if self.inp_done { + self.raco_done = true; + if self.have_range_complete { + Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)))) + } else { + continue; + } + } else { + match self.inp.poll_next_unpin(cx) { + Ready(Some(item)) => match item { + Ok(StreamItem::DataItem(RangeCompletableItem::Data(item))) => match self.handle_item(item) { + Ok(item) => { + trace_emit!(self.trdet, "emit {}", TsMsVecFmt(Mergeable::tss(&item).iter())); + let item = Ok(StreamItem::DataItem(RangeCompletableItem::Data(item))); + Ready(Some(item)) + } + Err(e) => { + error!("sees: {e}"); + self.inp_done = true; + Ready(Some(sitem_err_from_string(e))) + } + }, + Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)) => { + self.have_range_complete = true; + continue; + } + k => Ready(Some(k)), + }, + Ready(None) => { + self.inp_done = true; + if let Some(sl1) = self.slot1.take() { + Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(sl1))))) + } else { + continue; + } + } + Pending => Pending, + } + }; + } + } +} + +impl Stream for RangeFilter2 +where + S: Stream> + Unpin, + ITY: Mergeable, +{ + type Item = Sitemty; + + fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + let span1 = span!(Level::INFO, "RangeFilter2", range = tracing::field::Empty); + span1.record("range", &self.range_str.as_str()); + let _spg = span1.enter(); + RangeFilter2::poll_next(self, cx) + } +} + +impl fmt::Debug for RangeFilter2 +where + S: Stream> + Unpin, + ITY: Mergeable, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("RangeFilter2").field("stats", &self.stats).finish() + } +} + +impl Drop for RangeFilter2 +where + S: Stream> + Unpin, + ITY: Mergeable, +{ + fn drop(&mut self) { + // Self::type_name() + debug!("drop {:?}", self); + } +} diff --git a/src/rangefilter2/test.rs b/src/rangefilter2/test.rs new file mode 100644 index 0000000..5abe556 --- /dev/null +++ b/src/rangefilter2/test.rs @@ -0,0 +1,267 @@ +use crate::rangefilter2::RangeFilter2; +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::streamitem::RangeCompletableItem; +use items_0::streamitem::Sitemty; +use items_0::streamitem::StreamItem; +use items_0::Events; +use netpod::range::evrange::NanoRange; +use netpod::TsNano; +use std::collections::VecDeque; + +#[test] +fn test_00() { + use items_0::Empty; + use items_2::eventsdim0::EventsDim0; + let ms = 1_000_000; + let beg = TsNano::from_ms(1000 * 10); + let end = TsNano::from_ms(1000 * 20); + let mut item1 = EventsDim0::::empty(); + item1.push_back(beg.ns() + 0 * ms, 0, 3.); + item1.push_back(beg.ns() + 1 * ms, 0, 3.1); + item1.push_back(beg.ns() + 2 * ms, 0, 3.2); + item1.push_back(beg.ns() + 3 * ms, 0, 3.3); + item1.push_back(beg.ns() + 4 * ms, 0, 3.4); + item1.push_back(end.ns() - 1, 0, 4.0); + item1.push_back(end.ns() + 0, 0, 4.1); + item1.push_back(end.ns() + 1, 0, 4.1); + let w1: Box = Box::new(item1.clone()); + let e1 = Ok(StreamItem::DataItem(RangeCompletableItem::Data(w1))); + let inp = futures_util::stream::iter([e1]); + let one_before_range = false; + let range = NanoRange::from((beg.ns(), end.ns())); + let stream = RangeFilter2::new(inp, range, one_before_range); + let fut = async move { + let tss_items = fetch_into_tss_items(stream).await; + let exp: &[&[u64]] = &[&[ + beg.ns() + 0 * ms, + beg.ns() + 1 * ms, + beg.ns() + 2 * ms, + beg.ns() + 3 * ms, + beg.ns() + 4 * ms, + end.ns() - 1, + ]]; + assert_eq!(&tss_items, &exp); + Ok::<_, Error>(()) + }; + taskrun::run(fut).unwrap(); +} + +#[test] +fn test_cut_before_00() { + use items_0::Empty; + use items_2::eventsdim0::EventsDim0; + let ms = 1_000_000; + let beg = TsNano::from_ms(1000 * 10); + let end = TsNano::from_ms(1000 * 20); + let mut items = Vec::new(); + { + let mut item = EventsDim0::::empty(); + item.push_back(beg.ns() - 1, 0, 2.9); + let w: Box = Box::new(item.clone()); + let e: Sitemty<_> = Ok(StreamItem::DataItem(RangeCompletableItem::Data(w))); + items.push(e); + } + { + let mut item = EventsDim0::::empty(); + item.push_back(beg.ns() + 0 * ms, 0, 3.); + item.push_back(beg.ns() + 1 * ms, 0, 3.1); + item.push_back(beg.ns() + 2 * ms, 0, 3.2); + item.push_back(beg.ns() + 3 * ms, 0, 3.3); + item.push_back(beg.ns() + 4 * ms, 0, 3.4); + item.push_back(end.ns() - 1, 0, 4.0); + item.push_back(end.ns() + 0, 0, 4.1); + item.push_back(end.ns() + 1, 0, 4.1); + let w: Box = Box::new(item.clone()); + let e: Sitemty<_> = Ok(StreamItem::DataItem(RangeCompletableItem::Data(w))); + items.push(e); + } + let inp = futures_util::stream::iter(items); + let one_before_range = false; + let range = NanoRange::from((beg.ns(), end.ns())); + let stream = RangeFilter2::new(inp, range, one_before_range); + let fut = async move { + let tss_items = fetch_into_tss_items(stream).await; + let exp: &[&[u64]] = &[ + // TODO in the future this empty may be discarded + &[], + &[ + beg.ns() + 0 * ms, + beg.ns() + 1 * ms, + beg.ns() + 2 * ms, + beg.ns() + 3 * ms, + beg.ns() + 4 * ms, + end.ns() - 1, + ], + ]; + assert_eq!(&tss_items, &exp); + Ok::<_, Error>(()) + }; + taskrun::run(fut).unwrap(); +} + +#[test] +fn test_one_before_00() { + use items_0::Empty; + use items_2::eventsdim0::EventsDim0; + let ms = 1_000_000; + let beg = TsNano::from_ms(1000 * 10); + let end = TsNano::from_ms(1000 * 20); + let mut items = Vec::new(); + { + let mut item = EventsDim0::::empty(); + item.push_back(beg.ns() - 1, 0, 2.9); + let w: Box = Box::new(item.clone()); + let e: Sitemty<_> = Ok(StreamItem::DataItem(RangeCompletableItem::Data(w))); + items.push(e); + } + { + let mut item = EventsDim0::::empty(); + item.push_back(beg.ns() + 0 * ms, 0, 3.); + item.push_back(beg.ns() + 1 * ms, 0, 3.1); + item.push_back(beg.ns() + 2 * ms, 0, 3.2); + item.push_back(beg.ns() + 3 * ms, 0, 3.3); + item.push_back(beg.ns() + 4 * ms, 0, 3.4); + item.push_back(end.ns() - 1, 0, 4.0); + item.push_back(end.ns() + 0, 0, 4.1); + item.push_back(end.ns() + 1, 0, 4.1); + let w: Box = Box::new(item.clone()); + let e: Sitemty<_> = Ok(StreamItem::DataItem(RangeCompletableItem::Data(w))); + items.push(e); + } + let inp = futures_util::stream::iter(items); + let one_before_range = true; + let range = NanoRange::from((beg.ns(), end.ns())); + let stream = RangeFilter2::new(inp, range, one_before_range); + let fut = async move { + let tss_items = fetch_into_tss_items(stream).await; + let exp: &[&[u64]] = &[ + // TODO in the future this empty may be discarded + &[], + &[ + // + beg.ns() - 1, + ], + &[ + beg.ns() + 0 * ms, + beg.ns() + 1 * ms, + beg.ns() + 2 * ms, + beg.ns() + 3 * ms, + beg.ns() + 4 * ms, + end.ns() - 1, + ], + ]; + assert_eq!(&tss_items, &exp); + Ok::<_, Error>(()) + }; + taskrun::run(fut).unwrap(); +} + +#[test] +fn test_one_before_01() { + use items_0::Empty; + use items_2::eventsdim0::EventsDim0; + let ms = 1_000_000; + let beg = TsNano::from_ms(1000 * 10); + let end = TsNano::from_ms(1000 * 20); + let mut items = Vec::new(); + { + let mut item = EventsDim0::::empty(); + item.push_back(beg.ns() - 1, 0, 2.9); + item.push_back(beg.ns() + 0 * ms, 0, 3.); + let w: Box = Box::new(item.clone()); + let e: Sitemty<_> = Ok(StreamItem::DataItem(RangeCompletableItem::Data(w))); + items.push(e); + } + { + let mut item = EventsDim0::::empty(); + item.push_back(beg.ns() + 1 * ms, 0, 3.1); + item.push_back(beg.ns() + 2 * ms, 0, 3.2); + item.push_back(beg.ns() + 3 * ms, 0, 3.3); + item.push_back(beg.ns() + 4 * ms, 0, 3.4); + item.push_back(end.ns() - 1, 0, 4.0); + item.push_back(end.ns() + 0, 0, 4.1); + item.push_back(end.ns() + 1, 0, 4.1); + let w: Box = Box::new(item.clone()); + let e: Sitemty<_> = Ok(StreamItem::DataItem(RangeCompletableItem::Data(w))); + items.push(e); + } + let inp = futures_util::stream::iter(items); + let one_before_range = true; + let range = NanoRange::from((beg.ns(), end.ns())); + let stream = RangeFilter2::new(inp, range, one_before_range); + let fut = async move { + let tss_items = fetch_into_tss_items(stream).await; + let exp: &[&[u64]] = &[ + // TODO in the future this empty may be discarded + // &[], + &[ + // + beg.ns() - 1, + beg.ns() + 0 * ms, + ], + &[ + beg.ns() + 1 * ms, + beg.ns() + 2 * ms, + beg.ns() + 3 * ms, + beg.ns() + 4 * ms, + end.ns() - 1, + ], + ]; + assert_eq!(&tss_items, &exp); + Ok::<_, Error>(()) + }; + taskrun::run(fut).unwrap(); +} + +#[test] +fn test_one_before_only() { + use items_0::Empty; + use items_2::eventsdim0::EventsDim0; + let _ms = 1_000_000; + let beg = TsNano::from_ms(1000 * 10); + let end = TsNano::from_ms(1000 * 20); + let mut items = Vec::new(); + { + let mut item = EventsDim0::::empty(); + item.push_back(beg.ns() - 1, 0, 2.9); + let w: Box = Box::new(item.clone()); + let e: Sitemty<_> = Ok(StreamItem::DataItem(RangeCompletableItem::Data(w))); + items.push(e); + } + let inp = futures_util::stream::iter(items); + let one_before_range = true; + let range = NanoRange::from((beg.ns(), end.ns())); + let stream = RangeFilter2::new(inp, range, one_before_range); + let fut = async move { + let tss_items = fetch_into_tss_items(stream).await; + let exp: &[&[u64]] = &[ + // TODO in the future this empty may be discarded + &[], + &[ + // + beg.ns() - 1, + ], + ]; + assert_eq!(&tss_items, &exp); + Ok::<_, Error>(()) + }; + taskrun::run(fut).unwrap(); +} + +#[cfg(test)] +async fn fetch_into_tss_items(mut inp: INP) -> VecDeque> +where + INP: Stream>> + Unpin, +{ + let mut tss_items = VecDeque::new(); + while let Some(e) = inp.next().await { + if let Ok(StreamItem::DataItem(RangeCompletableItem::Data(evs))) = e { + eprintln!("{:?}", evs); + tss_items.push_back(Events::tss(&evs).clone()); + } else { + eprintln!("other item ----------: {:?}", e); + } + } + tss_items +} diff --git a/src/slidebuf.rs b/src/slidebuf.rs new file mode 100644 index 0000000..e595394 --- /dev/null +++ b/src/slidebuf.rs @@ -0,0 +1,441 @@ +use std::fmt; + +#[derive(Debug)] +pub enum Error { + NotEnoughBytes, + NotEnoughSpace(usize, usize, usize), + TryFromSliceError, +} + +impl fmt::Display for Error { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{self:?}") + } +} + +impl std::error::Error for Error { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + None + } +} + +impl From for Error { + fn from(_: std::array::TryFromSliceError) -> Self { + Self::TryFromSliceError + } +} + +pub struct SlideBuf { + buf: Vec, + wp: usize, + rp: usize, +} + +macro_rules! check_invariants { + ($self:expr) => { + //$self.check_invariants() + }; +} + +impl SlideBuf { + pub fn new(cap: usize) -> Self { + Self { + buf: vec![0; cap], + wp: 0, + rp: 0, + } + } + + pub fn state(&self) -> (usize, usize) { + (self.rp, self.wp) + } + + pub fn len(&self) -> usize { + check_invariants!(self); + self.wp - self.rp + } + + #[inline(always)] + pub fn cap(&self) -> usize { + check_invariants!(self); + self.buf.len() + } + + pub fn wcap(&self) -> usize { + check_invariants!(self); + self.buf.len() - self.wp + } + + pub fn data(&self) -> &[u8] { + check_invariants!(self); + &self.buf[self.rp..self.wp] + } + + pub fn data_mut(&mut self) -> &mut [u8] { + check_invariants!(self); + &mut self.buf[self.rp..self.wp] + } + + pub fn reset(&mut self) { + self.rp = 0; + self.wp = 0; + } + + pub fn adv(&mut self, x: usize) -> Result<(), Error> { + check_invariants!(self); + if self.len() < x { + return Err(Error::NotEnoughBytes); + } else { + self.rp += x; + Ok(()) + } + } + + pub fn wadv(&mut self, x: usize) -> Result<(), Error> { + check_invariants!(self); + if self.wcap() < x { + self.rewind(); + } + if self.wcap() < x { + return Err(Error::NotEnoughSpace(self.cap(), self.wcap(), x)); + } else { + self.wp += x; + Ok(()) + } + } + + pub fn rp(&self) -> usize { + self.rp + } + + pub fn set_rp(&mut self, rp: usize) -> Result<(), Error> { + check_invariants!(self); + if rp > self.wp { + Err(Error::NotEnoughBytes) + } else { + self.rp = rp; + Ok(()) + } + } + + pub fn rewind_rp(&mut self, n: usize) -> Result<(), Error> { + check_invariants!(self); + if self.rp < n { + Err(Error::NotEnoughBytes) + } else { + self.rp -= n; + Ok(()) + } + } + + pub fn read_u8(&mut self) -> Result { + check_invariants!(self); + type T = u8; + const TS: usize = std::mem::size_of::(); + if self.len() < TS { + return Err(Error::NotEnoughBytes); + } else { + let val = self.buf[self.rp]; + self.rp += TS; + Ok(val) + } + } + + pub fn read_u16_be(&mut self) -> Result { + check_invariants!(self); + type T = u16; + const TS: usize = std::mem::size_of::(); + if self.len() < TS { + return Err(Error::NotEnoughBytes); + } else { + let val = T::from_be_bytes(self.buf[self.rp..self.rp + TS].try_into()?); + self.rp += TS; + Ok(val) + } + } + + pub fn read_u32_be(&mut self) -> Result { + check_invariants!(self); + type T = u32; + const TS: usize = std::mem::size_of::(); + if self.len() < TS { + return Err(Error::NotEnoughBytes); + } else { + let val = T::from_be_bytes(self.buf[self.rp..self.rp + TS].try_into()?); + self.rp += TS; + Ok(val) + } + } + + pub fn read_u64_be(&mut self) -> Result { + check_invariants!(self); + type T = u64; + const TS: usize = std::mem::size_of::(); + if self.len() < TS { + return Err(Error::NotEnoughBytes); + } else { + let val = T::from_be_bytes(self.buf[self.rp..self.rp + TS].try_into()?); + self.rp += TS; + Ok(val) + } + } + + pub fn read_i32_be(&mut self) -> Result { + check_invariants!(self); + type T = i32; + const TS: usize = std::mem::size_of::(); + if self.len() < TS { + return Err(Error::NotEnoughBytes); + } else { + let val = T::from_be_bytes(self.buf[self.rp..self.rp + TS].try_into()?); + self.rp += TS; + Ok(val) + } + } + + pub fn read_i64_be(&mut self) -> Result { + check_invariants!(self); + type T = i64; + const TS: usize = std::mem::size_of::(); + if self.len() < TS { + return Err(Error::NotEnoughBytes); + } else { + let val = T::from_be_bytes(self.buf[self.rp..self.rp + TS].try_into()?); + self.rp += TS; + Ok(val) + } + } + + pub fn read_f32_be(&mut self) -> Result { + check_invariants!(self); + type T = f32; + const TS: usize = std::mem::size_of::(); + if self.len() < TS { + return Err(Error::NotEnoughBytes); + } else { + let val = T::from_be_bytes(self.buf[self.rp..self.rp + TS].try_into()?); + self.rp += TS; + Ok(val) + } + } + + pub fn read_f64_be(&mut self) -> Result { + check_invariants!(self); + type T = f64; + const TS: usize = std::mem::size_of::(); + if self.len() < TS { + return Err(Error::NotEnoughBytes); + } else { + let val = T::from_be_bytes(self.buf[self.rp..self.rp + TS].try_into()?); + self.rp += TS; + Ok(val) + } + } + + pub fn read_bytes(&mut self, n: usize) -> Result<&[u8], Error> { + check_invariants!(self); + if self.len() < n { + return Err(Error::NotEnoughBytes); + } else { + let val = self.buf[self.rp..self.rp + n].as_ref(); + self.rp += n; + Ok(val) + } + } + + /*pub fn read_buf_for_fill(&mut self, need_min: usize) -> ReadBuf { + check_invariants!(self); + self.rewind_if_needed(need_min); + let read_buf = ReadBuf::new(&mut self.buf[self.wp..]); + read_buf + }*/ + + // TODO issue is that this return exactly the size that was asked for, + // but most of time, we want to first get some scratch space, and later + // advance the write pointer. + pub fn ___write_buf___(&mut self, n: usize) -> Result<&mut [u8], Error> { + check_invariants!(self); + self.rewind_if_needed(n); + if self.wcap() < n { + self.rewind(); + } + if self.wcap() < n { + Err(Error::NotEnoughSpace(self.cap(), self.wcap(), n)) + } else { + let ret = &mut self.buf[self.wp..self.wp + n]; + self.wp += n; + Ok(ret) + } + } + + #[inline(always)] + pub fn rewind(&mut self) { + self.buf.copy_within(self.rp..self.wp, 0); + self.wp -= self.rp; + self.rp = 0; + } + + #[inline(always)] + pub fn rewind_if_needed(&mut self, need_min: usize) { + check_invariants!(self); + if self.rp != 0 && self.rp == self.wp { + self.rp = 0; + self.wp = 0; + } else if self.cap() < self.rp + need_min { + self.rewind(); + } + } + + pub fn available_writable_area(&mut self, need_min: usize) -> Result<&mut [u8], Error> { + check_invariants!(self); + self.rewind_if_needed(need_min); + if self.wcap() < need_min { + self.rewind(); + } + if self.wcap() < need_min { + Err(Error::NotEnoughSpace(self.cap(), self.wcap(), need_min)) + } else { + let ret = &mut self.buf[self.wp..]; + Ok(ret) + } + } + + pub fn put_slice(&mut self, buf: &[u8]) -> Result<(), Error> { + check_invariants!(self); + self.rewind_if_needed(buf.len()); + if self.wcap() < buf.len() { + self.rewind(); + } + if self.wcap() < buf.len() { + return Err(Error::NotEnoughSpace(self.cap(), self.wcap(), buf.len())); + } else { + self.buf[self.wp..self.wp + buf.len()].copy_from_slice(buf); + self.wp += buf.len(); + Ok(()) + } + } + + pub fn put_u8(&mut self, v: u8) -> Result<(), Error> { + check_invariants!(self); + type T = u8; + const TS: usize = std::mem::size_of::(); + self.rewind_if_needed(TS); + if self.wcap() < TS { + self.rewind(); + } + if self.wcap() < TS { + return Err(Error::NotEnoughSpace(self.cap(), self.wcap(), TS)); + } else { + self.buf[self.wp..self.wp + TS].copy_from_slice(&v.to_be_bytes()); + self.wp += TS; + Ok(()) + } + } + + pub fn put_u16_be(&mut self, v: u16) -> Result<(), Error> { + check_invariants!(self); + type T = u16; + const TS: usize = std::mem::size_of::(); + self.rewind_if_needed(TS); + if self.wcap() < TS { + self.rewind(); + } + if self.wcap() < TS { + return Err(Error::NotEnoughSpace(self.cap(), self.wcap(), TS)); + } else { + self.buf[self.wp..self.wp + TS].copy_from_slice(&v.to_be_bytes()); + self.wp += TS; + Ok(()) + } + } + + pub fn put_u32_be(&mut self, v: u32) -> Result<(), Error> { + check_invariants!(self); + type T = u32; + const TS: usize = std::mem::size_of::(); + self.rewind_if_needed(TS); + if self.wcap() < TS { + self.rewind(); + } + if self.wcap() < TS { + return Err(Error::NotEnoughSpace(self.cap(), self.wcap(), TS)); + } else { + self.buf[self.wp..self.wp + TS].copy_from_slice(&v.to_be_bytes()); + self.wp += TS; + Ok(()) + } + } + + pub fn put_u64_be(&mut self, v: u64) -> Result<(), Error> { + check_invariants!(self); + type T = u64; + const TS: usize = std::mem::size_of::(); + self.rewind_if_needed(TS); + if self.wcap() < TS { + self.rewind(); + } + if self.wcap() < TS { + return Err(Error::NotEnoughSpace(self.cap(), self.wcap(), TS)); + } else { + self.buf[self.wp..self.wp + TS].copy_from_slice(&v.to_be_bytes()); + self.wp += TS; + Ok(()) + } + } + + pub fn put_f32_be(&mut self, v: f32) -> Result<(), Error> { + check_invariants!(self); + type T = f32; + const TS: usize = std::mem::size_of::(); + self.rewind_if_needed(TS); + if self.wcap() < TS { + self.rewind(); + } + if self.wcap() < TS { + return Err(Error::NotEnoughSpace(self.cap(), self.wcap(), TS)); + } else { + self.buf[self.wp..self.wp + TS].copy_from_slice(&v.to_be_bytes()); + self.wp += TS; + Ok(()) + } + } + + pub fn put_f64_be(&mut self, v: f64) -> Result<(), Error> { + check_invariants!(self); + type T = f64; + const TS: usize = std::mem::size_of::(); + self.rewind_if_needed(TS); + if self.wcap() < TS { + self.rewind(); + } + if self.wcap() < TS { + return Err(Error::NotEnoughSpace(self.cap(), self.wcap(), TS)); + } else { + self.buf[self.wp..self.wp + TS].copy_from_slice(&v.to_be_bytes()); + self.wp += TS; + Ok(()) + } + } + + #[allow(unused)] + fn check_invariants(&self) { + if self.wp > self.buf.len() { + eprintln!("ERROR netbuf wp {} rp {}", self.wp, self.rp); + std::process::exit(87); + } + if self.rp > self.wp { + eprintln!("ERROR netbuf wp {} rp {}", self.wp, self.rp); + std::process::exit(87); + } + } +} + +impl fmt::Debug for SlideBuf { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("SlideBuf") + .field("cap", &self.cap()) + .field("wp", &self.wp) + .field("rp", &self.rp) + .finish() + } +} diff --git a/src/streamtimeout.rs b/src/streamtimeout.rs new file mode 100644 index 0000000..a23cb26 --- /dev/null +++ b/src/streamtimeout.rs @@ -0,0 +1,85 @@ +use futures_util::FutureExt; +use futures_util::Stream; +use futures_util::StreamExt; +use std::future::Future; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; +use std::time::Duration; +use std::time::Instant; + +pub type BoxedTimeoutFuture = Pin + Send>>; + +pub trait StreamTimeout2: Send { + fn timeout_intervals(&self, ivl: Duration) -> BoxedTimeoutFuture; +} + +pub struct TimeoutableStream { + ivl: Duration, + timeout_provider: Box, + inp: Pin>, + timeout_fut: BoxedTimeoutFuture, + last_seen: Instant, +} + +impl TimeoutableStream +where + S: Stream, +{ + pub fn new(ivl: Duration, timeout_provider: Box, inp: S) -> Self { + let timeout_fut = timeout_provider.timeout_intervals(ivl); + Self { + ivl, + timeout_provider, + inp: Box::pin(inp), + timeout_fut, + last_seen: Instant::now(), + } + } + + fn resetup(mut self: Pin<&mut Self>, ivl: Duration) -> () { + self.timeout_fut = self.timeout_provider.timeout_intervals(ivl) + } + + fn handle_timeout(self: Pin<&mut Self>, cx: &mut Context) -> Poll::Item>> { + use Poll::*; + let tsnow = Instant::now(); + if self.last_seen + self.ivl < tsnow { + let ivl2 = self.ivl; + self.resetup(ivl2); + Ready(Some(None)) + } else { + let ivl2 = (self.last_seen + self.ivl) - tsnow + Duration::from_millis(1); + self.resetup(ivl2); + cx.waker().wake_by_ref(); + Pending + } + } + + fn handle_inp_pending(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll::Item>> { + use Poll::*; + match self.timeout_fut.poll_unpin(cx) { + Ready(()) => self.handle_timeout(cx), + Pending => Pending, + } + } +} + +impl Stream for TimeoutableStream +where + S: Stream, +{ + type Item = Option<::Item>; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + match self.inp.poll_next_unpin(cx) { + Ready(Some(x)) => { + self.last_seen = Instant::now(); + Ready(Some(Some(x))) + } + Ready(None) => Ready(None), + Pending => self.handle_inp_pending(cx), + } + } +} diff --git a/src/tcprawclient.rs b/src/tcprawclient.rs new file mode 100644 index 0000000..c17b9f3 --- /dev/null +++ b/src/tcprawclient.rs @@ -0,0 +1,205 @@ +use crate::frames::eventsfromframes::EventsFromFrames; +use crate::frames::inmem::BoxedBytesStream; +use crate::frames::inmem::InMemoryFrameStream; +use bytes::Bytes; +use bytes::BytesMut; +use futures_util::Future; +use futures_util::Stream; +use futures_util::StreamExt; +use futures_util::TryStreamExt; +use http::Uri; +use http_body_util::BodyExt; +use items_0::framable::FrameTypeInnerStatic; +use items_0::streamitem::sitem_data; +use items_0::streamitem::sitem_err2_from_string; +use items_0::streamitem::Sitemty; +use items_2::eventfull::EventFull; +use items_2::framable::EventQueryJsonStringFrame; +use items_2::framable::Framable; +use netpod::log::*; +use netpod::range::evrange::SeriesRange; +use netpod::ByteSize; +use netpod::ChannelTypeConfigGen; +use netpod::Node; +use netpod::ReqCtx; +use netpod::APP_OCTET; +use query::api4::events::EventsSubQuery; +use query::api4::events::EventsSubQuerySelect; +use query::api4::events::EventsSubQuerySettings; +use query::api4::events::Frame1Parts; +use query::transform::TransformQuery; +use serde::de::DeserializeOwned; +use std::fmt; +use std::pin::Pin; +use std::sync::Arc; + +pub const TEST_BACKEND: &str = "testbackend-00"; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "TcpRawClient")] +pub enum Error { + IO(#[from] std::io::Error), + Msg(String), + Frame(#[from] items_2::frame::Error), + Framable(#[from] items_2::framable::Error), + Json(#[from] serde_json::Error), + Http(#[from] http::Error), + // HttpClient(#[from] httpclient::Error), + // Hyper(#[from] httpclient::hyper::Error), + #[error("ServerError({0:?}, {1})")] + ServerError(http::response::Parts, String), + HttpBody(Box), +} + +struct ErrMsg(E) +where + E: ToString; + +impl From> for Error +where + E: ToString, +{ + fn from(value: ErrMsg) -> Self { + Self::Msg(value.0.to_string()) + } +} + +impl From for Error { + fn from(value: String) -> Self { + Self::Msg(value) + } +} + +pub trait OpenBoxedBytesStreams { + fn open( + &self, + subq: EventsSubQuery, + // TODO take by Arc + ctx: ReqCtx, + ) -> Pin, Error>> + Send>>; +} + +pub type OpenBoxedBytesStreamsBox = Pin>; + +pub fn make_node_command_frame(query: EventsSubQuery) -> Result { + let obj = Frame1Parts::new(query); + let ret = serde_json::to_string(&obj)?; + Ok(EventQueryJsonStringFrame(ret)) +} + +#[derive(Debug, thiserror::Error)] +pub enum ErrorBody { + #[error("{0}")] + Msg(String), +} + +pub trait HttpSimplePost: Send { + fn http_simple_post( + &self, + req: http::Request>, + ) -> Pin< + Box>> + Send>, + >; +} + +pub async fn read_body_bytes(mut body: B) -> Result +where + B: http_body::Body + Unpin, + ::Error: std::error::Error + Send + 'static, +{ + use bytes::BufMut; + use http_body_util::BodyExt; + let mut buf = BytesMut::new(); + while let Some(x) = body.frame().await { + let mut frame = x.map_err(|e| Error::HttpBody(Box::new(e)))?; + if let Some(x) = frame.data_mut() { + buf.put(x); + } + } + Ok(buf.freeze()) +} + +pub async fn x_processed_event_blobs_stream_from_node_http( + subq: EventsSubQuery, + node: Node, + post: Box, + ctx: &ReqCtx, +) -> Result> + Send>>, Error> { + use http::header; + use http::Method; + use http::Request; + use http::StatusCode; + let frame1 = make_node_command_frame(subq.clone())?; + let item = sitem_data(frame1.clone()); + let buf = item.make_frame_dyn()?.freeze(); + let url = node.baseurl().join("/api/4/private/eventdata/frames").unwrap(); + debug!("open_event_data_streams_http post {url}"); + let uri: Uri = url.as_str().parse().unwrap(); + let body = http_body_util::Full::new(buf); + let req = Request::builder() + .method(Method::POST) + .uri(&uri) + .header(header::HOST, uri.host().unwrap()) + .header(header::ACCEPT, APP_OCTET) + .header(ctx.header_name(), ctx.header_value()) + .body(body)?; + let res = post.http_simple_post(req).await; + if res.status() != StatusCode::OK { + let (head, body) = res.into_parts(); + error!("server error {:?}", head); + let buf = read_body_bytes(body).await?; + let s = String::from_utf8_lossy(&buf); + return Err(Error::ServerError(head, s.to_string())); + } + let (_head, body) = res.into_parts(); + let inp = body; + let inp = inp.into_data_stream(); + let inp = inp.map(|x| match x { + Ok(x) => Ok(x), + Err(e) => Err(sitem_err2_from_string(e)), + }); + let inp = Box::pin(inp) as BoxedBytesStream; + let frames = InMemoryFrameStream::new(inp, subq.inmem_bufcap()); + let frames = frames.map_err(sitem_err2_from_string); + let frames = Box::pin(frames); + let stream = EventsFromFrames::new(frames, url.to_string()); + debug!("open_event_data_streams_http done {url}"); + Ok(Box::pin(stream)) +} + +pub fn container_stream_from_bytes_stream( + inp: BoxedBytesStream, + bufcap: ByteSize, + dbgdesc: String, +) -> Result>, Error> +where + T: FrameTypeInnerStatic + DeserializeOwned + Send + Unpin + fmt::Debug + 'static, +{ + let frames = InMemoryFrameStream::new(inp, bufcap); + let frames = frames.map_err(sitem_err2_from_string); + // TODO let EventsFromFrames accept also non-boxed input? + let frames = Box::pin(frames); + let stream = EventsFromFrames::::new(frames, dbgdesc); + Ok(stream) +} + +pub fn make_sub_query( + ch_conf: ChannelTypeConfigGen, + range: SeriesRange, + one_before_range: bool, + transform: TransformQuery, + sub: SUB, + log_level: String, + ctx: &ReqCtx, +) -> EventsSubQuery +where + SUB: Into, +{ + let mut select = EventsSubQuerySelect::new(ch_conf, range, one_before_range, transform.clone()); + if let Some(wasm1) = transform.do_wasm() { + select.set_wasm1(wasm1.into()); + } + let settings = sub.into(); + let subq = EventsSubQuery::from_parts(select, settings, ctx.reqid().into(), log_level); + subq +} diff --git a/src/test.rs b/src/test.rs new file mode 100644 index 0000000..a4ad10a --- /dev/null +++ b/src/test.rs @@ -0,0 +1,62 @@ +mod collect; +mod events; +mod timebin; + +use futures_util::stream; +use futures_util::Stream; +use items_0::streamitem::sitem_data; +use items_0::streamitem::Sitemty; +use items_0::Appendable; +use items_0::Empty; +use items_2::channelevents::ChannelEvents; +use items_2::eventsdim0::EventsDim0; +use netpod::timeunits::SEC; +use std::pin::Pin; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "StreamsTest")] +pub enum Error {} + +type BoxedEventStream = Pin> + Send>>; + +// TODO use some xorshift generator. + +fn inmem_test_events_d0_i32_00() -> BoxedEventStream { + let mut evs = EventsDim0::empty(); + evs.push(SEC * 1, 1, 10001); + evs.push(SEC * 4, 4, 10004); + let cev = ChannelEvents::Events(Box::new(evs)); + let item = sitem_data(cev); + let stream = stream::iter([item]); + Box::pin(stream) +} + +fn inmem_test_events_d0_i32_01() -> BoxedEventStream { + let mut evs = EventsDim0::empty(); + evs.push(SEC * 2, 2, 10002); + let cev = ChannelEvents::Events(Box::new(evs)); + let item = sitem_data(cev); + let stream = stream::iter([item]); + Box::pin(stream) +} + +#[test] +fn merge_mergeable_00() -> Result<(), Error> { + let fut = async { + let inp0 = inmem_test_events_d0_i32_00(); + let inp1 = inmem_test_events_d0_i32_01(); + let _merger = items_2::merger::Merger::new(vec![inp0, inp1], Some(4)); + Ok(()) + }; + runfut(fut) +} + +fn runfut(fut: F) -> Result +where + F: std::future::Future>, + E: std::error::Error, +{ + // taskrun::run(fut) + let _ = fut; + todo!() +} diff --git a/src/test/collect.rs b/src/test/collect.rs new file mode 100644 index 0000000..862aabb --- /dev/null +++ b/src/test/collect.rs @@ -0,0 +1,127 @@ +use crate::collect::Collect; +use crate::collect::CollectResult; +use crate::test::runfut; +use crate::transform::build_event_transform; +use crate::transform::EventsToTimeBinnable; +use futures_util::stream; +use futures_util::StreamExt; +use items_0::on_sitemty_data; +use items_0::streamitem::sitem_data; +use items_0::streamitem::RangeCompletableItem; +use items_0::streamitem::StreamItem; +use items_0::WithLen; +use items_2::eventsdim0::EventsDim0CollectorOutput; +use items_2::streams::PlainEventStream; +use items_2::testgen::make_some_boxed_d0_f32; +use netpod::log::*; +use netpod::timeunits::SEC; +use netpod::FromUrl; +use query::transform::TransformQuery; +use std::time::Duration; +use std::time::Instant; + +// #[test] +// fn collect_channel_events_00() -> Result<(), Error> { +// let fut = async { +// let evs0 = make_some_boxed_d0_f32(20, SEC * 10, SEC * 1, 0, 28736487); +// let evs1 = make_some_boxed_d0_f32(20, SEC * 30, SEC * 1, 0, 882716583); +// let stream = stream::iter(vec![ +// sitem_data(evs0), +// sitem_data(evs1), +// Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)), +// ]); +// let deadline = Instant::now() + Duration::from_millis(4000); +// let events_max = 10000; +// let res = crate::collect::collect(stream, deadline, events_max, None, None).await?; +// //eprintln!("collected result: {res:?}"); +// if let Some(res) = res.as_any_ref().downcast_ref::>() { +// eprintln!("Great, a match"); +// eprintln!("{res:?}"); +// assert_eq!(res.len(), 40); +// } else { +// return Err(Error::with_msg(format!("bad type of collected result"))); +// } +// Ok(()) +// }; +// runfut(fut) +// } + +// #[test] +// fn collect_channel_events_01() -> Result<(), Error> { +// let fut = async { +// let evs0 = make_some_boxed_d0_f32(20, SEC * 10, SEC * 1, 0, 28736487); +// let evs1 = make_some_boxed_d0_f32(20, SEC * 30, SEC * 1, 0, 882716583); +// let stream = stream::iter(vec![ +// sitem_data(evs0), +// sitem_data(evs1), +// Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)), +// ]); +// // TODO build like in request code +// let deadline = Instant::now() + Duration::from_millis(4000); +// let events_max = 10000; +// let bytes_max = 80 * 10000; +// let stream = PlainEventStream::new(stream); +// let stream = EventsToTimeBinnable::new(stream); +// let stream = TimeBinnableToCollectable::new(stream); +// let stream = Box::pin(stream); +// let res = Collect::new(stream, deadline, events_max, bytes_max, None, None).await?; +// if let CollectResult::Some(res) = res { +// if let Some(res) = res.as_any_ref().downcast_ref::>() { +// eprintln!("Great, a match"); +// eprintln!("{res:?}"); +// assert_eq!(res.len(), 40); +// } else { +// return Err(Error::with_msg(format!("bad type of collected result"))); +// } +// Ok(()) +// } else { +// return Err(Error::with_msg(format!("bad type of collected result"))); +// } +// }; +// runfut(fut) +// } + +// #[test] +// fn collect_channel_events_pulse_id_diff() -> Result<(), Error> { +// let fut = async { +// let trqu = TransformQuery::from_url(&"https://data-api.psi.ch/?binningScheme=pulseIdDiff".parse()?)?; +// info!("{trqu:?}"); +// let evs0 = make_some_boxed_d0_f32(20, SEC * 10, SEC * 1, 0, 28736487); +// let evs1 = make_some_boxed_d0_f32(20, SEC * 30, SEC * 1, 0, 882716583); +// let stream = stream::iter(vec![ +// sitem_data(evs0), +// sitem_data(evs1), +// Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)), +// ]); +// let mut tr = build_event_transform(&trqu)?; +// let stream = stream.map(move |x| { +// on_sitemty_data!(x, |x| { +// let x = tr.0.transform(x); +// Ok(StreamItem::DataItem(RangeCompletableItem::Data(x))) +// }) +// }); +// let stream = PlainEventStream::new(stream); +// let stream = EventsToTimeBinnable::new(stream); +// let deadline = Instant::now() + Duration::from_millis(4000); +// let events_max = 10000; +// let bytes_max = 80 * 10000; +// let stream = Box::pin(stream); +// let stream = build_time_binning_transform(&trqu, stream)?; +// let stream = TimeBinnableToCollectable::new(stream); +// let stream = Box::pin(stream); +// let res = Collect::new(stream, deadline, events_max, bytes_max, None, None).await?; +// if let CollectResult::Some(res) = res { +// if let Some(res) = res.as_any_ref().downcast_ref::>() { +// eprintln!("Great, a match"); +// eprintln!("{res:?}"); +// assert_eq!(res.len(), 40); +// } else { +// return Err(Error::with_msg(format!("bad type of collected result"))); +// } +// Ok(()) +// } else { +// return Err(Error::with_msg(format!("bad type of collected result"))); +// } +// }; +// runfut(fut) +// } diff --git a/src/test/events.rs b/src/test/events.rs new file mode 100644 index 0000000..c594887 --- /dev/null +++ b/src/test/events.rs @@ -0,0 +1,97 @@ +use crate::cbor_stream::FramedBytesToSitemtyDynEventsStream; +use crate::firsterr::only_first_err; +use crate::frames::inmem::BoxedBytesStream; +use crate::lenframed; +use crate::plaineventscbor::plain_events_cbor_stream; +use crate::tcprawclient::OpenBoxedBytesStreams; +use crate::tcprawclient::TEST_BACKEND; +use futures_util::future; +use futures_util::Future; +use futures_util::StreamExt; +use futures_util::TryFutureExt; +use netpod::log::*; +use netpod::range::evrange::NanoRange; +use netpod::range::evrange::SeriesRange; +use netpod::ChConf; +use netpod::ReqCtx; +use netpod::ScalarType; +use netpod::SeriesKind; +use netpod::SfDbChannel; +use netpod::Shape; +use query::api4::events::EventsSubQuery; +use query::api4::events::PlainEventsQuery; +use std::pin::Pin; +use std::sync::Arc; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "TestEvents")] +pub enum Error { + InMem(#[from] crate::frames::inmem::Error), + Generator(#[from] crate::generators::Error), +} + +#[test] +fn merged_events_cbor() { + crate::test::runfut(merged_events_inner()).unwrap(); +} + +async fn merged_events_inner() -> Result<(), Error> { + let ctx = ReqCtx::for_test(); + // TODO factor out the channel config lookup such that the test code can use a similar code path, + // except that we don't want to go over the network here. + let ch_conf = ChConf::new( + TEST_BACKEND, + 1, + SeriesKind::ChannelData, + ScalarType::I32, + Shape::Scalar, + "test-gen-i32-dim0-v00", + ); + let channel = SfDbChannel::from_name(ch_conf.backend(), ch_conf.name()); + let range = SeriesRange::TimeRange(NanoRange::from_date_time( + "2023-12-18T05:10:00Z".parse().unwrap(), + "2023-12-18T05:12:00Z".parse().unwrap(), + )); + let evq = PlainEventsQuery::new(channel, range); + let open_bytes = StreamOpener::new(); + let open_bytes = Arc::pin(open_bytes); + let stream = plain_events_cbor_stream(&evq, ch_conf.clone().into(), &ctx, open_bytes, todo!()) + .await + .unwrap(); + let stream = lenframed::length_framed(stream); + let stream = + FramedBytesToSitemtyDynEventsStream::new(stream, ch_conf.scalar_type().clone(), ch_conf.shape().clone()); + let stream = only_first_err(stream); + stream + .for_each(|item| { + debug!("{item:?}"); + future::ready(()) + }) + .await; + Ok(()) +} + +struct StreamOpener {} + +impl StreamOpener { + fn new() -> Self { + Self {} + } +} + +impl OpenBoxedBytesStreams for StreamOpener { + fn open( + &self, + subq: EventsSubQuery, + _ctx: ReqCtx, + ) -> Pin, crate::tcprawclient::Error>> + Send>> { + Box::pin(stream_opener(subq).map_err(|e| crate::tcprawclient::Error::Msg(format!("{e}")))) + } +} + +async fn stream_opener(subq: EventsSubQuery) -> Result, Error> { + let mut streams = Vec::new(); + let stream = crate::generators::make_test_channel_events_bytes_stream(subq, 1, 0)?; + streams.push(stream); + Ok(streams) +} diff --git a/src/test/timebin.rs b/src/test/timebin.rs new file mode 100644 index 0000000..7a20ab3 --- /dev/null +++ b/src/test/timebin.rs @@ -0,0 +1,445 @@ +// use crate::collect::collect; +// use crate::generators::GenerateI32V00; +// use crate::generators::GenerateI32V01; +// use crate::itemclone::Itemclone; +// use crate::test::runfut; +// use crate::timebin::TimeBinnedStream; +// use crate::transform::build_event_transform; +// use err::Error; +// use futures_util::stream; +// use futures_util::StreamExt; +// use items_0::on_sitemty_data; +// use items_0::streamitem::sitem_data; +// use items_0::streamitem::RangeCompletableItem; +// use items_0::streamitem::StreamItem; +// use items_0::AppendAllFrom; +// use items_0::Empty; +// use items_2::binsdim0::BinsDim0; +// use items_2::channelevents::ChannelEvents; +// use items_2::channelevents::ConnStatus; +// use items_2::channelevents::ConnStatusEvent; +// use items_2::eventsdim0::EventsDim0; +// use items_2::testgen::make_some_boxed_d0_f32; +// use netpod::range::evrange::NanoRange; +// use netpod::range::evrange::SeriesRange; +// use netpod::timeunits::MS; +// use netpod::timeunits::SEC; +// use netpod::BinnedRangeEnum; +// use query::transform::TransformQuery; +// use serde_json::Value as JsValue; +// use std::collections::VecDeque; +// use std::time::Duration; +// use std::time::Instant; + +// fn nano_range_from_str(beg_date: &str, end_date: &str) -> Result { +// let beg_date = beg_date.parse()?; +// let end_date = end_date.parse()?; +// let range = NanoRange::from_date_time(beg_date, end_date); +// Ok(range) +// } + +// #[test] +// fn time_bin_00() -> Result<(), Error> { +// let fut = async { +// let range = nano_range_from_str("1970-01-01T00:00:00Z", "1970-01-01T00:00:08Z")?; +// let range = SeriesRange::TimeRange(range); +// let min_bin_count = 8; +// let binned_range = BinnedRangeEnum::covering_range(range, min_bin_count)?; +// let evs0 = make_some_boxed_d0_f32(10, SEC * 1, MS * 500, 0, 1846713782); +// let v00 = ChannelEvents::Events(Box::new(EventsDim0::::empty())); +// let v01 = ChannelEvents::Events(evs0); +// let v02 = ChannelEvents::Status(Some(ConnStatusEvent::new(MS * 100, ConnStatus::Connect))); +// let v03 = ChannelEvents::Status(Some(ConnStatusEvent::new(MS * 6000, ConnStatus::Disconnect))); +// let stream0 = Box::pin(stream::iter(vec![ +// // +// sitem_data(v00), +// sitem_data(v02), +// sitem_data(v01), +// sitem_data(v03), +// ])); +// let mut exps = { +// let mut d = VecDeque::new(); +// let bins = BinsDim0::empty(); +// d.push_back(bins); +// let mut bins = BinsDim0::empty(); +// // Currently can not cosntruct bins without minmaxlst +// // bins.push(SEC * 0, SEC * 1, 0, 0.0, 0.0, 0.0); +// bins.push(SEC * 1, SEC * 2, 2, 0.0535830, 100.0589, 50.05624, 100.0589); +// bins.push(SEC * 2, SEC * 3, 2, 200.06143, 300.07645, 250.06894, 300.07645); +// bins.push(SEC * 3, SEC * 4, 2, 400.08554, 500.05222, 450.06888, 500.05222); +// bins.push(SEC * 4, SEC * 5, 2, 600.0025, 700.09094, 650.04675, 700.09094); +// d.push_back(bins); +// let mut bins = BinsDim0::empty(); +// bins.push(SEC * 5, SEC * 6, 2, 800.0619, 900.02844, 850.04517, 900.02844); +// d.push_back(bins); +// d +// }; +// let mut binned_stream = TimeBinnedStream::new(stream0, binned_range, true); +// while let Some(item) = binned_stream.next().await { +// eprintln!("{item:?}"); +// match item { +// Ok(item) => match item { +// StreamItem::DataItem(item) => match item { +// RangeCompletableItem::Data(item) => { +// if let Some(item) = item.as_any_ref().downcast_ref::>() { +// let exp = exps.pop_front().unwrap(); +// if !item.equal_slack(&exp) { +// eprintln!("-----------------------"); +// eprintln!("item {:?}", item); +// eprintln!("-----------------------"); +// eprintln!("exp {:?}", exp); +// eprintln!("-----------------------"); +// return Err(Error::with_msg_no_trace(format!("bad, content not equal"))); +// } +// } else { +// return Err(Error::with_msg_no_trace(format!("bad, got item with unexpected type"))); +// } +// } +// RangeCompletableItem::RangeComplete => {} +// }, +// StreamItem::Log(_) => {} +// StreamItem::Stats(_) => {} +// }, +// Err(e) => Err(e).unwrap(), +// } +// } +// Ok(()) +// }; +// runfut(fut) +// } + +// #[test] +// fn time_bin_01() -> Result<(), Error> { +// let fut = async { +// let range = nano_range_from_str("1970-01-01T00:00:00Z", "1970-01-01T00:00:08Z")?; +// let range = SeriesRange::TimeRange(range); +// let min_bin_count = 8; +// let binned_range = BinnedRangeEnum::covering_range(range, min_bin_count)?; +// let v00 = ChannelEvents::Events(Box::new(EventsDim0::::empty())); +// let evs0 = make_some_boxed_d0_f32(10, SEC * 1, MS * 500, 0, 1846713782); +// let evs1 = make_some_boxed_d0_f32(10, SEC * 6, MS * 500, 0, 1846713781); +// let v01 = ChannelEvents::Events(evs0); +// let v02 = ChannelEvents::Events(evs1); +// let stream0 = stream::iter(vec![ +// // +// sitem_data(v00), +// sitem_data(v01), +// sitem_data(v02), +// ]); +// let stream0 = stream0.then({ +// let mut i = 0; +// move |x| { +// let delay = if i == 1 { 2000 } else { 0 }; +// i += 1; +// let dur = Duration::from_millis(delay); +// async move { +// tokio::time::sleep(dur).await; +// x +// } +// } +// }); +// let stream0 = Box::pin(stream0); +// let mut binned_stream = TimeBinnedStream::new(stream0, binned_range, true); +// while let Some(item) = binned_stream.next().await { +// if true { +// eprintln!("{item:?}"); +// } +// match item { +// Ok(item) => match item { +// StreamItem::DataItem(item) => match item { +// RangeCompletableItem::Data(item) => { +// if let Some(_) = item.as_any_ref().downcast_ref::>() { +// } else { +// return Err(Error::with_msg_no_trace(format!("bad, got item with unexpected type"))); +// } +// } +// RangeCompletableItem::RangeComplete => {} +// }, +// StreamItem::Log(_) => {} +// StreamItem::Stats(_) => {} +// }, +// Err(e) => Err(e).unwrap(), +// } +// } +// // TODO assert that we get the bins which are sure to be ready. +// // TODO assert correct numbers. +// // TODO assert that we don't get bins which may be still changing. +// // TODO add similar test case with a RangeComplete event at different places before the timeout. +// Ok(()) +// }; +// runfut(fut) +// } + +// #[test] +// fn time_bin_02() -> Result<(), Error> { +// let fut = async { +// let do_time_weight = true; +// let deadline = Instant::now() + Duration::from_millis(4000); +// let range = nano_range_from_str("1970-01-01T00:20:04Z", "1970-01-01T00:22:10Z")?; +// let range = SeriesRange::TimeRange(range); +// // TODO add test: 26 bins should result in next higher resolution. +// let min_bin_count = 25; +// let expected_bin_count = 26; +// let binned_range = BinnedRangeEnum::covering_range(range.clone(), min_bin_count)?; +// eprintln!("binned_range: {:?}", binned_range); +// for i in 0.. { +// if let Some(r) = binned_range.range_at(i) { +// eprintln!("Series Range to cover: {r:?}"); +// } else { +// break; +// } +// } +// let event_range = binned_range.binned_range_time().full_range(); +// let series_range = SeriesRange::TimeRange(event_range); +// // TODO the test stream must be able to generate also one-before (on demand) and RangeComplete (by default). +// let stream = GenerateI32V00::new(0, 1, series_range, true); +// // TODO apply first some box dyn EventTransform which later is provided by TransformQuery. +// // Then the Merge will happen always by default for backends where this is needed. +// // TODO then apply the transform chain for the after-merged-stream. +// let stream = stream.map(|x| { +// let x = on_sitemty_data!(x, |x| Ok(StreamItem::DataItem(RangeCompletableItem::Data( +// Box::new(x) as Box +// )))); +// x +// }); +// let stream = Box::pin(stream); +// let mut binned_stream = TimeBinnedStream::new(stream, binned_range.clone(), do_time_weight); +// // From there on it should no longer be neccessary to distinguish whether its still events or time bins. +// // Then, optionally collect for output type like json, or stream as batches. +// // TODO the timebinner should already provide batches to make this efficient. +// if false { +// while let Some(e) = binned_stream.next().await { +// eprintln!("see item {e:?}"); +// let _x = on_sitemty_data!(e, |e| { +// // +// Ok(StreamItem::DataItem(RangeCompletableItem::Data(e))) +// }); +// } +// } else { +// let res = collect(binned_stream, deadline, 200, None, Some(binned_range)).await?; +// assert_eq!(res.len(), expected_bin_count); +// // use crate::json_stream::JsonBytes; +// let v = res.to_json_value()?; +// let d = serde_json::to_vec(&v)?; +// let s = String::from_utf8_lossy(&d); +// eprintln!("{s}"); +// let jsval: JsValue = serde_json::from_slice(&d)?; +// { +// let ts_anchor = jsval.get("tsAnchor").unwrap().as_u64().unwrap(); +// assert_eq!(ts_anchor, 1200); +// } +// { +// let counts = jsval.get("counts").unwrap().as_array().unwrap(); +// assert_eq!(counts.len(), expected_bin_count); +// for v in counts { +// assert_eq!(v.as_u64().unwrap(), 5); +// } +// } +// { +// let ts1ms = jsval.get("ts1Ms").unwrap().as_array().unwrap(); +// let mins = jsval.get("mins").unwrap().as_array().unwrap(); +// assert_eq!(mins.len(), expected_bin_count); +// for (ts1ms, min) in ts1ms.iter().zip(mins) { +// assert_eq!((ts1ms.as_u64().unwrap() / 100) % 1000, min.as_u64().unwrap()); +// } +// } +// { +// let ts1ms = jsval.get("ts1Ms").unwrap().as_array().unwrap(); +// let maxs = jsval.get("maxs").unwrap().as_array().unwrap(); +// assert_eq!(maxs.len(), expected_bin_count); +// for (ts1ms, max) in ts1ms.iter().zip(maxs) { +// assert_eq!((40 + ts1ms.as_u64().unwrap() / 100) % 1000, max.as_u64().unwrap()); +// } +// } +// { +// let range_final = jsval.get("rangeFinal").unwrap().as_bool().unwrap(); +// assert_eq!(range_final, true); +// } +// } +// Ok(()) +// }; +// runfut(fut) +// } + +// Should fail because of missing empty item. +// But should have some option to suppress the error log for this test case. +// #[test] +// fn time_bin_03() -> Result<(), Error> { +// // TODO re-enable with error log suppressed. +// if true { +// return Ok(()); +// } +// let fut = async { +// let range = nano_range_from_str("1970-01-01T00:00:00Z", "1970-01-01T00:00:08Z")?; +// let range = SeriesRange::TimeRange(range); +// let min_bin_count = 8; +// let binned_range = BinnedRangeEnum::covering_range(range, min_bin_count)?; +// let evs0 = make_some_boxed_d0_f32(10, SEC * 1, MS * 500, 0, 1846713782); +// //let v00 = ChannelEvents::Events(Box::new(EventsDim0::::empty())); +// let v01 = ChannelEvents::Events(evs0); +// let v02 = ChannelEvents::Status(Some(ConnStatusEvent::new(MS * 100, ConnStatus::Connect))); +// let v03 = ChannelEvents::Status(Some(ConnStatusEvent::new(MS * 6000, ConnStatus::Disconnect))); +// let stream0 = Box::pin(stream::iter(vec![ +// // +// //sitem_data(v00), +// sitem_data(v02), +// sitem_data(v01), +// sitem_data(v03), +// ])); +// let mut binned_stream = TimeBinnedStream::new(stream0, binned_range, true); +// while let Some(item) = binned_stream.next().await { +// eprintln!("{item:?}"); +// match item { +// Err(e) => { +// if e.to_string().contains("must emit but can not even create empty A") { +// return Ok(()); +// } else { +// return Err(Error::with_msg_no_trace("should not succeed")); +// } +// } +// _ => { +// return Err(Error::with_msg_no_trace("should not succeed")); +// } +// } +// } +// return Err(Error::with_msg_no_trace("should not succeed")); +// }; +// runfut(fut) +// } + +// #[test] +// fn timebin_multi_stage_00() -> Result<(), Error> { +// // TODO chain two timebin stages with different binning grid. +// let fut = async { +// let do_time_weight = true; +// let one_before_range = do_time_weight; +// let range = nano_range_from_str("1970-01-01T00:00:10Z", "1970-01-01T00:01:03Z")?; +// let range = SeriesRange::TimeRange(range); +// let binned_range_0 = BinnedRangeEnum::covering_range(range.clone(), 22)?; +// dbg!(&binned_range_0); +// let range: SeriesRange = binned_range_0.binned_range_time().to_nano_range().into(); +// let binned_range_1 = BinnedRangeEnum::covering_range(range.clone(), 48)?; +// dbg!(&binned_range_1); +// let stream_evs = GenerateI32V01::new(0, 1, range.clone(), one_before_range); +// let exp1 = { +// let mut bins = BinsDim0::::empty(); +// for i in 0..54 { +// bins.push( +// SEC * (10 + i), +// SEC * (11 + i), +// 2, +// 20 + 2 * i as i32, +// 21 + 2 * i as i32, +// 20.5 + 2. * i as f32, +// 21 + 2 * i as i32, +// ); +// } +// bins +// }; +// let exp2 = { +// let mut bins = BinsDim0::::empty(); +// for i in 0..27 { +// bins.push( +// SEC * (10 + 2 * i), +// SEC * (12 + 2 * i), +// 4, +// 20 + 4 * i as i32, +// 23 + 4 * i as i32, +// 21.5 + 4. * i as f32, +// 23 + 4 * i as i32, +// ); +// } +// bins +// }; +// // NOTE: +// // can store all bins in cache for which there is some non-empty bin following, or if the container has range-final. +// let (q1tx, q1rx) = async_channel::bounded(128); +// let (q2tx, q2rx) = async_channel::bounded(128); +// let stream_evs = Box::pin(stream_evs); +// let binned_stream = { +// TimeBinnedStream::new(stream_evs, binned_range_1, do_time_weight).map(|x| { +// //eprintln!("STAGE 1 -- {:?}", x); +// x +// }) +// }; +// let binned_stream = Itemclone::new(binned_stream, q1tx).map(|x| match x { +// Ok(x) => x, +// Err(e) => Err(e), +// }); +// let binned_stream = { +// TimeBinnedStream::new(Box::pin(binned_stream), binned_range_0, do_time_weight).map(|x| { +// eprintln!("STAGE -- 2 {:?}", x); +// x +// }) +// }; +// let binned_stream = Itemclone::new(binned_stream, q2tx).map(|x| match x { +// Ok(x) => x, +// Err(e) => Err(e), +// }); +// let mut have_range_final = false; +// let mut binned_stream = binned_stream; +// while let Some(item) = binned_stream.next().await { +// //eprintln!("{item:?}"); +// match item { +// Ok(item) => match item { +// StreamItem::DataItem(item) => match item { +// RangeCompletableItem::Data(item) => { +// if let Some(item) = item.as_any_ref().downcast_ref::>() { +// if false { +// eprintln!("-----------------------"); +// eprintln!("item {:?}", item); +// eprintln!("-----------------------"); +// } +// } else { +// return Err(Error::with_msg_no_trace(format!("bad, got item with unexpected type"))); +// } +// } +// RangeCompletableItem::RangeComplete => { +// have_range_final = true; +// } +// }, +// StreamItem::Log(_) => {} +// StreamItem::Stats(_) => {} +// }, +// Err(e) => Err(e).unwrap(), +// } +// } +// assert!(have_range_final); +// { +// eprintln!("---------------------------------------------------------------------"); +// let mut coll = BinsDim0::empty(); +// let stream = q1rx; +// while let Ok(item) = stream.recv().await { +// //eprintln!("RECV [q1rx] {:?}", item); +// // TODO use the transformed item +// let _item = on_sitemty_data!(item, |mut item: Box| { +// if let Some(k) = item.as_any_mut().downcast_mut::>() { +// coll.append_all_from(k); +// } +// sitem_data(item) +// }); +// } +// eprintln!("collected 1: {:?}", coll); +// assert_eq!(coll, exp1); +// } +// { +// eprintln!("---------------------------------------------------------------------"); +// let mut coll = BinsDim0::empty(); +// let stream = q2rx; +// while let Ok(item) = stream.recv().await { +// //eprintln!("RECV [q2rx] {:?}", item); +// // TODO use the transformed item +// let _item = on_sitemty_data!(item, |mut item: Box| { +// if let Some(k) = item.as_any_mut().downcast_mut::>() { +// coll.append_all_from(k); +// } +// sitem_data(item) +// }); +// } +// eprintln!("collected 1: {:?}", coll); +// assert_eq!(coll, exp2); +// } +// Ok(()) +// }; +// runfut(fut) +// } diff --git a/src/teststream.rs b/src/teststream.rs new file mode 100644 index 0000000..3460a9c --- /dev/null +++ b/src/teststream.rs @@ -0,0 +1,36 @@ +use crate::timebin::cached::reader::EventsReadProvider; +use crate::timebin::cached::reader::EventsReading; +use futures_util::Stream; +use items_0::streamitem::sitem_err2_from_string; +use items_0::streamitem::Sitemty; +use items_2::channelevents::ChannelEvents; +use netpod::range::evrange::SeriesRange; +use query::api4::events::EventsSubQuery; +use std::pin::Pin; + +fn make_stream(chname: &str, range: &SeriesRange) -> Pin> + Send>> { + if chname == "unittest;scylla;cont;scalar;f32" { + let e = sitem_err2_from_string(format!("unknown channel {chname}")); + let ret = futures_util::stream::iter([Err(e)]); + Box::pin(ret) + } else { + let e = sitem_err2_from_string(format!("unknown channel {chname}")); + let ret = futures_util::stream::iter([Err(e)]); + Box::pin(ret) + } +} + +pub struct UnitTestStream {} + +impl UnitTestStream { + pub fn new() -> Self { + Self {} + } +} + +impl EventsReadProvider for UnitTestStream { + fn read(&self, evq: EventsSubQuery) -> EventsReading { + let stream = make_stream(evq.name(), evq.range()); + EventsReading::new(stream) + } +} diff --git a/src/timebin.rs b/src/timebin.rs new file mode 100644 index 0000000..d4afb5b --- /dev/null +++ b/src/timebin.rs @@ -0,0 +1,13 @@ +pub mod cached; +pub mod fromevents; +pub mod timebin; + +mod basic; +pub(super) mod fromlayers; +mod gapfill; +mod grid; + +pub(super) use basic::TimeBinnedStream; +pub(super) use fromlayers::TimeBinnedFromLayers; + +pub use cached::reader::CacheReadProvider; diff --git a/src/timebin/basic.rs b/src/timebin/basic.rs new file mode 100644 index 0000000..1452495 --- /dev/null +++ b/src/timebin/basic.rs @@ -0,0 +1,278 @@ +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::streamitem::sitem_data; +use items_0::streamitem::sitem_err_from_string; +use items_0::streamitem::RangeCompletableItem; +use items_0::streamitem::Sitemty; +use items_0::streamitem::StreamItem; +use items_0::timebin::TimeBinnableTy; +use items_0::timebin::TimeBinnerTy; +use netpod::log::*; +use netpod::BinnedRangeEnum; +use std::any; +use std::fmt; +use std::ops::ControlFlow; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; + +macro_rules! debug_first { ($($arg:tt)*) => ( if false { debug!($($arg)*); } ) } + +macro_rules! trace2 { ($($arg:tt)*) => ( if false { trace!($($arg)*); } ) } + +macro_rules! trace3 { ($($arg:tt)*) => ( if false { trace!($($arg)*); } ) } + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "TimeBinnedStream")] +pub enum Error { + MissingBinnerAfterProcessItem, + CreateEmpty, + NoBinnerAfterInputDone, + Stream, + Msg(String), +} + +type SitemtyStream = Pin> + Send>>; + +pub struct TimeBinnedStream +where + T: TimeBinnableTy, +{ + inp: SitemtyStream, + range: BinnedRangeEnum, + do_time_weight: bool, + range_final: bool, + binner: Option<::TimeBinner>, + done_first_input: bool, + done_data: bool, + done: bool, + complete: bool, +} + +impl fmt::Debug for TimeBinnedStream +where + T: TimeBinnableTy, +{ + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct(any::type_name::()) + .field("range", &self.range) + .field("range_final", &self.range_final) + .field("binner", &self.binner) + .finish() + } +} + +impl TimeBinnedStream +where + T: TimeBinnableTy, +{ + pub fn new(inp: SitemtyStream, range: BinnedRangeEnum, do_time_weight: bool) -> Self { + Self { + inp, + range, + do_time_weight, + range_final: false, + binner: None, + done_first_input: false, + done_data: false, + done: false, + complete: false, + } + } + + fn process_item(&mut self, mut item: T) -> () { + let emit_empty_bins = true; + trace2!("process_item {item:?}"); + if self.binner.is_none() { + trace!("process_item call time_binner_new"); + let binner = item.time_binner_new(self.range.clone(), self.do_time_weight, emit_empty_bins); + self.binner = Some(binner); + } + let binner = self.binner.as_mut().unwrap(); + trace2!("process_item call binner ingest"); + binner.ingest(&mut item); + } + + fn handle_data_item( + &mut self, + item: T, + ) -> Result::TimeBinner as TimeBinnerTy>::Output>>>, Error> { + use ControlFlow::*; + use Poll::*; + trace2!("================= handle_data_item"); + let item_len = item.len(); + self.process_item(item); + let mut do_emit = false; + if self.done_first_input == false { + debug_first!( + "emit container after the first input len {} binner {}", + item_len, + self.binner.is_some() + ); + if self.binner.is_none() { + let e = Error::MissingBinnerAfterProcessItem; + self.done = true; + return Err(e); + } + do_emit = true; + self.done_first_input = true; + } + if let Some(binner) = self.binner.as_mut() { + trace3!("bins ready count {}", binner.bins_ready_count()); + if binner.bins_ready_count() > 0 { + do_emit = true + } + if do_emit { + if let Some(bins) = binner.bins_ready() { + Ok(Break(Ready(sitem_data(bins)))) + } else { + if let Some(bins) = binner.empty() { + Ok(Break(Ready(sitem_data(bins)))) + } else { + let e = Error::CreateEmpty; + error!("{e}"); + Err(e) + } + } + } else { + trace3!("not emit"); + Ok(ControlFlow::Continue(())) + } + } else { + warn!("processed item, but no binner yet"); + Ok(ControlFlow::Continue(())) + } + } + + fn handle_item( + &mut self, + item: Sitemty, + ) -> Result::TimeBinner as TimeBinnerTy>::Output>>>, Error> { + use ControlFlow::*; + use Poll::*; + trace2!("================= handle_item"); + match item { + Ok(item) => match item { + StreamItem::DataItem(item) => match item { + RangeCompletableItem::RangeComplete => { + debug!("see RangeComplete"); + self.range_final = true; + Ok(Continue(())) + } + RangeCompletableItem::Data(item) => self.handle_data_item(item), + }, + StreamItem::Log(item) => Ok(Break(Ready(Ok(StreamItem::Log(item))))), + StreamItem::Stats(item) => Ok(Break(Ready(Ok(StreamItem::Stats(item))))), + }, + Err(e) => { + error!("received error item: {e}"); + self.done = true; + Err(Error::Msg(e.to_string())) + } + } + } + + fn handle_none( + &mut self, + ) -> Result::TimeBinner as TimeBinnerTy>::Output>>>, Error> { + use ControlFlow::*; + use Poll::*; + trace2!("================= handle_none"); + let self_range_final = self.range_final; + if let Some(binner) = self.binner.as_mut() { + trace2!("bins ready count before finish {}", binner.bins_ready_count()); + // TODO rework the finish logic + if self_range_final { + binner.set_range_complete(); + } + binner.push_in_progress(false); + trace2!("bins ready count after finish {}", binner.bins_ready_count()); + if let Some(bins) = binner.bins_ready() { + self.done_data = true; + Ok(Break(Ready(sitem_data(bins)))) + } else { + if let Some(bins) = binner.empty() { + self.done_data = true; + Ok(Break(Ready(sitem_data(bins)))) + } else { + let e = Error::CreateEmpty; + error!("{e}"); + self.done_data = true; + Err(e) + } + } + } else { + warn!("input stream finished, still no binner"); + self.done_data = true; + let e = Error::NoBinnerAfterInputDone; + Err(e) + } + } + + // TODO + // Original block inside the poll loop was able to: + // continue + // break with Poll> + fn poll_input( + &mut self, + cx: &mut Context, + ) -> Result::TimeBinner as TimeBinnerTy>::Output>>>, Error> { + use ControlFlow::*; + use Poll::*; + trace2!("================= poll_input"); + match self.inp.poll_next_unpin(cx) { + Ready(Some(item)) => self.handle_item(item), + Ready(None) => self.handle_none(), + Pending => Ok(Break(Pending)), + } + } +} + +impl Stream for TimeBinnedStream +where + T: TimeBinnableTy + Unpin, +{ + type Item = Sitemty<<::TimeBinner as TimeBinnerTy>::Output>; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + let span = span!(Level::INFO, "TimeBinner"); + let _spg = span.enter(); + trace2!("================= POLL"); + loop { + break if self.complete { + error!("TimeBinnedStream poll on complete"); + panic!("TimeBinnedStream poll on complete") + } else if self.done { + self.complete = true; + Ready(None) + } else if self.done_data { + self.done = true; + if self.range_final { + info!("TimeBinnedStream EMIT RANGE FINAL"); + Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)))) + } else { + continue; + } + } else { + match self.poll_input(cx) { + Ok(item) => match item { + ControlFlow::Continue(()) => continue, + ControlFlow::Break(item) => match item { + Ready(item) => break Ready(Some(item)), + Pending => break Pending, + }, + }, + Err(e) => { + self.done = true; + break Ready(Some(sitem_err_from_string(e))); + } + } + }; + } + } +} + +//impl WithTransformProperties for TimeBinnedStream where T: TimeBinnableTy {} + +//impl TimeBinnableStreamTrait for TimeBinnedStream where T: TimeBinnableTy {} diff --git a/src/timebin/cached.rs b/src/timebin/cached.rs new file mode 100644 index 0000000..749e400 --- /dev/null +++ b/src/timebin/cached.rs @@ -0,0 +1,4 @@ +// mods for: +// time-binned at any resolution. + +pub mod reader; diff --git a/src/timebin/cached/reader.rs b/src/timebin/cached/reader.rs new file mode 100644 index 0000000..b7dd943 --- /dev/null +++ b/src/timebin/cached/reader.rs @@ -0,0 +1,192 @@ +use crate as streams; +use futures_util::FutureExt; +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::streamitem::Sitemty; +use items_0::timebin::BinsBoxed; +use items_2::channelevents::ChannelEvents; +use netpod::log::*; +use netpod::BinnedRange; +use netpod::DtMs; +use netpod::TsNano; +use query::api4::events::EventsSubQuery; +use std::future::Future; +use std::ops::Range; +use std::pin::Pin; +use std::sync::Arc; +use std::task::Context; +use std::task::Poll; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "BinCachedReader")] +pub enum Error { + TodoImpl, + ChannelSend, + ChannelRecv, + Scylla(String), +} + +#[allow(unused)] +macro_rules! trace_emit { ($($arg:tt)*) => ( if true { trace!($($arg)*); } ) } + +pub fn off_max() -> u64 { + 1000 +} + +pub fn part_len(bin_len: DtMs) -> DtMs { + DtMs::from_ms_u64(bin_len.ms() * off_max()) +} + +pub struct EventsReading { + stream: Pin> + Send>>, +} + +impl EventsReading { + pub fn new(stream: Pin> + Send>>) -> Self { + Self { stream } + } +} + +impl Stream for EventsReading { + type Item = Sitemty; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + match self.stream.poll_next_unpin(cx) { + Ready(Some(item)) => { + use items_0::streamitem::RangeCompletableItem::*; + use items_0::streamitem::StreamItem::*; + match &item { + Ok(DataItem(Data(cevs))) => match cevs { + ChannelEvents::Events(_) => Ready(Some(item)), + ChannelEvents::Status(_) => Ready(Some(item)), + }, + _ => Ready(Some(item)), + } + } + Ready(None) => Ready(None), + Pending => Pending, + } + } +} + +pub trait EventsReadProvider: Send + Sync { + fn read(&self, evq: EventsSubQuery) -> EventsReading; +} + +pub struct CacheReading { + fut: Pin, streams::timebin::cached::reader::Error>> + Send>>, +} + +impl CacheReading { + pub fn new( + fut: Pin, streams::timebin::cached::reader::Error>> + Send>>, + ) -> Self { + Self { fut } + } +} + +impl Future for CacheReading { + type Output = Result, streams::timebin::cached::reader::Error>; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll { + self.fut.poll_unpin(cx) + } +} + +pub struct CacheWriting { + fut: Pin> + Send>>, +} + +impl CacheWriting { + pub fn new(fut: Pin> + Send>>) -> Self { + Self { fut } + } +} + +impl Future for CacheWriting { + type Output = Result<(), streams::timebin::cached::reader::Error>; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll { + self.fut.poll_unpin(cx) + } +} + +pub trait CacheReadProvider: Send + Sync { + fn read(&self, series: u64, bin_len: DtMs, msp: u64, offs: Range) -> CacheReading; + fn write(&self, series: u64, bins: BinsBoxed) -> CacheWriting; +} + +pub struct CachedReader { + series: u64, + range: BinnedRange, + ts1next: TsNano, + bin_len: DtMs, + cache_read_provider: Arc, + reading: Option, Error>> + Send>>>, +} + +impl CachedReader { + pub fn new( + series: u64, + range: BinnedRange, + cache_read_provider: Arc, + ) -> Result { + let ret = Self { + series, + ts1next: range.nano_beg(), + bin_len: range.bin_len.to_dt_ms(), + range, + cache_read_provider, + reading: None, + }; + Ok(ret) + } +} + +impl Stream for CachedReader { + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + loop { + break if let Some(fut) = self.reading.as_mut() { + match fut.poll_unpin(cx) { + Ready(x) => { + self.reading = None; + match x { + Ok(Some(bins)) => { + trace_emit!( + "- - - - - - - - - - - - emit cached bins {} bin_len {}", + bins.len(), + self.bin_len + ); + Ready(Some(Ok(bins))) + } + Ok(None) => { + continue; + } + Err(e) => Ready(Some(Err(e))), + } + } + Pending => Pending, + } + } else { + if self.ts1next < self.range.nano_end() { + let div = part_len(self.bin_len).ns(); + let msp = self.ts1next.ns() / div; + let off = (self.ts1next.ns() - div * msp) / self.bin_len.ns(); + let off2 = (self.range.nano_end().ns() - div * msp) / self.bin_len.ns(); + let off2 = off2.min(off_max()); + self.ts1next = TsNano::from_ns(self.bin_len.ns() * off2 + div * msp); + let offs = off as u32..off2 as u32; + let fut = self.cache_read_provider.read(self.series, self.bin_len, msp, offs); + self.reading = Some(Box::pin(fut)); + continue; + } else { + Ready(None) + } + }; + } + } +} diff --git a/src/timebin/fromevents.rs b/src/timebin/fromevents.rs new file mode 100644 index 0000000..3e3cddf --- /dev/null +++ b/src/timebin/fromevents.rs @@ -0,0 +1,80 @@ +use super::cached::reader::EventsReadProvider; +use crate::events::convertforbinning::ConvertForBinning; +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::streamitem::RangeCompletableItem; +use items_0::streamitem::Sitemty; +use items_0::streamitem::StreamItem; +use items_0::timebin::BinsBoxed; +use items_2::binning::timeweight::timeweight_events_dyn::BinnedEventsTimeweightStream; +use netpod::log::*; +use netpod::BinnedRange; +use netpod::TsNano; +use query::api4::events::EventsSubQuery; +use std::pin::Pin; +use std::sync::Arc; +use std::task::Context; +use std::task::Poll; + +macro_rules! trace_emit { ($($arg:tt)*) => ( if true { trace!($($arg)*); } ) } + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "ReadingBinnedFromEvents")] +pub enum Error { + ExpectTimerange, + ExpectTimeweighted, +} + +pub struct BinnedFromEvents { + stream: Pin> + Send>>, +} + +impl BinnedFromEvents { + pub fn new( + range: BinnedRange, + evq: EventsSubQuery, + do_time_weight: bool, + read_provider: Arc, + ) -> Result { + if !evq.range().is_time() { + return Err(Error::ExpectTimerange); + } + let stream = read_provider.read(evq); + let stream = ConvertForBinning::new(Box::pin(stream)); + let stream = if do_time_weight { + let stream = Box::pin(stream); + BinnedEventsTimeweightStream::new(range, stream) + } else { + return Err(Error::ExpectTimeweighted); + }; + let stream = stream.map(|item| match item { + Ok(x) => match x { + StreamItem::DataItem(x) => match x { + RangeCompletableItem::Data(x) => { + trace_emit!("see item {:?}", x); + Ok(StreamItem::DataItem(RangeCompletableItem::Data(x))) + } + RangeCompletableItem::RangeComplete => { + debug!("BinnedFromEvents sees range final"); + Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)) + } + }, + StreamItem::Log(x) => Ok(StreamItem::Log(x)), + StreamItem::Stats(x) => Ok(StreamItem::Stats(x)), + }, + Err(e) => Err(e), + }); + let ret = Self { + stream: Box::pin(stream), + }; + Ok(ret) + } +} + +impl Stream for BinnedFromEvents { + type Item = Sitemty; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + self.stream.poll_next_unpin(cx) + } +} diff --git a/src/timebin/fromlayers.rs b/src/timebin/fromlayers.rs new file mode 100644 index 0000000..94af54a --- /dev/null +++ b/src/timebin/fromlayers.rs @@ -0,0 +1,149 @@ +use super::cached::reader::CacheReadProvider; +use super::cached::reader::EventsReadProvider; +use crate::timebin::fromevents::BinnedFromEvents; +use crate::timebin::grid::find_next_finer_bin_len; +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::streamitem::Sitemty; +use items_0::timebin::BinsBoxed; +use items_2::binning::timeweight::timeweight_bins_dyn::BinnedBinsTimeweightStream; +use netpod::log::*; +use netpod::query::CacheUsage; +use netpod::range::evrange::SeriesRange; +use netpod::BinnedRange; +use netpod::ChannelTypeConfigGen; +use netpod::DtMs; +use netpod::ReqCtx; +use netpod::TsNano; +use query::api4::events::EventsSubQuery; +use query::api4::events::EventsSubQuerySelect; +use query::api4::events::EventsSubQuerySettings; +use query::transform::TransformQuery; +use std::pin::Pin; +use std::sync::Arc; +use std::task::Context; +use std::task::Poll; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "TimeBinnedFromLayers")] +pub enum Error { + GapFill(#[from] super::gapfill::Error), + BinnedFromEvents(#[from] super::fromevents::Error), + #[error("FinerGridMismatch({0}, {1})")] + FinerGridMismatch(DtMs, DtMs), +} + +type BoxedInput = Pin> + Send>>; + +pub struct TimeBinnedFromLayers { + inp: BoxedInput, +} + +impl TimeBinnedFromLayers { + pub fn type_name() -> &'static str { + core::any::type_name::() + } + + pub fn new( + ch_conf: ChannelTypeConfigGen, + cache_usage: CacheUsage, + transform_query: TransformQuery, + sub: EventsSubQuerySettings, + log_level: String, + ctx: Arc, + range: BinnedRange, + do_time_weight: bool, + bin_len_layers: Vec, + cache_read_provider: Arc, + events_read_provider: Arc, + ) -> Result { + debug!( + "{}::new {:?} {:?} {:?}", + Self::type_name(), + ch_conf.series(), + range, + bin_len_layers + ); + let bin_len = DtMs::from_ms_u64(range.bin_len.ms()); + if bin_len_layers.contains(&bin_len) { + debug!("{}::new bin_len in layers {:?}", Self::type_name(), range); + let inp = super::gapfill::GapFill::new( + "FromLayers".into(), + ch_conf.clone(), + cache_usage.clone(), + transform_query.clone(), + sub.clone(), + log_level.clone(), + ctx.clone(), + range, + do_time_weight, + bin_len_layers, + cache_read_provider, + events_read_provider.clone(), + )?; + let ret = Self { inp: Box::pin(inp) }; + Ok(ret) + } else { + match find_next_finer_bin_len(bin_len, &bin_len_layers) { + Some(finer) => { + if bin_len.ms() % finer.ms() != 0 { + return Err(Error::FinerGridMismatch(bin_len, finer)); + } + let range_finer = BinnedRange::from_nano_range(range.to_nano_range(), finer); + debug!( + "{}::new next finer from bins {:?} {:?}", + Self::type_name(), + finer, + range_finer + ); + let inp = super::gapfill::GapFill::new( + "FromLayers".into(), + ch_conf.clone(), + cache_usage.clone(), + transform_query.clone(), + sub.clone(), + log_level.clone(), + ctx.clone(), + range_finer.clone(), + do_time_weight, + bin_len_layers, + cache_read_provider, + events_read_provider.clone(), + )?; + let inp = BinnedBinsTimeweightStream::new(range, Box::pin(inp)); + let ret = Self { inp: Box::pin(inp) }; + Ok(ret) + } + None => { + debug!("{}::new next finer from events", Self::type_name()); + let series_range = SeriesRange::TimeRange(range.to_nano_range()); + let one_before_range = true; + let select = EventsSubQuerySelect::new( + ch_conf.clone(), + series_range, + one_before_range, + transform_query.clone(), + ); + let evq = EventsSubQuery::from_parts(select, sub.clone(), ctx.reqid().into(), log_level.clone()); + let inp = BinnedFromEvents::new(range, evq, do_time_weight, events_read_provider)?; + let ret = Self { inp: Box::pin(inp) }; + debug!("{}::new setup from events", Self::type_name()); + Ok(ret) + } + } + } + } +} + +impl Stream for TimeBinnedFromLayers { + type Item = Sitemty; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + match self.inp.poll_next_unpin(cx) { + Ready(Some(x)) => Ready(Some(x)), + Ready(None) => Ready(None), + Pending => Pending, + } + } +} diff --git a/src/timebin/gapfill.rs b/src/timebin/gapfill.rs new file mode 100644 index 0000000..7e1b4b3 --- /dev/null +++ b/src/timebin/gapfill.rs @@ -0,0 +1,489 @@ +use super::cached::reader::CacheReadProvider; +use super::cached::reader::EventsReadProvider; +use crate::timebin::fromevents::BinnedFromEvents; +use futures_util::FutureExt; +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::streamitem::sitem_err_from_string; +use items_0::streamitem::RangeCompletableItem; +use items_0::streamitem::Sitemty; +use items_0::streamitem::StreamItem; +use items_0::timebin::BinsBoxed; +use items_2::binning::timeweight::timeweight_bins_dyn::BinnedBinsTimeweightStream; +use netpod::log::*; +use netpod::query::CacheUsage; +use netpod::range::evrange::NanoRange; +use netpod::range::evrange::SeriesRange; +use netpod::BinnedRange; +use netpod::ChannelTypeConfigGen; +use netpod::DtMs; +use netpod::ReqCtx; +use netpod::TsNano; +use query::api4::events::EventsSubQuery; +use query::api4::events::EventsSubQuerySelect; +use query::api4::events::EventsSubQuerySettings; +use query::transform::TransformQuery; +use std::pin::Pin; +use std::sync::Arc; +use std::task::Context; +use std::task::Poll; + +#[allow(unused)] +macro_rules! debug_init { ($($arg:tt)*) => ( if true { debug!($($arg)*); } ) } + +#[allow(unused)] +macro_rules! debug_setup { ($($arg:tt)*) => ( if true { debug!($($arg)*); } ) } + +#[allow(unused)] +macro_rules! debug_cache { ($($arg:tt)*) => ( if true { debug!($($arg)*); } ) } + +#[allow(unused)] +macro_rules! trace_handle { ($($arg:tt)*) => ( if true { trace!($($arg)*); } ) } + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "BinCachedGapFill")] +pub enum Error { + CacheReader(#[from] super::cached::reader::Error), + #[error("GapFromFiner({0}, {1}, {2})")] + GapFromFiner(TsNano, TsNano, DtMs), + #[error("MissingBegFromFiner({0}, {1}, {2})")] + MissingBegFromFiner(TsNano, TsNano, DtMs), + #[error("InputBeforeRange({0}, {1})")] + InputBeforeRange(NanoRange, BinnedRange), + EventsReader(#[from] super::fromevents::Error), +} + +type Input = Pin> + Send>>; + +// Try to read from cache for the given bin len. +// For gaps in the stream, construct an alternative input from finer bin len with a binner. +pub struct GapFill { + dbgname: String, + ch_conf: ChannelTypeConfigGen, + cache_usage: CacheUsage, + transform_query: TransformQuery, + sub: EventsSubQuerySettings, + log_level: String, + ctx: Arc, + range: BinnedRange, + do_time_weight: bool, + bin_len_layers: Vec, + inp: Option, + inp_range_final: bool, + inp_buf: Option, + inp_finer: Option, + inp_finer_range_final: bool, + inp_finer_range_final_cnt: u32, + inp_finer_range_final_max: u32, + inp_finer_fills_gap: bool, + last_bin_ts2: Option, + exp_finer_range: NanoRange, + cache_read_provider: Arc, + events_read_provider: Arc, + bins_for_cache_write: Option, + done: bool, + cache_writing: Option, +} + +impl GapFill { + // bin_len of the given range must be a cacheable bin_len. + pub fn new( + dbgname_parent: String, + ch_conf: ChannelTypeConfigGen, + cache_usage: CacheUsage, + transform_query: TransformQuery, + sub: EventsSubQuerySettings, + log_level: String, + ctx: Arc, + range: BinnedRange, + do_time_weight: bool, + bin_len_layers: Vec, + cache_read_provider: Arc, + events_read_provider: Arc, + ) -> Result { + let dbgname = format!("{}--[{}]", dbgname_parent, range); + debug_init!("new dbgname {}", dbgname); + let inp = if cache_usage.is_cache_read() { + let series = ch_conf.series().expect("series id for cache read"); + let stream = super::cached::reader::CachedReader::new(series, range.clone(), cache_read_provider.clone())? + .map(|x| match x { + Ok(x) => Ok(StreamItem::DataItem(RangeCompletableItem::Data(x))), + Err(e) => sitem_err_from_string(e), + }); + Box::pin(stream) as Pin> + Send>> + } else { + let stream = futures_util::stream::empty(); + Box::pin(stream) + }; + let ret = Self { + dbgname, + ch_conf, + cache_usage, + transform_query, + sub, + log_level, + ctx, + range, + do_time_weight, + bin_len_layers, + inp: Some(inp), + inp_range_final: false, + inp_buf: None, + inp_finer: None, + inp_finer_range_final: false, + inp_finer_range_final_cnt: 0, + inp_finer_range_final_max: 0, + inp_finer_fills_gap: false, + last_bin_ts2: None, + // TODO just dummy: + exp_finer_range: NanoRange { beg: 0, end: 0 }, + cache_read_provider, + events_read_provider, + bins_for_cache_write: None, + done: false, + cache_writing: None, + }; + Ok(ret) + } + + fn handle_bins_finer(mut self: Pin<&mut Self>, bins: BinsBoxed) -> Result { + trace_handle!("{} handle_bins_finer {}", self.dbgname, bins); + for (&ts1, &ts2) in bins.edges_iter() { + if let Some(last) = self.last_bin_ts2 { + if ts1 != last { + return Err(Error::GapFromFiner(ts1, last, self.range.bin_len_dt_ms())); + } + } else if ts1 != self.range.nano_beg() { + return Err(Error::MissingBegFromFiner( + ts1, + self.range.nano_beg(), + self.range.bin_len_dt_ms(), + )); + } + self.last_bin_ts2 = Some(ts2); + } + if bins.len() != 0 { + let mut bins2 = bins.clone(); + let dst = self.bins_for_cache_write.get_or_insert_with(|| bins.empty()); + bins2.drain_into(dst.as_mut(), 0..bins2.len()); + } + if self.cache_usage.is_cache_write() { + self.cache_write_intermediate()?; + } // TODO make sure that input does not send "made-up" empty future bins. + // On the other hand, if the request is over past range, but the channel was silent ever since? + // Then we should in principle know that from is-alive status checking. + // So, until then, allow made-up bins? + // Maybe, for now, only write those bins before some last non-zero-count bin. The only safe way. + Ok(bins) + } + + fn setup_sub(self: Pin<&mut Self>, range: NanoRange) -> Result<(), Error> { + trace_handle!("{} SETUP SUB STREAM {}", self.dbgname, range); + self.setup_inp_finer(range, true)?; + Ok(()) + } + + fn handle_bins(mut self: Pin<&mut Self>, bins: BinsBoxed) -> Result { + trace_handle!("{} handle_bins {}", self.dbgname, bins); + // TODO could use an interface to iterate over opaque bin items that only expose + // edge and count information with all remaining values opaque. + for (i, (&ts1, &ts2)) in bins.edges_iter().enumerate() { + if ts1 < self.range.nano_beg() { + return Err(Error::InputBeforeRange( + NanoRange::from_ns_u64(ts1.ns(), ts2.ns()), + self.range.clone(), + )); + } + if let Some(last) = self.last_bin_ts2 { + if ts1 != last { + trace_handle!("{} detect a gap BETWEEN last {} ts1 {}", self.dbgname, last, ts1); + let mut ret = bins.empty(); + let mut bins = bins; + bins.drain_into(ret.as_mut(), 0..i); + self.inp_buf = Some(bins); + let range = NanoRange { + beg: last.ns(), + end: ts1.ns(), + }; + self.setup_sub(range)?; + return Ok(ret); + } else { + // nothing to do + } + } else if ts1 != self.range.nano_beg() { + trace_handle!( + "{} detect a gap BEGIN beg {} ts1 {}", + self.dbgname, + self.range.nano_beg(), + ts1 + ); + let range = NanoRange { + beg: self.range.nano_beg().ns(), + end: ts1.ns(), + }; + self.setup_sub(range)?; + return Ok(bins.empty()); + } + self.last_bin_ts2 = Some(ts2); + } + Ok(bins) + } + + fn setup_inp_finer(mut self: Pin<&mut Self>, range: NanoRange, inp_finer_fills_gap: bool) -> Result<(), Error> { + self.inp_finer_range_final = false; + self.inp_finer_range_final_max += 1; + self.inp_finer_fills_gap = inp_finer_fills_gap; + self.exp_finer_range = range.clone(); + if let Some(bin_len_finer) = + super::grid::find_next_finer_bin_len(self.range.bin_len.to_dt_ms(), &self.bin_len_layers) + { + debug_setup!( + "{} setup_inp_finer next finer from bins {} {} from {}", + self.dbgname, + range, + bin_len_finer, + self.range.bin_len.to_dt_ms() + ); + let range_finer = BinnedRange::from_nano_range(range, bin_len_finer); + let range_finer_one_before_bin = range_finer.one_before_bin(); + let inp_finer = GapFill::new( + self.dbgname.clone(), + self.ch_conf.clone(), + self.cache_usage.clone(), + self.transform_query.clone(), + self.sub.clone(), + self.log_level.clone(), + self.ctx.clone(), + range_finer_one_before_bin, + self.do_time_weight, + self.bin_len_layers.clone(), + self.cache_read_provider.clone(), + self.events_read_provider.clone(), + )?; + let stream = Box::pin(inp_finer); + let range = BinnedRange::from_nano_range(range_finer.full_range(), self.range.bin_len.to_dt_ms()); + let stream = if self.do_time_weight { + BinnedBinsTimeweightStream::new(range, stream) + } else { + panic!("TODO unweighted") + }; + self.inp_finer = Some(Box::pin(stream)); + } else { + debug_setup!("{} setup_inp_finer next finer from events {}", self.dbgname, range); + let series_range = SeriesRange::TimeRange(range.clone()); + let one_before_range = true; + let select = EventsSubQuerySelect::new( + self.ch_conf.clone(), + series_range, + one_before_range, + self.transform_query.clone(), + ); + let evq = EventsSubQuery::from_parts( + select, + self.sub.clone(), + self.ctx.reqid().into(), + self.log_level.clone(), + ); + let range = BinnedRange::from_nano_range(range.clone(), self.range.bin_len.to_dt_ms()); + let inp = BinnedFromEvents::new(range, evq, self.do_time_weight, self.events_read_provider.clone())?; + self.inp_finer = Some(Box::pin(inp)); + } + Ok(()) + } + + fn cache_write(mut self: Pin<&mut Self>, bins: BinsBoxed) -> Result<(), Error> { + // TODO emit bins that are ready for cache write into some separate channel + let series = todo!(); + self.cache_writing = Some(self.cache_read_provider.write(series, bins)); + Ok(()) + } + + fn cache_write_on_end(mut self: Pin<&mut Self>) -> Result<(), Error> { + if self.inp_finer_fills_gap { + // TODO can consider all incoming bins as final by assumption. + } + if let Some(bins) = &self.bins_for_cache_write { + if bins.len() >= 2 { + // TODO guard behind flag. + // TODO emit to a async user-given channel, if given. + // Therefore, move to poll loop. + // Should only write to cache with non-zero count, therefore, not even emit others? + // TODO afterwards set to None. + self.bins_for_cache_write = None; + } + } + Ok(()) + } + + fn cache_write_intermediate(self: Pin<&mut Self>) -> Result<(), Error> { + // TODO See cache_write_on_end + Ok(()) + } +} + +impl Stream for GapFill { + type Item = Sitemty; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + use Poll::*; + loop { + break if self.done { + Ready(None) + } else if let Some(fut) = self.cache_writing.as_mut() { + match fut.poll_unpin(cx) { + Ready(Ok(())) => { + self.cache_writing = None; + continue; + } + Ready(Err(e)) => { + self.cache_writing = None; + Ready(Some(sitem_err_from_string(e))) + } + Pending => Pending, + } + } else if let Some(inp_finer) = self.inp_finer.as_mut() { + match inp_finer.poll_next_unpin(cx) { + Ready(Some(Ok(x))) => match x { + StreamItem::DataItem(RangeCompletableItem::Data(x)) => { + match self.as_mut().handle_bins_finer(x) { + Ok(x) => Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(x))))), + Err(e) => Ready(Some(sitem_err_from_string(e))), + } + } + StreamItem::DataItem(RangeCompletableItem::RangeComplete) => { + trace_handle!("{} RECV RANGE FINAL", self.dbgname); + self.inp_finer_range_final = true; + self.inp_finer_range_final_cnt += 1; + if self.cache_usage.is_cache_write() { + match self.as_mut().cache_write_on_end() { + Ok(()) => continue, + Err(e) => Ready(Some(sitem_err_from_string(e))), + } + } else { + continue; + } + } + StreamItem::Log(x) => Ready(Some(Ok(StreamItem::Log(x)))), + StreamItem::Stats(x) => Ready(Some(Ok(StreamItem::Stats(x)))), + }, + Ready(Some(Err(e))) => Ready(Some(sitem_err_from_string(e))), + Ready(None) => { + trace_handle!( + "{} inp_finer Ready(None) last_bin_ts2 {:?}", + self.dbgname, + self.last_bin_ts2 + ); + let exp_finer_range = + ::core::mem::replace(&mut self.exp_finer_range, NanoRange { beg: 0, end: 0 }); + self.inp_finer = None; + if let Some(j) = self.last_bin_ts2 { + if j.ns() != exp_finer_range.end() { + trace_handle!( + "{} inp_finer Ready(None) last_bin_ts2 {:?} exp_finer_range {:?}", + self.dbgname, + self.last_bin_ts2, + exp_finer_range + ); + if self.inp_finer_fills_gap { + Ready(Some(sitem_err_from_string("finer input didn't deliver to the end"))) + } else { + warn!( + "{} inp_finer Ready(None) last_bin_ts2 {:?} not delivered to the end, but maybe in the future", + self.dbgname, self.last_bin_ts2 + ); + continue; + } + } else { + continue; + } + } else if self.inp_finer_fills_gap { + error!( + "{} inp_finer Ready(None) last_bin_ts2 {:?}", + self.dbgname, self.last_bin_ts2 + ); + Ready(Some(sitem_err_from_string( + "finer input delivered nothing, received nothing at all so far", + ))) + } else { + warn!( + "{} inp_finer Ready(None) last_bin_ts2 {:?}", + self.dbgname, self.last_bin_ts2 + ); + continue; + } + } + Pending => Pending, + } + } else if let Some(x) = self.inp_buf.take() { + match self.as_mut().handle_bins_finer(x) { + Ok(x) => Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(x))))), + Err(e) => Ready(Some(sitem_err_from_string(e))), + } + } else if let Some(inp) = self.inp.as_mut() { + match inp.poll_next_unpin(cx) { + Ready(Some(Ok(x))) => match x { + StreamItem::DataItem(RangeCompletableItem::Data(x)) => match self.as_mut().handle_bins(x) { + Ok(x) => Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(x))))), + Err(e) => Ready(Some(sitem_err_from_string(e))), + }, + StreamItem::DataItem(RangeCompletableItem::RangeComplete) => { + self.inp_range_final = true; + continue; + } + StreamItem::Log(x) => Ready(Some(Ok(StreamItem::Log(x)))), + StreamItem::Stats(x) => Ready(Some(Ok(StreamItem::Stats(x)))), + }, + Ready(Some(Err(e))) => Ready(Some(sitem_err_from_string(e))), + Ready(None) => { + self.inp = None; + // TODO assert that we have emitted up to the requested range. + // If not, request the remaining range from "finer" input. + if let Some(j) = self.last_bin_ts2 { + if j != self.range.nano_end() { + let range = NanoRange { + beg: j.ns(), + end: self.range.full_range().end(), + }; + debug!( + "{} received something but not all, setup rest from finer {} {} {}", + self.dbgname, self.range, j, range + ); + match self.as_mut().setup_inp_finer(range, false) { + Ok(()) => { + continue; + } + Err(e) => Ready(Some(sitem_err_from_string(e))), + } + } else { + debug!("{} received everything", self.dbgname); + Ready(None) + } + } else { + let range = self.range.to_nano_range(); + debug!( + "{} received nothing at all, setup full range from finer {} {}", + self.dbgname, self.range, range + ); + match self.as_mut().setup_inp_finer(range, false) { + Ok(()) => { + continue; + } + Err(e) => Ready(Some(sitem_err_from_string(e))), + } + } + } + Pending => Pending, + } + } else { + self.done = true; + if self.inp_finer_range_final_cnt == self.inp_finer_range_final_max { + trace_handle!("{} range finale all", self.dbgname); + Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)))) + } else { + trace_handle!("{} substreams not final", self.dbgname); + continue; + } + }; + } + } +} diff --git a/src/timebin/grid.rs b/src/timebin/grid.rs new file mode 100644 index 0000000..bd6e333 --- /dev/null +++ b/src/timebin/grid.rs @@ -0,0 +1,12 @@ +use netpod::DtMs; + +// Find the next finer bin len from the passed list. +// The list is assumed to be sorted ascending, meaning finer bin len first. +pub fn find_next_finer_bin_len(bin_len: DtMs, layers: &[DtMs]) -> Option { + for l in layers.iter().rev() { + if *l < bin_len { + return Some(*l); + } + } + None +} diff --git a/src/timebin/timebin.rs b/src/timebin/timebin.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/timebinnedjson.rs b/src/timebinnedjson.rs new file mode 100644 index 0000000..764f8f4 --- /dev/null +++ b/src/timebinnedjson.rs @@ -0,0 +1,472 @@ +use crate::collect::Collect; +use crate::collect::CollectResult; +use crate::json_stream::JsonBytes; +use crate::json_stream::JsonStream; +use crate::rangefilter2::RangeFilter2; +use crate::streamtimeout::StreamTimeout2; +use crate::streamtimeout::TimeoutableStream; +use crate::tcprawclient::container_stream_from_bytes_stream; +use crate::tcprawclient::make_sub_query; +use crate::tcprawclient::OpenBoxedBytesStreamsBox; +use crate::timebin::cached::reader::EventsReadProvider; +use crate::timebin::CacheReadProvider; +use crate::transform::build_merged_event_transform; +use futures_util::future::BoxFuture; +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::collect_s::CollectableDyn; +use items_0::on_sitemty_data; +use items_0::streamitem::RangeCompletableItem; +use items_0::streamitem::Sitemty; +use items_0::streamitem::StreamItem; +use items_0::Events; +use items_2::channelevents::ChannelEvents; +use items_2::merger::Merger; +use netpod::log::*; +use netpod::range::evrange::NanoRange; +use netpod::BinnedRangeEnum; +use netpod::ChannelTypeConfigGen; +use netpod::DtMs; +use netpod::ReqCtx; +use query::api4::binned::BinnedQuery; +use query::api4::events::EventsSubQuerySettings; +use query::transform::TransformQuery; +use serde_json::Value as JsonValue; +use std::pin::Pin; +use std::sync::Arc; +use std::time::Duration; +use std::time::Instant; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "TimebinnedJson")] +pub enum Error { + Query(#[from] query::api4::binned::Error), + FromLayers(#[from] super::timebin::fromlayers::Error), + Transform(#[from] super::transform::Error), + TcpRawClient(#[from] crate::tcprawclient::Error), + Collect(#[from] crate::collect::Error), + Json(#[from] serde_json::Error), + Msg(String), +} + +struct ErrMsg(E) +where + E: ToString; + +impl From> for Error +where + E: ToString, +{ + fn from(value: ErrMsg) -> Self { + Self::Msg(value.0.to_string()) + } +} + +#[allow(unused)] +fn assert_stream_send<'u, R>(stream: impl 'u + Send + Stream) -> impl 'u + Send + Stream { + stream +} + +pub async fn timebinnable_stream_sf_databuffer_channelevents( + range: NanoRange, + one_before_range: bool, + ch_conf: ChannelTypeConfigGen, + transform_query: TransformQuery, + sub: EventsSubQuerySettings, + log_level: String, + ctx: Arc, + open_bytes: OpenBoxedBytesStreamsBox, +) -> Result>, Error> { + let subq = make_sub_query( + ch_conf, + range.clone().into(), + one_before_range, + transform_query, + sub.clone(), + log_level.clone(), + &ctx, + ); + let inmem_bufcap = subq.inmem_bufcap(); + let _wasm1 = subq.wasm1().map(ToString::to_string); + let mut tr = build_merged_event_transform(subq.transform())?; + let bytes_streams = open_bytes.open(subq, ctx.as_ref().clone()).await?; + let mut inps = Vec::new(); + for s in bytes_streams { + let s = container_stream_from_bytes_stream::(s, inmem_bufcap.clone(), "TODOdbgdesc".into())?; + let s = Box::pin(s) as Pin> + Send>>; + inps.push(s); + } + // TODO propagate also the max-buf-len for the first stage event reader. + // TODO use a mixture of count and byte-size as threshold. + let stream = Merger::new(inps, sub.merger_out_len_max()); + let stream = RangeFilter2::new(stream, range, one_before_range); + let stream = stream.map(move |k: Sitemty| { + use ChannelEvents; + use RangeCompletableItem::*; + use StreamItem::*; + match k { + Ok(DataItem(Data(ChannelEvents::Events(k)))) => { + // let k = k; + // let k: Box = Box::new(k); + let k = k.to_dim0_f32_for_binning(); + let k = tr.0.transform(k); + Ok(StreamItem::DataItem(RangeCompletableItem::Data(ChannelEvents::Events( + k, + )))) + } + _ => k, + } + }); + + #[cfg(feature = "wasm_transform")] + let stream = if let Some(wasmname) = wasm1 { + debug!("make wasm transform"); + use httpclient::url::Url; + use wasmer::Value; + use wasmer::WasmSlice; + let t = httpclient::http_get( + Url::parse(&format!("http://data-api.psi.ch/distri/{}", wasmname)).unwrap(), + "*/*", + ctx, + ) + .await + .unwrap(); + let wasm = t.body; + // let wasm = include_bytes!("dummy.wasm"); + let mut store = wasmer::Store::default(); + let module = wasmer::Module::new(&store, wasm).unwrap(); + // TODO assert that memory is large enough + let memory = wasmer::Memory::new(&mut store, wasmer::MemoryType::new(10, Some(30), false)).unwrap(); + let import_object = wasmer::imports! { + "env" => { + "memory" => memory.clone(), + } + }; + let instance = wasmer::Instance::new(&mut store, &module, &import_object).unwrap(); + let get_buffer_ptr = instance.exports.get_function("get_buffer_ptr").unwrap(); + let buffer_ptr = get_buffer_ptr.call(&mut store, &[]).unwrap(); + let buffer_ptr = buffer_ptr[0].i32().unwrap(); + let stream = stream.map(move |x| { + let memory = memory.clone(); + let item = on_sitemty_data!(x, |mut evs: Box| { + let x = { + use items_0::AsAnyMut; + if true { + let r1 = evs + .as_any_mut() + .downcast_mut::>() + .is_some(); + let r2 = evs + .as_mut() + .as_any_mut() + .downcast_mut::>() + .is_some(); + let r3 = evs + .as_any_mut() + .downcast_mut::>>() + .is_some(); + let r4 = evs + .as_mut() + .as_any_mut() + .downcast_mut::>>() + .is_some(); + let r5 = evs.as_mut().as_any_mut().downcast_mut::().is_some(); + let r6 = evs.as_mut().as_any_mut().downcast_mut::>().is_some(); + debug!("wasm castings: {r1} {r2} {r3} {r4} {r5} {r6}"); + } + if let Some(evs) = evs.as_any_mut().downcast_mut::() { + match evs { + ChannelEvents::Events(evs) => { + if let Some(evs) = + evs.as_any_mut().downcast_mut::>() + { + use items_0::WithLen; + if evs.len() == 0 { + debug!("wasm empty EventsDim0"); + } else { + debug!("wasm see EventsDim0 len {}", evs.len()); + let max_len_needed = 16000; + let dummy1 = instance.exports.get_function("dummy1").unwrap(); + let s = evs.values.as_mut_slices(); + for sl in [s.0, s.1] { + if sl.len() > max_len_needed as _ { + // TODO cause error + panic!(); + } + let wmemoff = buffer_ptr as u64; + let view = memory.view(&store); + // TODO is the offset bytes or elements? + let wsl = WasmSlice::::new(&view, wmemoff, sl.len() as _).unwrap(); + // debug!("wasm pages {:?} data size {:?}", view.size(), view.data_size()); + wsl.write_slice(&sl).unwrap(); + let ptr = wsl.as_ptr32(); + debug!("ptr {:?} offset {}", ptr, ptr.offset()); + let params = [Value::I32(ptr.offset() as _), Value::I32(sl.len() as _)]; + let res = dummy1.call(&mut store, ¶ms).unwrap(); + match res[0] { + Value::I32(x) => { + debug!("wasm dummy1 returned: {x:?}"); + if x != 1 { + error!("unexpected return value {res:?}"); + } + } + _ => { + error!("unexpected return type {res:?}"); + } + } + // Init the slice again because we need to drop ownership for the function call. + let view = memory.view(&store); + let wsl = WasmSlice::::new(&view, wmemoff, sl.len() as _).unwrap(); + wsl.read_slice(sl).unwrap(); + } + } + } else { + debug!("wasm not EventsDim0"); + } + } + ChannelEvents::Status(_) => {} + } + } else { + debug!("wasm not ChannelEvents"); + } + evs + }; + Ok(StreamItem::DataItem(RangeCompletableItem::Data(x))) + }); + // Box::new(item) as Box + item + }); + Box::pin(stream) as Pin>> + Send>> + } else { + let stream = stream.map(|x| x); + Box::pin(stream) + }; + Ok(stream) +} + +async fn timebinned_stream( + query: BinnedQuery, + binned_range: BinnedRangeEnum, + ch_conf: ChannelTypeConfigGen, + ctx: &ReqCtx, + cache_read_provider: Arc, + events_read_provider: Arc, +) -> Result>> + Send>>, Error> { + use netpod::query::CacheUsage; + let cache_usage = query.cache_usage().unwrap_or(CacheUsage::Ignore); + let do_time_weight = true; + let bin_len_layers = if let Some(subgrids) = query.subgrids() { + subgrids + .iter() + .map(|&x| DtMs::from_ms_u64(1000 * x.as_secs())) + .collect() + } else { + netpod::time_bin_len_cache_opts().to_vec() + }; + let stream = crate::timebin::TimeBinnedFromLayers::new( + ch_conf, + cache_usage, + query.transform().clone(), + EventsSubQuerySettings::from(&query), + query.log_level().into(), + Arc::new(ctx.clone()), + binned_range.binned_range_time(), + do_time_weight, + bin_len_layers, + cache_read_provider, + events_read_provider, + )?; + let stream = stream.map(|item| { + use items_0::timebin::BinningggContainerBinsDyn; + on_sitemty_data!(item, |mut x: Box| { + x.fix_numerics(); + let ret = Box::new(x) as Box; + Ok(StreamItem::DataItem(RangeCompletableItem::Data(ret))) + }) + }); + let stream = Box::pin(stream); + Ok(stream) +} + +pub async fn timebinned_json( + query: BinnedQuery, + ch_conf: ChannelTypeConfigGen, + ctx: &ReqCtx, + cache_read_provider: Arc, + events_read_provider: Arc, + timeout_provider: Box, +) -> Result, Error> { + let deadline = Instant::now() + + query + .timeout_content() + .unwrap_or(Duration::from_millis(3000)) + .min(Duration::from_millis(5000)) + .max(Duration::from_millis(200)); + let binned_range = query.covering_range()?; + // TODO derive better values, from query + let collect_max = 10000; + let bytes_max = 100 * collect_max; + let stream = timebinned_stream( + query.clone(), + binned_range.clone(), + ch_conf, + ctx, + cache_read_provider, + events_read_provider, + ) + .await?; + let collected = Collect::new( + stream, + deadline, + collect_max, + bytes_max, + None, + Some(binned_range), + timeout_provider, + ); + let collected: BoxFuture<_> = Box::pin(collected); + let collres = collected.await?; + match collres { + CollectResult::Some(collres) => { + let collres = if let Some(_bins) = collres + .as_any_ref() + .downcast_ref::>() + { + debug!("unexpected binned enum"); + // bins.boxed_collected_with_enum_fix() + collres + } else { + debug!("timebinned_json collected type_name {:?}", collres.type_name()); + collres + }; + let jsval = collres.to_json_value()?; + Ok(CollectResult::Some(jsval)) + } + CollectResult::Timeout => Ok(CollectResult::Timeout), + } +} + +fn take_collector_result(coll: &mut Box) -> Option { + match coll.result(None, None) { + Ok(collres) => { + let collres = if let Some(_bins) = collres + .as_any_ref() + .downcast_ref::>() + { + warn!("unexpected binned enum"); + // bins.boxed_collected_with_enum_fix() + collres + } else { + collres + }; + match collres.to_json_value() { + Ok(val) => Some(val), + Err(e) => Some(serde_json::Value::String(format!("{e}"))), + } + } + Err(e) => Some(serde_json::Value::String(format!("{e}"))), + } +} + +pub async fn timebinned_json_framed( + query: BinnedQuery, + ch_conf: ChannelTypeConfigGen, + ctx: &ReqCtx, + cache_read_provider: Arc, + events_read_provider: Arc, + timeout_provider: Box, +) -> Result { + trace!("timebinned_json_framed"); + let binned_range = query.covering_range()?; + // TODO derive better values, from query + let stream = timebinned_stream( + query.clone(), + binned_range.clone(), + ch_conf, + ctx, + cache_read_provider, + events_read_provider, + ) + .await?; + // let stream = timebinned_to_collectable(stream); + // TODO create a custom Stream adapter. + // Want to timeout only on data items: the user wants to wait for bins only a maximum time. + // But also, I want to coalesce. + let timeout_content_base = query + .timeout_content() + .unwrap_or(Duration::from_millis(1000)) + .min(Duration::from_millis(5000)) + .max(Duration::from_millis(100)); + let timeout_content_2 = timeout_content_base * 2 / 3; + let mut coll = None; + let mut last_emit = Instant::now(); + let stream = stream.map(|x| Some(x)).chain(futures_util::stream::iter([None])); + let stream = TimeoutableStream::new(timeout_content_base, timeout_provider, stream); + let stream = stream.map(move |x| { + match x { + Some(x) => match x { + Some(x) => match x { + Ok(x) => match x { + StreamItem::DataItem(x) => match x { + RangeCompletableItem::Data(mut item) => { + let coll = coll.get_or_insert_with(|| item.new_collector()); + coll.ingest(&mut item); + if coll.len() >= 128 || last_emit.elapsed() >= timeout_content_2 { + last_emit = Instant::now(); + take_collector_result(coll).map(|x| Ok(x)) + } else { + // Some(serde_json::Value::String(format!("coll len {}", coll.len()))) + None + } + } + RangeCompletableItem::RangeComplete => None, + }, + StreamItem::Log(x) => { + debug!("{x:?}"); + // Some(serde_json::Value::String(format!("{x:?}"))) + None + } + StreamItem::Stats(x) => { + debug!("{x:?}"); + // Some(serde_json::Value::String(format!("{x:?}"))) + None + } + }, + Err(e) => Some(Err(e)), + }, + None => { + if let Some(coll) = coll.as_mut() { + last_emit = Instant::now(); + take_collector_result(coll).map(|x| Ok(x)) + } else { + // Some(serde_json::Value::String(format!( + // "end of input but no collector to take something from" + // ))) + None + } + } + }, + None => { + if let Some(coll) = coll.as_mut() { + if coll.len() != 0 { + last_emit = Instant::now(); + take_collector_result(coll).map(|x| Ok(x)) + } else { + // Some(serde_json::Value::String(format!("timeout but nothing to do"))) + None + } + } else { + // Some(serde_json::Value::String(format!("timeout but no collector"))) + None + } + } + } + }); + let stream = stream.filter_map(|x| futures_util::future::ready(x)); + // TODO skip the intermediate conversion to js value, go directly to string data + let stream = stream.map(|x| match x { + Ok(x) => Ok(JsonBytes::new(serde_json::to_string(&x).unwrap())), + Err(e) => Err(crate::json_stream::Error::from(crate::json_stream::ErrMsg(e))), + }); + Ok(Box::pin(stream)) +} diff --git a/src/transform.rs b/src/transform.rs new file mode 100644 index 0000000..0d5bf3d --- /dev/null +++ b/src/transform.rs @@ -0,0 +1,100 @@ +use futures_util::Stream; +use futures_util::StreamExt; +use items_0::collect_s::CollectableDyn; +use items_0::streamitem::RangeCompletableItem; +use items_0::streamitem::Sitemty; +use items_0::streamitem::StreamItem; +use items_0::transform::CollectableStreamBox; +use items_0::transform::EventStreamBox; +use items_0::transform::EventStreamTrait; +use items_0::transform::TransformEvent; +use items_0::transform::TransformProperties; +use items_0::transform::WithTransformProperties; +use items_2::transform::make_transform_identity; +use items_2::transform::make_transform_min_max_avg; +use items_2::transform::make_transform_pulse_id_diff; +use query::transform::EventTransformQuery; +use query::transform::TimeBinningTransformQuery; +use query::transform::TransformQuery; +use std::pin::Pin; + +#[derive(Debug, thiserror::Error)] +#[cstm(name = "Transform")] +pub enum Error { + #[error("UnhandledQuery({0:?})")] + UnhandledQuery(EventTransformQuery), +} + +pub fn build_event_transform(tr: &TransformQuery) -> Result { + let trev = tr.get_tr_event(); + match trev { + EventTransformQuery::ValueFull => Ok(make_transform_identity()), + EventTransformQuery::MinMaxAvgDev => Ok(make_transform_min_max_avg()), + EventTransformQuery::ArrayPick(..) => Err(Error::UnhandledQuery(trev.clone())), + EventTransformQuery::PulseIdDiff => Ok(make_transform_pulse_id_diff()), + EventTransformQuery::EventBlobsVerbatim => Err(Error::UnhandledQuery(trev.clone())), + EventTransformQuery::EventBlobsUncompressed => Err(Error::UnhandledQuery(trev.clone())), + } +} + +pub fn build_merged_event_transform(tr: &TransformQuery) -> Result { + let trev = tr.get_tr_event(); + match trev { + EventTransformQuery::PulseIdDiff => Ok(make_transform_pulse_id_diff()), + _ => Ok(make_transform_identity()), + } +} + +// TODO remove, in its current usage it reboxes +pub struct EventsToTimeBinnable { + inp: Pin>, +} + +impl EventsToTimeBinnable { + pub fn new(inp: INP) -> Self + where + INP: EventStreamTrait + 'static, + { + Self { inp: Box::pin(inp) } + } +} + +impl WithTransformProperties for EventsToTimeBinnable { + fn query_transform_properties(&self) -> TransformProperties { + self.inp.query_transform_properties() + } +} + +pub fn build_full_transform_collectable( + tr: &TransformQuery, + inp: EventStreamBox, +) -> Result { + // TODO this must return a Stream! + //let evs = build_event_transform(tr, inp)?; + let trtb = tr.get_tr_time_binning(); + let a: Pin>> + Send>> = + Box::pin(inp.0.map(|item| match item { + Ok(item) => match item { + StreamItem::DataItem(item) => match item { + RangeCompletableItem::Data(item) => { + let item: Box = Box::new(item); + Ok(StreamItem::DataItem(RangeCompletableItem::Data(item))) + } + RangeCompletableItem::RangeComplete => { + Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)) + } + }, + StreamItem::Log(item) => Ok(StreamItem::Log(item)), + StreamItem::Stats(item) => Ok(StreamItem::Stats(item)), + }, + Err(e) => Err(e), + })); + let stream: Pin>> + Send>> = + Box::pin(futures_util::stream::empty()); + let stream = Box::pin(futures_util::stream::empty()) as _; + match trtb { + TimeBinningTransformQuery::None => Ok(CollectableStreamBox(stream)), + TimeBinningTransformQuery::TimeWeighted => todo!(), + TimeBinningTransformQuery::Unweighted => todo!(), + } +}