This commit is contained in:
Dominik Werder
2024-09-12 17:13:50 +02:00
parent f550d37602
commit 5ee1779fee
13 changed files with 502 additions and 400 deletions

View File

@@ -104,7 +104,7 @@ pub async fn plain_events_json_stream(
ctx: &ReqCtx,
open_bytes: OpenBoxedBytesStreamsBox,
) -> Result<JsonStream, Error> {
trace!("build stream");
trace!("plain_events_json_stream");
let stream = dyn_events_stream(evq, ch_conf, ctx, open_bytes).await?;
let stream = events_stream_to_json_stream(stream);
let stream = non_empty(stream);

View File

@@ -15,11 +15,23 @@ use netpod::DtMs;
use netpod::TsNano;
use query::api4::events::EventsSubQuery;
use std::future::Future;
use std::ops::Range;
use std::pin::Pin;
use std::sync::Arc;
use std::task::Context;
use std::task::Poll;
#[allow(unused)]
// Compile-time-gated trace logging for bin emission; change the `true` literal to `false` to silence.
macro_rules! trace_emit { ($($arg:tt)*) => ( if true { trace!($($arg)*); } ) }
/// Maximum number of bin offsets contained in one cache partition.
pub fn off_max() -> u64 {
    // One partition spans this many consecutive bins of the base bin length.
    const OFF_MAX: u64 = 1000;
    OFF_MAX
}
/// Length of one cache partition: `off_max()` consecutive bins of `bin_len` each.
pub fn part_len(bin_len: DtMs) -> DtMs {
    let total_ms = bin_len.ms() * off_max();
    DtMs::from_ms_u64(total_ms)
}
/// Boxed stream of channel events as produced by an `EventsReadProvider`.
pub struct EventsReading {
    // Type-erased event stream; pinned so it can be polled directly.
    stream: Pin<Box<dyn Stream<Item = Sitemty<ChannelEvents>> + Send>>,
}
@@ -43,11 +55,19 @@ pub trait EventsReadProvider: Send + Sync {
}
pub struct CacheReading {
fut: Pin<Box<dyn Future<Output = Result<BinsDim0<f32>, Box<dyn std::error::Error + Send>>> + Send>>,
fut: Pin<Box<dyn Future<Output = Result<BinsDim0<f32>, streams::timebin::cached::reader::Error>> + Send>>,
}
impl CacheReading {
    /// Wrap an already-pinned future that resolves to one partition of cached
    /// bins, or a cache-reader error.
    pub fn new(
        fut: Pin<Box<dyn Future<Output = Result<BinsDim0<f32>, streams::timebin::cached::reader::Error>> + Send>>,
    ) -> Self {
        Self { fut }
    }
}
impl Future for CacheReading {
type Output = Result<BinsDim0<f32>, Box<dyn std::error::Error + Send>>;
type Output = Result<BinsDim0<f32>, streams::timebin::cached::reader::Error>;
fn poll(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
self.fut.poll_unpin(cx)
@@ -73,7 +93,7 @@ impl Future for CacheWriting {
}
pub trait CacheReadProvider: Send + Sync {
fn read(&self, series: u64, range: BinnedRange<TsNano>) -> CacheReading;
fn read(&self, series: u64, bin_len: DtMs, msp: u64, offs: Range<u32>) -> CacheReading;
fn write(&self, series: u64, bins: BinsDim0<f32>) -> CacheWriting;
}
@@ -87,17 +107,28 @@ pub enum Error {
}
/// Stream of cached bins for one series, reading `range` partition-by-partition.
pub struct CachedReader {
    series: u64,
    range: BinnedRange<TsNano>,
    // Begin timestamp of the next partition read to issue.
    ts1next: TsNano,
    // Length of a single bin; one partition holds up to `off_max()` bins.
    bin_len: DtMs,
    cache_read_provider: Arc<dyn CacheReadProvider>,
    // In-flight partition read, if any.
    reading: Option<Pin<Box<dyn Future<Output = Result<BinsDim0<f32>, Error>> + Send>>>,
}
impl CachedReader {
    /// Build a reader over `range` for `series`, deriving the bin length from
    /// the range and starting the partition cursor at the range begin.
    ///
    /// NOTE(review): the diff residue left a stale one-field initializer and a
    /// stale `bin_len` parameter here; kept the 3-argument form, which is how
    /// the caller in `GapFill::new` invokes it.
    pub fn new(
        series: u64,
        range: BinnedRange<TsNano>,
        cache_read_provider: Arc<dyn CacheReadProvider>,
    ) -> Result<Self, Error> {
        let ret = Self {
            series,
            // First read starts at the beginning of the requested range.
            ts1next: range.nano_beg(),
            // Cache partitioning works in terms of the range's bin length.
            bin_len: range.bin_len.to_dt_ms(),
            range,
            cache_read_provider,
            reading: None,
        };
        Ok(ret)
    }
}
@@ -113,8 +144,42 @@ impl Stream for CachedReader {
// Change the worker interface:
// We should already compute here the msp and off because we must here implement the loop logic.
// Therefore worker interface should not accept BinnedRange, but msp and off range.
error!("TODO CachedReader impl split reads over known ranges");
// Ready(Some(Err(Error::TodoImpl)))
Ready(None)
loop {
break if let Some(fut) = self.reading.as_mut() {
match fut.poll_unpin(cx) {
Ready(x) => {
self.reading = None;
match x {
Ok(bins) => {
use items_0::WithLen;
trace_emit!(
"- - - - - - - - - - - - emit cached bins {} bin_len {}",
bins.len(),
self.bin_len
);
Ready(Some(Ok(bins)))
}
Err(e) => Ready(Some(Err(e))),
}
}
Pending => Pending,
}
} else {
if self.ts1next < self.range.nano_end() {
let div = part_len(self.bin_len).ns();
let msp = self.ts1next.ns() / div;
let off = (self.ts1next.ns() - div * msp) / self.bin_len.ns();
let off2 = (self.range.nano_end().ns() - div * msp) / self.bin_len.ns();
let off2 = off2.min(off_max());
self.ts1next = TsNano::from_ns(self.bin_len.ns() * off2 + div * msp);
let offs = off as u32..off2 as u32;
let fut = self.cache_read_provider.read(self.series, self.bin_len, msp, offs);
self.reading = Some(Box::pin(fut));
continue;
} else {
Ready(None)
}
};
}
}
}

View File

@@ -1,5 +1,4 @@
use super::cached::reader::EventsReadProvider;
use super::cached::reader::EventsReading;
use err::thiserror;
use err::ThisError;
use futures_util::Stream;
@@ -18,6 +17,9 @@ use std::sync::Arc;
use std::task::Context;
use std::task::Poll;
#[allow(unused)]
macro_rules! trace_emit { ($($arg:tt)*) => ( if true { trace!($($arg)*); } ) }
/// Errors produced while binning events into time bins (currently none).
#[derive(Debug, ThisError)]
#[cstm(name = "ReadingBinnedFromEvents")]
pub enum Error {}
@@ -47,6 +49,8 @@ impl BinnedFromEvents {
// TODO need a typed time binner
if let Some(x) = x.as_any_mut().downcast_mut::<BinsDim0<f32>>() {
let y = x.clone();
use items_0::WithLen;
trace_emit!("=========== ========= emit from events {}", y.len());
Ok(StreamItem::DataItem(RangeCompletableItem::Data(y)))
} else {
Err(::err::Error::with_msg_no_trace(

View File

@@ -15,6 +15,7 @@ use items_0::streamitem::StreamItem;
use items_0::timebin::TimeBinnableTy;
use items_2::binsdim0::BinsDim0;
use netpod::log::*;
use netpod::query::CacheUsage;
use netpod::range::evrange::SeriesRange;
use netpod::BinnedRange;
use netpod::BinnedRangeEnum;
@@ -45,6 +46,7 @@ type BoxedInput = Pin<Box<dyn Stream<Item = Sitemty<BinsDim0<f32>>> + Send>>;
pub struct TimeBinnedFromLayers {
ch_conf: ChannelTypeConfigGen,
cache_usage: CacheUsage,
transform_query: TransformQuery,
sub: EventsSubQuerySettings,
log_level: String,
@@ -60,6 +62,7 @@ impl TimeBinnedFromLayers {
pub fn new(
ch_conf: ChannelTypeConfigGen,
cache_usage: CacheUsage,
transform_query: TransformQuery,
sub: EventsSubQuerySettings,
log_level: String,
@@ -85,6 +88,7 @@ impl TimeBinnedFromLayers {
let inp = super::gapfill::GapFill::new(
"FromLayers".into(),
ch_conf.clone(),
cache_usage.clone(),
transform_query.clone(),
sub.clone(),
log_level.clone(),
@@ -98,6 +102,7 @@ impl TimeBinnedFromLayers {
)?;
let ret = Self {
ch_conf,
cache_usage,
transform_query,
sub,
log_level,
@@ -119,6 +124,7 @@ impl TimeBinnedFromLayers {
let inp = super::gapfill::GapFill::new(
"FromLayers".into(),
ch_conf.clone(),
cache_usage.clone(),
transform_query.clone(),
sub.clone(),
log_level.clone(),
@@ -137,6 +143,7 @@ impl TimeBinnedFromLayers {
);
let ret = Self {
ch_conf,
cache_usage,
transform_query,
sub,
log_level,
@@ -168,6 +175,7 @@ impl TimeBinnedFromLayers {
)?;
let ret = Self {
ch_conf,
cache_usage,
transform_query,
sub,
log_level,

View File

@@ -13,6 +13,7 @@ use items_0::Empty;
use items_0::WithLen;
use items_2::binsdim0::BinsDim0;
use netpod::log::*;
use netpod::query::CacheUsage;
use netpod::range::evrange::NanoRange;
use netpod::range::evrange::SeriesRange;
use netpod::BinnedRange;
@@ -59,6 +60,7 @@ type INP = Pin<Box<dyn Stream<Item = Sitemty<BinsDim0<f32>>> + Send>>;
pub struct GapFill {
dbgname: String,
ch_conf: ChannelTypeConfigGen,
cache_usage: CacheUsage,
transform_query: TransformQuery,
sub: EventsSubQuerySettings,
log_level: String,
@@ -89,6 +91,7 @@ impl GapFill {
pub fn new(
dbgname_parent: String,
ch_conf: ChannelTypeConfigGen,
cache_usage: CacheUsage,
transform_query: TransformQuery,
sub: EventsSubQuerySettings,
log_level: String,
@@ -102,19 +105,21 @@ impl GapFill {
) -> Result<Self, Error> {
let dbgname = format!("{}--[{}]", dbgname_parent, range);
debug_init!("new dbgname {}", dbgname);
let inp = super::cached::reader::CachedReader::new(
series,
range.bin_len.to_dt_ms(),
range.clone(),
cache_read_provider.clone(),
)?
.map(|x| match x {
Ok(x) => Ok(StreamItem::DataItem(RangeCompletableItem::Data(x))),
Err(e) => Err(::err::Error::from_string(e)),
});
let inp = if cache_usage.is_cache_read() {
let stream = super::cached::reader::CachedReader::new(series, range.clone(), cache_read_provider.clone())?
.map(|x| match x {
Ok(x) => Ok(StreamItem::DataItem(RangeCompletableItem::Data(x))),
Err(e) => Err(::err::Error::from_string(e)),
});
Box::pin(stream) as Pin<Box<dyn Stream<Item = Sitemty<BinsDim0<f32>>> + Send>>
} else {
let stream = futures_util::stream::empty();
Box::pin(stream)
};
let ret = Self {
dbgname,
ch_conf,
cache_usage,
transform_query,
sub,
log_level,
@@ -123,7 +128,7 @@ impl GapFill {
range,
do_time_weight,
bin_len_layers,
inp: Some(Box::pin(inp)),
inp: Some(inp),
inp_range_final: false,
inp_buf: None,
inp_finer: None,
@@ -156,15 +161,22 @@ impl GapFill {
if bins.len() != 0 {
bins.clone().drain_into(&mut self.bins_for_cache_write, 0..bins.len());
}
self.cache_write_intermediate()?;
// TODO make sure that input does not send "made-up" empty future bins.
// On the other hand, if the request is over past range, but the channel was silent ever since?
// Then we should in principle know that from is-alive status checking.
// So, until then, allow made-up bins?
// Maybe, for now, only write those bins before some last non-zero-count bin. The only safe way.
if self.cache_usage.is_cache_write() {
self.cache_write_intermediate()?;
} // TODO make sure that input does not send "made-up" empty future bins.
// On the other hand, if the request is over past range, but the channel was silent ever since?
// Then we should in principle know that from is-alive status checking.
// So, until then, allow made-up bins?
// Maybe, for now, only write those bins before some last non-zero-count bin. The only safe way.
Ok(bins)
}
fn setup_sub(self: Pin<&mut Self>, range: NanoRange) -> Result<(), Error> {
trace_handle!("{} SETUP SUB STREAM {}", self.dbgname, range);
self.setup_inp_finer(range, true)?;
Ok(())
}
fn handle_bins(mut self: Pin<&mut Self>, bins: BinsDim0<f32>) -> Result<BinsDim0<f32>, Error> {
trace_handle!("{} handle_bins {}", self.dbgname, bins);
// TODO could use an interface to iterate over opaque bin items that only expose
@@ -178,12 +190,7 @@ impl GapFill {
}
if let Some(last) = self.last_bin_ts2 {
if ts1 != last.ns() {
trace_handle!(
"{} detect a gap ------------- SETUP SUB STREAM ts1 {} last {}",
self.dbgname,
ts1,
last
);
trace_handle!("{} detect a gap BETWEEN last {} ts1 {}", self.dbgname, last, ts1);
let mut ret = <BinsDim0<f32> as items_0::Empty>::empty();
let mut bins = bins;
bins.drain_into(&mut ret, 0..i);
@@ -192,9 +199,24 @@ impl GapFill {
beg: last.ns(),
end: ts1,
};
self.setup_inp_finer(range, true)?;
self.setup_sub(range)?;
return Ok(ret);
} else {
// nothing to do
}
} else if ts1 != self.range.nano_beg().ns() {
trace_handle!(
"{} detect a gap BEGIN beg {} ts1 {}",
self.dbgname,
self.range.nano_beg(),
ts1
);
let range = NanoRange {
beg: self.range.nano_beg().ns(),
end: ts1,
};
self.setup_sub(range)?;
return Ok(BinsDim0::empty());
}
self.last_bin_ts2 = Some(TsNano::from_ns(ts2));
}
@@ -220,6 +242,7 @@ impl GapFill {
let inp_finer = GapFill::new(
self.dbgname.clone(),
self.ch_conf.clone(),
self.cache_usage.clone(),
self.transform_query.clone(),
self.sub.clone(),
self.log_level.clone(),
@@ -282,7 +305,7 @@ impl GapFill {
}
let aa = &self.bins_for_cache_write;
if aa.len() >= 2 {
for (i, (&c1, &c2)) in aa.counts.iter().rev().zip(aa.counts.iter().rev().skip(1)).enumerate() {
for (i, (&c1, &_c2)) in aa.counts.iter().rev().zip(aa.counts.iter().rev().skip(1)).enumerate() {
if c1 != 0 {
let n = aa.len() - (1 + i);
debug_cache!("{} cache_write_on_end consider {} for write", self.dbgname, n);
@@ -299,7 +322,7 @@ impl GapFill {
fn cache_write_intermediate(mut self: Pin<&mut Self>) -> Result<(), Error> {
let aa = &self.bins_for_cache_write;
if aa.len() >= 2 {
for (i, (&c1, &c2)) in aa.counts.iter().rev().zip(aa.counts.iter().rev().skip(1)).enumerate() {
for (i, (&c1, &_c2)) in aa.counts.iter().rev().zip(aa.counts.iter().rev().skip(1)).enumerate() {
if c1 != 0 {
let n = aa.len() - (1 + i);
debug_cache!("{} cache_write_intermediate consider {} for write", self.dbgname, n);
@@ -335,10 +358,6 @@ impl Stream for GapFill {
Pending => Pending,
}
} else if let Some(inp_finer) = self.inp_finer.as_mut() {
// TODO
// detect also gaps here: if gap from finer, then error.
// on CacheUsage Use or Rereate:
// write these bins to cache because we did not find them in cache before.
match inp_finer.poll_next_unpin(cx) {
Ready(Some(Ok(x))) => match x {
StreamItem::DataItem(RangeCompletableItem::Data(x)) => {
@@ -351,9 +370,13 @@ impl Stream for GapFill {
trace_handle!("{} RECV RANGE FINAL", self.dbgname);
self.inp_finer_range_final = true;
self.inp_finer_range_final_cnt += 1;
match self.as_mut().cache_write_on_end() {
Ok(()) => continue,
Err(e) => Ready(Some(Err(::err::Error::from_string(e)))),
if self.cache_usage.is_cache_write() {
match self.as_mut().cache_write_on_end() {
Ok(()) => continue,
Err(e) => Ready(Some(Err(::err::Error::from_string(e)))),
}
} else {
continue;
}
}
StreamItem::Log(x) => Ready(Some(Ok(StreamItem::Log(x)))),
@@ -366,23 +389,32 @@ impl Stream for GapFill {
self.dbgname,
self.last_bin_ts2
);
let exp_finer_range =
::core::mem::replace(&mut self.exp_finer_range, NanoRange { beg: 0, end: 0 });
self.inp_finer = None;
if let Some(j) = self.last_bin_ts2 {
if j.ns() != self.exp_finer_range.end() {
if j.ns() != exp_finer_range.end() {
trace_handle!(
"{} inp_finer Ready(None) last_bin_ts2 {:?} exp_finer_range {:?}",
self.dbgname,
self.last_bin_ts2,
self.exp_finer_range
exp_finer_range
);
Ready(Some(Err(::err::Error::from_string(
"finer input didn't deliver to the end",
))))
if self.inp_finer_fills_gap {
Ready(Some(Err(::err::Error::from_string(
"finer input didn't deliver to the end",
))))
} else {
warn!(
"{} inp_finer Ready(None) last_bin_ts2 {:?} not delivered to the end, but maybe in the future",
self.dbgname, self.last_bin_ts2
);
continue;
}
} else {
self.exp_finer_range = NanoRange { beg: 0, end: 0 };
continue;
}
} else {
} else if self.inp_finer_fills_gap {
error!(
"{} inp_finer Ready(None) last_bin_ts2 {:?}",
self.dbgname, self.last_bin_ts2
@@ -390,6 +422,12 @@ impl Stream for GapFill {
Ready(Some(Err(::err::Error::from_string(
"finer input delivered nothing, received nothing at all so far",
))))
} else {
warn!(
"{} inp_finer Ready(None) last_bin_ts2 {:?}",
self.dbgname, self.last_bin_ts2
);
continue;
}
}
Pending => Pending,

View File

@@ -1,4 +1,6 @@
use crate::collect::Collect;
use crate::json_stream::JsonBytes;
use crate::json_stream::JsonStream;
use crate::rangefilter2::RangeFilter2;
use crate::tcprawclient::container_stream_from_bytes_stream;
use crate::tcprawclient::make_sub_query;
@@ -335,14 +337,15 @@ async fn timebinned_stream(
.collect()
} else {
vec![
DtMs::from_ms_u64(1000 * 60),
// DtMs::from_ms_u64(1000 * 60 * 60),
DtMs::from_ms_u64(1000 * 10),
DtMs::from_ms_u64(1000 * 60 * 60),
// DtMs::from_ms_u64(1000 * 60 * 60 * 12),
// DtMs::from_ms_u64(1000 * 10),
]
};
let stream = crate::timebin::TimeBinnedFromLayers::new(
ch_conf,
query.cache_usage(),
query.transform().clone(),
EventsSubQuerySettings::from(&query),
query.log_level().into(),
@@ -445,3 +448,114 @@ pub async fn timebinned_json(
let jsval = serde_json::to_value(&collected)?;
Ok(jsval)
}
/// Finalize the collector and serialize its result to JSON.
///
/// Enum-typed bin results get their enum-variant fixup applied before
/// serialization. Errors (from the collector or from serialization) are
/// returned as a JSON string frame rather than propagated.
fn take_collector_result(coll: &mut Box<dyn items_0::collect_s::Collector>) -> Option<serde_json::Value> {
    let collres = match coll.result(None, None) {
        Ok(res) => res,
        Err(e) => return Some(serde_json::Value::String(format!("{e}"))),
    };
    // Special-case enum-variant bins: apply the enum fixup before serializing.
    let collres = if let Some(bins) = collres
        .as_any_ref()
        .downcast_ref::<items_2::binsdim0::BinsDim0CollectedResult<netpod::EnumVariant>>()
    {
        info!("MATCHED ENUM");
        bins.boxed_collected_with_enum_fix()
    } else {
        collres
    };
    let val = serde_json::to_value(&collres).unwrap_or_else(|e| serde_json::Value::String(format!("{e}")));
    Some(val)
}
/// Stream time-binned data for `query` as framed JSON chunks.
///
/// Builds the layered time-binning pipeline via `timebinned_stream`, ingests
/// incoming bins into a lazily-created `Collector`, and emits a JSON frame
/// whenever the collector holds >= 128 items, when the input ends, or when
/// the repeating content timeout fires with pending data.
///
/// NOTE(review): stream errors are serialized into the output as JSON string
/// frames instead of terminating the stream — confirm this is intended.
pub async fn timebinned_json_framed(
    query: BinnedQuery,
    ch_conf: ChannelTypeConfigGen,
    ctx: &ReqCtx,
    open_bytes: OpenBoxedBytesStreamsBox,
    cache_read_provider: Option<Arc<dyn CacheReadProvider>>,
    events_read_provider: Option<Arc<dyn EventsReadProvider>>,
) -> Result<JsonStream, Error> {
    trace!("timebinned_json_framed");
    let binned_range = BinnedRangeEnum::covering_range(query.range().clone(), query.bin_count())?;
    // TODO derive better values, from query
    let stream = timebinned_stream(
        query.clone(),
        binned_range.clone(),
        ch_conf,
        ctx,
        open_bytes,
        cache_read_provider,
        events_read_provider,
    )
    .await?;
    let stream = timebinned_to_collectable(stream);
    // Collector is created lazily from the first data item.
    let mut coll = None;
    // Repeating timeout: forces a flush of partial results at this cadence.
    let interval = tokio::time::interval(Duration::from(
        query.timeout_content().unwrap_or(Duration::from_millis(1000)),
    ));
    // Append a trailing `None` sentinel so the final partial batch gets flushed.
    let stream = stream.map(|x| Some(x)).chain(futures_util::stream::iter([None]));
    let stream = tokio_stream::StreamExt::timeout_repeating(stream, interval).map(move |x| match x {
        // An item arrived before the timeout fired.
        Ok(item) => match item {
            Some(x) => match x {
                Ok(x) => match x {
                    StreamItem::DataItem(x) => match x {
                        RangeCompletableItem::Data(mut item) => {
                            let coll = coll.get_or_insert_with(|| item.new_collector());
                            coll.ingest(&mut item);
                            // Emit a frame once enough items accumulated; otherwise keep buffering.
                            if coll.len() >= 128 {
                                take_collector_result(coll)
                            } else {
                                None
                            }
                        }
                        RangeCompletableItem::RangeComplete => None,
                    },
                    StreamItem::Log(x) => {
                        info!("{x:?}");
                        None
                    }
                    StreamItem::Stats(x) => {
                        info!("{x:?}");
                        None
                    }
                },
                // Stream-level error: surface it as a JSON string frame.
                Err(e) => Some(serde_json::Value::String(format!("{e}"))),
            },
            // Sentinel: input exhausted — flush whatever remains in the collector.
            None => {
                if let Some(coll) = coll.as_mut() {
                    take_collector_result(coll)
                } else {
                    None
                }
            }
        },
        // Timeout elapsed with no item: flush partial data, if any.
        Err(_) => {
            if let Some(coll) = coll.as_mut() {
                if coll.len() != 0 {
                    take_collector_result(coll)
                } else {
                    None
                }
            } else {
                None
            }
        }
    });
    // TODO skip the intermediate conversion to js value, go directly to string data
    let stream = stream.map(|x| match x {
        Some(x) => Some(JsonBytes::new(serde_json::to_string(&x).unwrap())),
        None => None,
    });
    let stream = stream.filter_map(|x| futures_util::future::ready(x));
    let stream = stream.map(|x| Ok(x));
    // let stream = dyn_events_stream(evq, ch_conf, ctx, open_bytes).await?;
    // let stream = events_stream_to_json_stream(stream);
    // let stream = non_empty(stream);
    // let stream = only_first_err(stream);
    Ok(Box::pin(stream))
}