This commit is contained in:
Dominik Werder
2024-09-11 17:23:11 +02:00
parent 741c1380c7
commit f550d37602
26 changed files with 932 additions and 196 deletions
+3 -7
View File
@@ -19,10 +19,7 @@ use tokio::io::AsyncRead;
pub type BoxedBytesStream = Pin<Box<dyn Stream<Item = Result<Bytes, Error>> + Send>>;
#[allow(unused)]
macro_rules! trace2 {
($($arg:tt)*) => ();
($($arg:tt)*) => (trace!($($arg)*));
}
macro_rules! trace2 { ($($arg:tt)*) => ( if false { trace!($($arg)*); } ); }
impl err::ToErr for crate::slidebuf::Error {
fn to_err(self) -> Error {
@@ -104,7 +101,7 @@ where
}
}
fn poll_upstream(&mut self, cx: &mut Context) -> Poll<Result<usize, Error>> {
fn poll_upstream(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Result<usize, Error>> {
trace2!("poll_upstream");
use Poll::*;
// use tokio::io::AsyncRead;
@@ -112,7 +109,6 @@ where
// let mut buf = ReadBuf::new(self.buf.available_writable_area(self.need_min.saturating_sub(self.buf.len()))?);
let inp = &mut self.inp;
pin_mut!(inp);
trace!("poll_upstream");
match inp.poll_next(cx) {
Ready(Some(Ok(x))) => match self.buf.available_writable_area(x.len()) {
Ok(dst) => {
@@ -254,7 +250,7 @@ where
}
}
} else {
match self.poll_upstream(cx) {
match self.as_mut().poll_upstream(cx) {
Ready(Ok(n1)) => {
if n1 == 0 {
self.done = true;
+1
View File
@@ -1,5 +1,6 @@
mod basic;
pub mod cached;
pub mod fromevents;
mod fromlayers;
mod gapfill;
mod grid;
+10 -24
View File
@@ -17,31 +17,16 @@ use std::task::Context;
use std::task::Poll;
#[allow(unused)]
macro_rules! trace2 {
($($arg:tt)*) => {
if false {
trace!($($arg)*);
}
};
}
macro_rules! debug_first { ($($arg:tt)*) => ( if false { debug!($($arg)*); } ) }
#[allow(unused)]
macro_rules! trace3 {
($($arg:tt)*) => {
if false {
trace!($($arg)*);
}
};
}
macro_rules! trace2 { ($($arg:tt)*) => ( if false { trace!($($arg)*); } ) }
#[allow(unused)]
macro_rules! trace4 {
($($arg:tt)*) => {
if false {
trace!($($arg)*);
}
};
}
macro_rules! trace3 { ($($arg:tt)*) => ( if false { trace!($($arg)*); } ) }
#[allow(unused)]
macro_rules! trace4 { ($($arg:tt)*) => ( if false { trace!($($arg)*); } ) }
type SitemtyStream<T> = Pin<Box<dyn Stream<Item = Sitemty<T>> + Send>>;
@@ -115,7 +100,7 @@ where
self.process_item(item);
let mut do_emit = false;
if self.done_first_input == false {
debug!(
debug_first!(
"emit container after the first input len {} binner {}",
item_len,
self.binner.is_some()
@@ -191,13 +176,13 @@ where
trace2!("================= handle_none");
let self_range_final = self.range_final;
if let Some(binner) = self.binner.as_mut() {
trace!("bins ready count before finish {}", binner.bins_ready_count());
trace2!("bins ready count before finish {}", binner.bins_ready_count());
// TODO rework the finish logic
if self_range_final {
binner.set_range_complete();
}
binner.push_in_progress(false);
trace!("bins ready count after finish {}", binner.bins_ready_count());
trace2!("bins ready count after finish {}", binner.bins_ready_count());
if let Some(bins) = binner.bins_ready() {
self.done_data = true;
Ok(Break(Ready(sitem_data(bins))))
@@ -260,6 +245,7 @@ where
} else if self.done_data {
self.done = true;
if self.range_final {
info!("TimeBinnedStream EMIT RANGE FINAL");
Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
} else {
continue;
+64 -5
View File
@@ -1,23 +1,71 @@
use crate as streams;
use err::thiserror;
use err::ThisError;
use futures_util::FutureExt;
use futures_util::Stream;
use futures_util::StreamExt;
use items_0::streamitem::Sitemty;
use items_0::timebin::TimeBinnable;
use items_2::binsdim0::BinsDim0;
use items_2::channelevents::ChannelEvents;
use netpod::log::*;
use netpod::BinnedRange;
use netpod::ChConf;
use netpod::DtMs;
use netpod::TsNano;
use query::api4::events::EventsSubQuery;
use std::future::Future;
use std::pin::Pin;
use std::sync::Arc;
use std::task::Context;
use std::task::Poll;
pub struct Reading {
fut: Pin<Box<dyn Future<Output = Result<BinsDim0<f32>, Box<dyn std::error::Error>>> + Send>>,
pub struct EventsReading {
stream: Pin<Box<dyn Stream<Item = Sitemty<ChannelEvents>> + Send>>,
}
impl Future for Reading {
type Output = Result<BinsDim0<f32>, Box<dyn std::error::Error>>;
impl EventsReading {
pub fn new(stream: Pin<Box<dyn Stream<Item = Sitemty<ChannelEvents>> + Send>>) -> Self {
Self { stream }
}
}
impl Stream for EventsReading {
type Item = Sitemty<ChannelEvents>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
self.stream.poll_next_unpin(cx)
}
}
pub trait EventsReadProvider: Send + Sync {
fn read(&self, evq: EventsSubQuery, chconf: ChConf) -> EventsReading;
}
pub struct CacheReading {
fut: Pin<Box<dyn Future<Output = Result<BinsDim0<f32>, Box<dyn std::error::Error + Send>>> + Send>>,
}
impl Future for CacheReading {
type Output = Result<BinsDim0<f32>, Box<dyn std::error::Error + Send>>;
fn poll(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
self.fut.poll_unpin(cx)
}
}
pub struct CacheWriting {
fut: Pin<Box<dyn Future<Output = Result<(), streams::timebin::cached::reader::Error>> + Send>>,
}
impl CacheWriting {
pub fn new(fut: Pin<Box<dyn Future<Output = Result<(), streams::timebin::cached::reader::Error>> + Send>>) -> Self {
Self { fut }
}
}
impl Future for CacheWriting {
type Output = Result<(), streams::timebin::cached::reader::Error>;
fn poll(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
self.fut.poll_unpin(cx)
@@ -25,13 +73,17 @@ impl Future for Reading {
}
pub trait CacheReadProvider: Send + Sync {
fn read(&self) -> Reading;
fn read(&self, series: u64, range: BinnedRange<TsNano>) -> CacheReading;
fn write(&self, series: u64, bins: BinsDim0<f32>) -> CacheWriting;
}
#[derive(Debug, ThisError)]
#[cstm(name = "BinCachedReader")]
pub enum Error {
TodoImpl,
ChannelSend,
ChannelRecv,
Scylla(String),
}
pub struct CachedReader {
@@ -55,6 +107,13 @@ impl Stream for CachedReader {
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
use Poll::*;
// TODO
// Must split over different msp (because pkey).
// If we choose the partitioning length low enough, no need to issue multiple queries.
// Change the worker interface:
// We should already compute here the msp and off because we must here implement the loop logic.
// Therefore worker interface should not accept BinnedRange, but msp and off range.
error!("TODO CachedReader impl split reads over known ranges");
// Ready(Some(Err(Error::TodoImpl)))
Ready(None)
}
+80
View File
@@ -0,0 +1,80 @@
use super::cached::reader::EventsReadProvider;
use super::cached::reader::EventsReading;
use err::thiserror;
use err::ThisError;
use futures_util::Stream;
use futures_util::StreamExt;
use items_0::streamitem::RangeCompletableItem;
use items_0::streamitem::Sitemty;
use items_0::streamitem::StreamItem;
use items_2::binsdim0::BinsDim0;
use netpod::log::*;
use netpod::BinnedRange;
use netpod::ChConf;
use netpod::TsNano;
use query::api4::events::EventsSubQuery;
use std::pin::Pin;
use std::sync::Arc;
use std::task::Context;
use std::task::Poll;
#[derive(Debug, ThisError)]
#[cstm(name = "ReadingBinnedFromEvents")]
pub enum Error {}
pub struct BinnedFromEvents {
stream: Pin<Box<dyn Stream<Item = Sitemty<BinsDim0<f32>>> + Send>>,
}
impl BinnedFromEvents {
pub fn new(
range: BinnedRange<TsNano>,
evq: EventsSubQuery,
chconf: ChConf,
do_time_weight: bool,
read_provider: Arc<dyn EventsReadProvider>,
) -> Result<Self, Error> {
if !evq.range().is_time() {
panic!();
}
let stream = read_provider.read(evq, chconf);
let stream = Box::pin(stream);
let stream = super::basic::TimeBinnedStream::new(stream, netpod::BinnedRangeEnum::Time(range), do_time_weight);
let stream = stream.map(|item| match item {
Ok(x) => match x {
StreamItem::DataItem(x) => match x {
RangeCompletableItem::Data(mut x) => {
// TODO need a typed time binner
if let Some(x) = x.as_any_mut().downcast_mut::<BinsDim0<f32>>() {
let y = x.clone();
Ok(StreamItem::DataItem(RangeCompletableItem::Data(y)))
} else {
Err(::err::Error::with_msg_no_trace(
"GapFill expects incoming BinsDim0<f32>",
))
}
}
RangeCompletableItem::RangeComplete => {
info!("BinnedFromEvents sees range final");
Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))
}
},
StreamItem::Log(x) => Ok(StreamItem::Log(x)),
StreamItem::Stats(x) => Ok(StreamItem::Stats(x)),
},
Err(e) => Err(e),
});
let ret = Self {
stream: Box::pin(stream),
};
Ok(ret)
}
}
impl Stream for BinnedFromEvents {
type Item = Sitemty<BinsDim0<f32>>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
self.stream.poll_next_unpin(cx)
}
}
+57 -14
View File
@@ -1,5 +1,7 @@
use super::cached::reader::CacheReadProvider;
use super::cached::reader::EventsReadProvider;
use crate::tcprawclient::OpenBoxedBytesStreamsBox;
use crate::timebin::fromevents::BinnedFromEvents;
use crate::timebin::grid::find_next_finer_bin_len;
use err::thiserror;
use err::ThisError;
@@ -13,12 +15,17 @@ use items_0::streamitem::StreamItem;
use items_0::timebin::TimeBinnableTy;
use items_2::binsdim0::BinsDim0;
use netpod::log::*;
use netpod::range::evrange::SeriesRange;
use netpod::BinnedRange;
use netpod::BinnedRangeEnum;
use netpod::ChConf;
use netpod::ChannelTypeConfigGen;
use netpod::DtMs;
use netpod::ReqCtx;
use netpod::SeriesKind;
use netpod::TsNano;
use query::api4::events::EventsSubQuery;
use query::api4::events::EventsSubQuerySelect;
use query::api4::events::EventsSubQuerySettings;
use query::transform::TransformQuery;
use std::pin::Pin;
@@ -29,8 +36,9 @@ use std::task::Poll;
#[derive(Debug, ThisError)]
#[cstm(name = "TimeBinnedFromLayers")]
pub enum Error {
Logic,
GapFill(#[from] super::gapfill::Error),
BinnedFromEvents(#[from] super::fromevents::Error),
SfDatabufferNotSupported,
}
type BoxedInput = Pin<Box<dyn Stream<Item = Sitemty<BinsDim0<f32>>> + Send>>;
@@ -61,7 +69,8 @@ impl TimeBinnedFromLayers {
range: BinnedRange<TsNano>,
do_time_weight: bool,
bin_len_layers: Vec<DtMs>,
cache_read_provider: Arc<dyn CacheReadProvider + Send>,
cache_read_provider: Arc<dyn CacheReadProvider>,
events_read_provider: Arc<dyn EventsReadProvider>,
) -> Result<Self, Error> {
info!(
"{}::new {:?} {:?} {:?}",
@@ -72,19 +81,20 @@ impl TimeBinnedFromLayers {
);
let bin_len = DtMs::from_ms_u64(range.bin_len.ms());
if bin_len_layers.contains(&bin_len) {
info!("{}::new bin_len in layers", Self::type_name());
info!("{}::new bin_len in layers {:?}", Self::type_name(), range);
let inp = super::gapfill::GapFill::new(
"FromLayers".into(),
ch_conf.clone(),
transform_query.clone(),
sub.clone(),
log_level.clone(),
ctx.clone(),
open_bytes.clone(),
series,
range,
do_time_weight,
bin_len_layers,
cache_read_provider,
events_read_provider.clone(),
)?;
let ret = Self {
ch_conf,
@@ -99,22 +109,26 @@ impl TimeBinnedFromLayers {
} else {
match find_next_finer_bin_len(bin_len, &bin_len_layers) {
Some(finer) => {
// TODO
// produce from binned sub-stream with additional binner.
let range = BinnedRange::from_nano_range(range.to_nano_range(), finer);
warn!("{}::new next finer {:?} {:?}", Self::type_name(), finer, range);
let range_finer = BinnedRange::from_nano_range(range.to_nano_range(), finer);
warn!(
"{}::new next finer from bins {:?} {:?}",
Self::type_name(),
finer,
range_finer
);
let inp = super::gapfill::GapFill::new(
"FromLayers".into(),
ch_conf.clone(),
transform_query.clone(),
sub.clone(),
log_level.clone(),
ctx.clone(),
open_bytes.clone(),
series,
range.clone(),
range_finer.clone(),
do_time_weight,
bin_len_layers,
cache_read_provider,
events_read_provider.clone(),
)?;
let inp = super::basic::TimeBinnedStream::new(
Box::pin(inp),
@@ -133,10 +147,39 @@ impl TimeBinnedFromLayers {
Ok(ret)
}
None => {
warn!("{}::new NO next finer", Self::type_name());
// TODO
// produce from events
todo!()
warn!("{}::new next finer from events", Self::type_name());
let series_range = SeriesRange::TimeRange(range.to_nano_range());
let one_before_range = true;
let select = EventsSubQuerySelect::new(
ch_conf.clone(),
series_range,
one_before_range,
transform_query.clone(),
);
let evq = EventsSubQuery::from_parts(select, sub.clone(), ctx.reqid().into(), log_level.clone());
match &ch_conf {
ChannelTypeConfigGen::Scylla(chconf) => {
let inp = BinnedFromEvents::new(
range,
evq,
chconf.clone(),
do_time_weight,
events_read_provider,
)?;
let ret = Self {
ch_conf,
transform_query,
sub,
log_level,
ctx,
open_bytes,
inp: Box::pin(inp),
};
warn!("{}::new setup from events", Self::type_name());
Ok(ret)
}
ChannelTypeConfigGen::SfDatabuffer(_) => return Err(Error::SfDatabufferNotSupported),
}
}
}
}
+217 -92
View File
@@ -1,20 +1,27 @@
use super::cached::reader::CacheReadProvider;
use crate::tcprawclient::OpenBoxedBytesStreamsBox;
use super::cached::reader::EventsReadProvider;
use crate::timebin::fromevents::BinnedFromEvents;
use err::thiserror;
use err::ThisError;
use futures_util::FutureExt;
use futures_util::Stream;
use futures_util::StreamExt;
use items_0::streamitem::RangeCompletableItem;
use items_0::streamitem::Sitemty;
use items_0::streamitem::StreamItem;
use items_0::Empty;
use items_0::WithLen;
use items_2::binsdim0::BinsDim0;
use netpod::log::*;
use netpod::range::evrange::NanoRange;
use netpod::range::evrange::SeriesRange;
use netpod::BinnedRange;
use netpod::ChannelTypeConfigGen;
use netpod::DtMs;
use netpod::ReqCtx;
use netpod::TsNano;
use query::api4::events::EventsSubQuery;
use query::api4::events::EventsSubQuerySelect;
use query::api4::events::EventsSubQuerySettings;
use query::transform::TransformQuery;
use std::pin::Pin;
@@ -22,11 +29,27 @@ use std::sync::Arc;
use std::task::Context;
use std::task::Poll;
#[allow(unused)]
macro_rules! debug_init { ($($arg:tt)*) => ( if true { debug!($($arg)*); } ) }
#[allow(unused)]
macro_rules! debug_setup { ($($arg:tt)*) => ( if true { debug!($($arg)*); } ) }
#[allow(unused)]
macro_rules! debug_cache { ($($arg:tt)*) => ( if true { debug!($($arg)*); } ) }
#[allow(unused)]
macro_rules! trace_handle { ($($arg:tt)*) => ( if true { trace!($($arg)*); } ) }
#[derive(Debug, ThisError)]
#[cstm(name = "BinCachedGapFill")]
pub enum Error {
CacheReader(#[from] super::cached::reader::Error),
GapFromFiner,
#[error("InputBeforeRange({0}, {1})")]
InputBeforeRange(NanoRange, BinnedRange<TsNano>),
SfDatabufferNotSupported,
EventsReader(#[from] super::fromevents::Error),
}
type INP = Pin<Box<dyn Stream<Item = Sitemty<BinsDim0<f32>>> + Send>>;
@@ -34,41 +57,51 @@ type INP = Pin<Box<dyn Stream<Item = Sitemty<BinsDim0<f32>>> + Send>>;
// Try to read from cache for the given bin len.
// For gaps in the stream, construct an alternative input from finer bin len with a binner.
pub struct GapFill {
dbgname: String,
ch_conf: ChannelTypeConfigGen,
transform_query: TransformQuery,
sub: EventsSubQuerySettings,
log_level: String,
ctx: Arc<ReqCtx>,
open_bytes: OpenBoxedBytesStreamsBox,
series: u64,
range: BinnedRange<TsNano>,
do_time_weight: bool,
bin_len_layers: Vec<DtMs>,
inp: INP,
inp: Option<INP>,
inp_range_final: bool,
inp_buf: Option<BinsDim0<f32>>,
inp_finer: Option<INP>,
inp_finer_range_final: bool,
inp_finer_range_final_cnt: u32,
inp_finer_range_final_max: u32,
inp_finer_fills_gap: bool,
last_bin_ts2: Option<TsNano>,
exp_finer_range: NanoRange,
cache_read_provider: Arc<dyn CacheReadProvider>,
events_read_provider: Arc<dyn EventsReadProvider>,
bins_for_cache_write: BinsDim0<f32>,
done: bool,
cache_writing: Option<super::cached::reader::CacheWriting>,
}
impl GapFill {
// bin_len of the given range must be a cacheable bin_len.
pub fn new(
dbgname_parent: String,
ch_conf: ChannelTypeConfigGen,
transform_query: TransformQuery,
sub: EventsSubQuerySettings,
log_level: String,
ctx: Arc<ReqCtx>,
open_bytes: OpenBoxedBytesStreamsBox,
series: u64,
range: BinnedRange<TsNano>,
do_time_weight: bool,
bin_len_layers: Vec<DtMs>,
cache_read_provider: Arc<dyn CacheReadProvider>,
events_read_provider: Arc<dyn EventsReadProvider>,
) -> Result<Self, Error> {
// super::fromlayers::TimeBinnedFromLayers::new(series, range, do_time_weight, bin_len_layers)?;
let dbgname = format!("{}--[{}]", dbgname_parent, range);
debug_init!("new dbgname {}", dbgname);
let inp = super::cached::reader::CachedReader::new(
series,
range.bin_len.to_dt_ms(),
@@ -80,28 +113,38 @@ impl GapFill {
Err(e) => Err(::err::Error::from_string(e)),
});
let ret = Self {
dbgname,
ch_conf,
transform_query,
sub,
log_level,
ctx,
open_bytes,
series,
range,
do_time_weight,
bin_len_layers,
inp: Box::pin(inp),
inp: Some(Box::pin(inp)),
inp_range_final: false,
inp_buf: None,
inp_finer: None,
inp_finer_range_final: false,
inp_finer_range_final_cnt: 0,
inp_finer_range_final_max: 0,
inp_finer_fills_gap: false,
last_bin_ts2: None,
// TODO just dummy:
exp_finer_range: NanoRange { beg: 0, end: 0 },
cache_read_provider,
events_read_provider,
bins_for_cache_write: BinsDim0::empty(),
done: false,
cache_writing: None,
};
Ok(ret)
}
fn handle_bins_finer(mut self: Pin<&mut Self>, bins: BinsDim0<f32>) -> Result<BinsDim0<f32>, Error> {
trace_handle!("{} handle_bins_finer {}", self.dbgname, bins);
for (&ts1, &ts2) in bins.ts1s.iter().zip(&bins.ts2s) {
if let Some(last) = self.last_bin_ts2 {
if ts1 != last.ns() {
@@ -110,25 +153,37 @@ impl GapFill {
}
self.last_bin_ts2 = Some(TsNano::from_ns(ts2));
}
// TODO keep bins from finer source.
// Only write bins to cache if we receive another
if bins.len() != 0 {
bins.clone().drain_into(&mut self.bins_for_cache_write, 0..bins.len());
}
self.cache_write_intermediate()?;
// TODO make sure that input does not send "made-up" empty future bins.
// On the other hand, if the request is over past range, but the channel was silent ever since?
// Then we should in principle know that from is-alive status checking.
// So, until then, allow made-up bins?
// Maybe, for now, only write those bins before some last non-zero-count bin. The only safe way.
Ok(bins)
}
fn handle_bins(mut self: Pin<&mut Self>, bins: BinsDim0<f32>) -> Result<BinsDim0<f32>, Error> {
trace_handle!("{} handle_bins {}", self.dbgname, bins);
// TODO could use an interface to iterate over opaque bin items that only expose
// edge and count information with all remaining values opaque.
for (i, (&ts1, &ts2)) in bins.ts1s.iter().zip(&bins.ts2s).enumerate() {
if ts1 < self.range.nano_beg().ns() {
return Err(Error::InputBeforeRange(
NanoRange::from_ns_u64(ts1, ts2),
self.range.clone(),
));
}
if let Some(last) = self.last_bin_ts2 {
if ts1 != last.ns() {
trace_handle!(
"{} detect a gap ------------- SETUP SUB STREAM ts1 {} last {}",
self.dbgname,
ts1,
last
);
let mut ret = <BinsDim0<f32> as items_0::Empty>::empty();
let mut bins = bins;
bins.drain_into(&mut ret, 0..i);
@@ -137,7 +192,7 @@ impl GapFill {
beg: last.ns(),
end: ts1,
};
self.setup_inp_finer(range)?;
self.setup_inp_finer(range, true)?;
return Ok(ret);
}
}
@@ -146,25 +201,35 @@ impl GapFill {
Ok(bins)
}
fn setup_inp_finer(mut self: Pin<&mut Self>, range: NanoRange) -> Result<(), Error> {
// Set up range to fill from finer.
fn setup_inp_finer(mut self: Pin<&mut Self>, range: NanoRange, inp_finer_fills_gap: bool) -> Result<(), Error> {
self.inp_finer_range_final = false;
self.inp_finer_range_final_max += 1;
self.inp_finer_fills_gap = inp_finer_fills_gap;
self.exp_finer_range = range.clone();
if let Some(bin_len_finer) =
super::grid::find_next_finer_bin_len(self.range.bin_len.to_dt_ms(), &self.bin_len_layers)
{
debug_setup!(
"{} setup_inp_finer next finer from bins {} {} from {}",
self.dbgname,
range,
bin_len_finer,
self.range.bin_len.to_dt_ms()
);
let range_finer = BinnedRange::from_nano_range(range, bin_len_finer);
let inp_finer = GapFill::new(
self.dbgname.clone(),
self.ch_conf.clone(),
self.transform_query.clone(),
self.sub.clone(),
self.log_level.clone(),
self.ctx.clone(),
self.open_bytes.clone(),
self.series,
range_finer.clone(),
self.do_time_weight,
self.bin_len_layers.clone(),
self.cache_read_provider.clone(),
self.events_read_provider.clone(),
)?;
let stream = Box::pin(inp_finer);
let do_time_weight = self.do_time_weight;
@@ -173,51 +238,77 @@ impl GapFill {
super::basic::TimeBinnedStream::new(stream, netpod::BinnedRangeEnum::Time(range), do_time_weight);
self.inp_finer = Some(Box::pin(stream));
} else {
let do_time_weight = self.do_time_weight;
debug_setup!("{} setup_inp_finer next finer from events {}", self.dbgname, range);
let series_range = SeriesRange::TimeRange(range.clone());
let one_before_range = true;
let range = BinnedRange::from_nano_range(range, self.range.bin_len.to_dt_ms());
let stream = crate::timebinnedjson::TimeBinnableStream::new(
range.full_range(),
one_before_range,
let select = EventsSubQuerySelect::new(
self.ch_conf.clone(),
series_range,
one_before_range,
self.transform_query.clone(),
self.sub.clone(),
self.log_level.clone(),
self.ctx.clone(),
self.open_bytes.clone(),
);
// let stream: Pin<Box<dyn items_0::transform::TimeBinnableStreamTrait>> = stream;
let stream = Box::pin(stream);
// TODO rename TimeBinnedStream to make it more clear that it is the component which initiates the time binning.
let stream =
super::basic::TimeBinnedStream::new(stream, netpod::BinnedRangeEnum::Time(range), do_time_weight);
let stream = stream.map(|item| match item {
Ok(x) => match x {
StreamItem::DataItem(x) => match x {
RangeCompletableItem::Data(mut x) => {
// TODO need a typed time binner
if let Some(x) = x.as_any_mut().downcast_mut::<BinsDim0<f32>>() {
let y = x.clone();
Ok(StreamItem::DataItem(RangeCompletableItem::Data(y)))
} else {
Err(::err::Error::with_msg_no_trace(
"GapFill expects incoming BinsDim0<f32>",
))
}
}
RangeCompletableItem::RangeComplete => {
Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))
}
},
StreamItem::Log(x) => Ok(StreamItem::Log(x)),
StreamItem::Stats(x) => Ok(StreamItem::Stats(x)),
},
Err(e) => Err(e),
});
// let stream: Pin<
// Box<dyn Stream<Item = Sitemty<Box<dyn items_0::timebin::TimeBinned>>> + Send>,
// > = Box::pin(stream);
self.inp_finer = Some(Box::pin(stream));
let evq = EventsSubQuery::from_parts(
select,
self.sub.clone(),
self.ctx.reqid().into(),
self.log_level.clone(),
);
match &self.ch_conf {
ChannelTypeConfigGen::Scylla(chconf) => {
let range = BinnedRange::from_nano_range(range.clone(), self.range.bin_len.to_dt_ms());
let inp = BinnedFromEvents::new(
range,
evq,
chconf.clone(),
self.do_time_weight,
self.events_read_provider.clone(),
)?;
self.inp_finer = Some(Box::pin(inp));
}
ChannelTypeConfigGen::SfDatabuffer(_) => return Err(Error::SfDatabufferNotSupported),
}
}
Ok(())
}
fn cache_write(mut self: Pin<&mut Self>, bins: BinsDim0<f32>) -> Result<(), Error> {
self.cache_writing = Some(self.cache_read_provider.write(self.series, bins));
Ok(())
}
fn cache_write_on_end(mut self: Pin<&mut Self>) -> Result<(), Error> {
if self.inp_finer_fills_gap {
// TODO can consider all incoming bins as final by assumption.
}
let aa = &self.bins_for_cache_write;
if aa.len() >= 2 {
for (i, (&c1, &c2)) in aa.counts.iter().rev().zip(aa.counts.iter().rev().skip(1)).enumerate() {
if c1 != 0 {
let n = aa.len() - (1 + i);
debug_cache!("{} cache_write_on_end consider {} for write", self.dbgname, n);
let mut bins_write = BinsDim0::empty();
self.bins_for_cache_write.drain_into(&mut bins_write, 0..n);
self.cache_write(bins_write)?;
break;
}
}
}
Ok(())
}
fn cache_write_intermediate(mut self: Pin<&mut Self>) -> Result<(), Error> {
let aa = &self.bins_for_cache_write;
if aa.len() >= 2 {
for (i, (&c1, &c2)) in aa.counts.iter().rev().zip(aa.counts.iter().rev().skip(1)).enumerate() {
if c1 != 0 {
let n = aa.len() - (1 + i);
debug_cache!("{} cache_write_intermediate consider {} for write", self.dbgname, n);
let mut bins_write = BinsDim0::empty();
self.bins_for_cache_write.drain_into(&mut bins_write, 0..n);
self.cache_write(bins_write)?;
break;
}
}
}
Ok(())
}
@@ -229,7 +320,21 @@ impl Stream for GapFill {
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
use Poll::*;
loop {
break if let Some(inp_finer) = self.inp_finer.as_mut() {
break if self.done {
Ready(None)
} else if let Some(fut) = self.cache_writing.as_mut() {
match fut.poll_unpin(cx) {
Ready(Ok(())) => {
self.cache_writing = None;
continue;
}
Ready(Err(e)) => {
self.cache_writing = None;
Ready(Some(Err(::err::Error::from_string(e))))
}
Pending => Pending,
}
} else if let Some(inp_finer) = self.inp_finer.as_mut() {
// TODO
// detect also gaps here: if gap from finer, then error.
// on CacheUsage Use or Rereate:
@@ -243,26 +348,48 @@ impl Stream for GapFill {
}
}
StreamItem::DataItem(RangeCompletableItem::RangeComplete) => {
Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
trace_handle!("{} RECV RANGE FINAL", self.dbgname);
self.inp_finer_range_final = true;
self.inp_finer_range_final_cnt += 1;
match self.as_mut().cache_write_on_end() {
Ok(()) => continue,
Err(e) => Ready(Some(Err(::err::Error::from_string(e)))),
}
}
StreamItem::Log(x) => Ready(Some(Ok(StreamItem::Log(x)))),
StreamItem::Stats(x) => Ready(Some(Ok(StreamItem::Stats(x)))),
},
Ready(Some(Err(e))) => Ready(Some(Err(::err::Error::from_string(e)))),
Ready(None) => {
trace_handle!(
"{} inp_finer Ready(None) last_bin_ts2 {:?}",
self.dbgname,
self.last_bin_ts2
);
self.inp_finer = None;
if let Some(j) = self.last_bin_ts2 {
if j.ns() != self.exp_finer_range.end() {
trace_handle!(
"{} inp_finer Ready(None) last_bin_ts2 {:?} exp_finer_range {:?}",
self.dbgname,
self.last_bin_ts2,
self.exp_finer_range
);
Ready(Some(Err(::err::Error::from_string(
"finer input didn't deliver to the end",
))))
} else {
self.last_bin_ts2 = None;
self.exp_finer_range = NanoRange { beg: 0, end: 0 };
self.inp_finer = None;
continue;
}
} else {
Ready(Some(Err(::err::Error::from_string("finer input delivered nothing"))))
error!(
"{} inp_finer Ready(None) last_bin_ts2 {:?}",
self.dbgname, self.last_bin_ts2
);
Ready(Some(Err(::err::Error::from_string(
"finer input delivered nothing, received nothing at all so far",
))))
}
}
Pending => Pending,
@@ -272,46 +399,52 @@ impl Stream for GapFill {
Ok(x) => Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(x))))),
Err(e) => Ready(Some(Err(::err::Error::from_string(e)))),
}
} else {
match self.inp.poll_next_unpin(cx) {
} else if let Some(inp) = self.inp.as_mut() {
match inp.poll_next_unpin(cx) {
Ready(Some(Ok(x))) => match x {
StreamItem::DataItem(RangeCompletableItem::Data(x)) => match self.as_mut().handle_bins(x) {
Ok(x) => Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(x))))),
Err(e) => Ready(Some(Err(::err::Error::from_string(e)))),
},
StreamItem::DataItem(RangeCompletableItem::RangeComplete) => {
Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
self.inp_range_final = true;
continue;
}
StreamItem::Log(x) => Ready(Some(Ok(StreamItem::Log(x)))),
StreamItem::Stats(x) => Ready(Some(Ok(StreamItem::Stats(x)))),
},
Ready(Some(Err(e))) => Ready(Some(Err(::err::Error::from_string(e)))),
Ready(None) => {
self.inp = None;
// TODO assert that we have emitted up to the requested range.
// If not, request the remaining range from "finer" input.
if let Some(j) = self.last_bin_ts2 {
if j.ns() != self.exp_finer_range.end() {
if j != self.range.nano_end() {
let range = NanoRange {
beg: j.ns(),
end: self.range.full_range().end(),
};
match self.as_mut().setup_inp_finer(range) {
warn!(
"----- RECEIVED SOMETHING, BUT NOT ALL, setup rest from finer {} {} {}",
self.range, j, range
);
match self.as_mut().setup_inp_finer(range, false) {
Ok(()) => {
continue;
}
Err(e) => Ready(Some(Err(::err::Error::from_string(e)))),
}
} else {
// self.last_bin_ts2 = None;
// self.exp_finer_range = NanoRange { beg: 0, end: 0 };
// self.inp_finer = None;
// continue;
info!("----- RECEIVED EVERYTHING");
Ready(None)
}
} else {
warn!("----- NOTHING IN CACHE, SETUP FULL FROM FINER");
let range = self.range.full_range();
match self.as_mut().setup_inp_finer(range) {
let range = self.range.to_nano_range();
warn!(
"----- RECEIVED NOTHING SO FAR AT ALL, setup full range from finer {} {}",
self.range, range
);
match self.as_mut().setup_inp_finer(range, false) {
Ok(()) => {
continue;
}
@@ -321,24 +454,16 @@ impl Stream for GapFill {
}
Pending => Pending,
}
} else {
self.done = true;
if self.inp_finer_range_final_cnt == self.inp_finer_range_final_max {
trace_handle!("{} RANGE FINAL ALL", self.dbgname);
Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
} else {
trace_handle!("{} SUBSTREAMS NOT FINAL", self.dbgname);
continue;
}
};
}
// When do we detect a gap:
// - when the current item poses a gap to the last.
// - when we see EOS before the requested range is filled.
// Requirements:
// Must always request fully cache-aligned ranges.
// Must remember where the last bin ended.
// When a gap is detected:
// - buffer the current item, if there is one (can also be EOS).
// - create a new producer of bin:
// - GapFillwith finer range? FromFiner(series, bin_len, range) ?
// - TimeBinnedFromLayers for a bin_len in layers would also go directly into GapFill.
// what does FromFiner bring to the table?
// It does not attempt to read the given bin-len from a cache, because we just did attempt that.
// It still requires that bin-len is cacheable. (NO! it must work with the layering that I passed!)
// Then it finds the next cacheable
// Ready(None)
}
}
+7 -2
View File
@@ -3,6 +3,7 @@ use crate::rangefilter2::RangeFilter2;
use crate::tcprawclient::container_stream_from_bytes_stream;
use crate::tcprawclient::make_sub_query;
use crate::tcprawclient::OpenBoxedBytesStreamsBox;
use crate::timebin::cached::reader::EventsReadProvider;
use crate::timebin::CacheReadProvider;
use crate::timebin::TimeBinnedStream;
use crate::transform::build_merged_event_transform;
@@ -310,10 +311,11 @@ async fn timebinned_stream(
ctx: &ReqCtx,
open_bytes: OpenBoxedBytesStreamsBox,
cache_read_provider: Option<Arc<dyn CacheReadProvider>>,
events_read_provider: Option<Arc<dyn EventsReadProvider>>,
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<Box<dyn TimeBinned>>> + Send>>, Error> {
use netpod::query::CacheUsage;
match (query.cache_usage(), cache_read_provider) {
(CacheUsage::Use | CacheUsage::Recreate, Some(cache_read_provider)) => {
match (query.cache_usage(), cache_read_provider, events_read_provider) {
(CacheUsage::Use | CacheUsage::Recreate, Some(cache_read_provider), Some(events_read_provider)) => {
let series = if let Some(x) = query.channel().series() {
x
} else {
@@ -351,6 +353,7 @@ async fn timebinned_stream(
do_time_weight,
bin_len_layers,
cache_read_provider,
events_read_provider,
)
.map_err(Error::from_string)?;
let stream = stream.map(|item| {
@@ -408,6 +411,7 @@ pub async fn timebinned_json(
ctx: &ReqCtx,
open_bytes: OpenBoxedBytesStreamsBox,
cache_read_provider: Option<Arc<dyn CacheReadProvider>>,
events_read_provider: Option<Arc<dyn EventsReadProvider>>,
) -> Result<JsonValue, Error> {
let deadline = Instant::now() + query.timeout_content().unwrap_or(Duration::from_millis(5000));
let binned_range = BinnedRangeEnum::covering_range(query.range().clone(), query.bin_count())?;
@@ -421,6 +425,7 @@ pub async fn timebinned_json(
ctx,
open_bytes,
cache_read_provider,
events_read_provider,
)
.await?;
let stream = timebinned_to_collectable(stream);