Refactor number pipeline for simple cases
This commit is contained in:
@@ -0,0 +1,261 @@
|
||||
use crate::agg::binnedt4::{TBinnerStream, TimeBinnableType, TimeBinnableTypeAggregator};
|
||||
use crate::agg::streams::StreamItem;
|
||||
use crate::binned::query::{CacheUsage, PreBinnedQuery};
|
||||
use crate::binned::{EventsTimeBinner, RangeCompletableItem};
|
||||
use crate::cache::{node_ix_for_patch, HttpBodyAsAsyncRead};
|
||||
use crate::frame::inmem::InMemoryFrameAsyncReadStream;
|
||||
use crate::frame::makeframe::{decode_frame, FrameType};
|
||||
use crate::Sitemty;
|
||||
use err::Error;
|
||||
use futures_core::Stream;
|
||||
use futures_util::{FutureExt, StreamExt};
|
||||
use http::{StatusCode, Uri};
|
||||
use netpod::log::*;
|
||||
use netpod::{AggKind, BinnedRange, ByteSize, Channel, NodeConfigCached, PerfOpts, PreBinnedPatchIterator};
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde::Deserialize;
|
||||
use std::future::ready;
|
||||
use std::marker::PhantomData;
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
/// Stream of items for a single pre-binned patch, obtained through an HTTP
/// sub-request to a cluster node.
///
/// `TBT` is the payload type that data frames of the response are decoded into.
/// The stream is a small state machine: no request yet -> awaiting the response
/// (`resfut`) -> reading frames from the response body (`res`).
pub struct FetchedPreBinned<TBT> {
|
||||
/// Target of the sub-request, assembled in `new`.
uri: Uri,
|
||||
/// In-flight HTTP request; `None` until the first poll has issued it.
resfut: Option<hyper::client::ResponseFuture>,
|
||||
/// Frame reader over the response body; `None` until an OK response arrived.
res: Option<InMemoryFrameAsyncReadStream<HttpBodyAsAsyncRead>>,
|
||||
/// Set after an error item was yielded; the following poll ends the stream.
errored: bool,
|
||||
/// Set once the stream has ended; polling afterwards panics.
completed: bool,
|
||||
/// Ties the otherwise unused type parameter `TBT` to the struct.
_m1: PhantomData<TBT>,
|
||||
}
|
||||
|
||||
impl<TBT> FetchedPreBinned<TBT> {
|
||||
pub fn new(query: &PreBinnedQuery, node_config: &NodeConfigCached) -> Result<Self, Error> {
|
||||
let nodeix = node_ix_for_patch(&query.patch(), &query.channel(), &node_config.node_config.cluster);
|
||||
let node = &node_config.node_config.cluster.nodes[nodeix as usize];
|
||||
let uri: hyper::Uri = format!(
|
||||
"http://{}:{}/api/4/prebinned?{}",
|
||||
node.host,
|
||||
node.port,
|
||||
query.make_query_string()
|
||||
)
|
||||
.parse()?;
|
||||
let ret = Self {
|
||||
uri,
|
||||
resfut: None,
|
||||
res: None,
|
||||
errored: false,
|
||||
completed: false,
|
||||
_m1: PhantomData,
|
||||
};
|
||||
Ok(ret)
|
||||
}
|
||||
}
|
||||
|
||||
impl<TBT> Stream for FetchedPreBinned<TBT>
|
||||
where
|
||||
TBT: TimeBinnableType,
|
||||
Sitemty<TBT>: FrameType + DeserializeOwned,
|
||||
{
|
||||
type Item = Sitemty<TBT>;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
|
||||
use Poll::*;
|
||||
'outer: loop {
|
||||
break if self.completed {
|
||||
panic!("poll_next on completed");
|
||||
} else if self.errored {
|
||||
self.completed = true;
|
||||
return Ready(None);
|
||||
} else if let Some(res) = self.res.as_mut() {
|
||||
match res.poll_next_unpin(cx) {
|
||||
Ready(Some(Ok(item))) => match item {
|
||||
StreamItem::Log(item) => Ready(Some(Ok(StreamItem::Log(item)))),
|
||||
StreamItem::Stats(item) => Ready(Some(Ok(StreamItem::Stats(item)))),
|
||||
StreamItem::DataItem(item) => match decode_frame::<Sitemty<TBT>>(&item) {
|
||||
Ok(Ok(item)) => Ready(Some(Ok(item))),
|
||||
Ok(Err(e)) => {
|
||||
self.errored = true;
|
||||
Ready(Some(Err(e)))
|
||||
}
|
||||
Err(e) => {
|
||||
self.errored = true;
|
||||
Ready(Some(Err(e)))
|
||||
}
|
||||
},
|
||||
},
|
||||
Ready(Some(Err(e))) => {
|
||||
self.errored = true;
|
||||
Ready(Some(Err(e)))
|
||||
}
|
||||
Ready(None) => {
|
||||
self.completed = true;
|
||||
Ready(None)
|
||||
}
|
||||
Pending => Pending,
|
||||
}
|
||||
} else if let Some(resfut) = self.resfut.as_mut() {
|
||||
match resfut.poll_unpin(cx) {
|
||||
Ready(res) => match res {
|
||||
Ok(res) => {
|
||||
if res.status() == StatusCode::OK {
|
||||
let perf_opts = PerfOpts { inmem_bufcap: 512 };
|
||||
let s1 = HttpBodyAsAsyncRead::new(res);
|
||||
let s2 = InMemoryFrameAsyncReadStream::new(s1, perf_opts.inmem_bufcap);
|
||||
self.res = Some(s2);
|
||||
continue 'outer;
|
||||
} else {
|
||||
error!(
|
||||
"PreBinnedValueFetchedStream got non-OK result from sub request: {:?}",
|
||||
res
|
||||
);
|
||||
let e = Error::with_msg(format!(
|
||||
"PreBinnedValueFetchedStream got non-OK result from sub request: {:?}",
|
||||
res
|
||||
));
|
||||
self.errored = true;
|
||||
Ready(Some(Err(e)))
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!("PreBinnedValueStream error in stream {:?}", e);
|
||||
self.errored = true;
|
||||
Ready(Some(Err(e.into())))
|
||||
}
|
||||
},
|
||||
Pending => Pending,
|
||||
}
|
||||
} else {
|
||||
match hyper::Request::builder()
|
||||
.method(http::Method::GET)
|
||||
.uri(&self.uri)
|
||||
.body(hyper::Body::empty())
|
||||
{
|
||||
Ok(req) => {
|
||||
let client = hyper::Client::new();
|
||||
self.resfut = Some(client.request(req));
|
||||
continue 'outer;
|
||||
}
|
||||
Err(e) => {
|
||||
self.errored = true;
|
||||
Ready(Some(Err(e.into())))
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates bins from a range of pre-binned patches.
|
||||
///
|
||||
/// Constructed from an iterator over the necessary patches; see `new`.
pub struct BinnedFromPreBinned<TBT>
|
||||
where
|
||||
TBT: TimeBinnableType,
|
||||
{
|
||||
// TODO get rid of box:
|
||||
/// Fully assembled input pipeline; polling this struct delegates to it.
inp: Pin<Box<dyn Stream<Item = Sitemty<TBT>> + Send>>,
|
||||
/// Marker for the item type `TBT`.
_m1: PhantomData<TBT>,
|
||||
}
|
||||
|
||||
impl<TBT> BinnedFromPreBinned<TBT>
|
||||
where
|
||||
TBT: TimeBinnableType<Output = TBT> + Unpin + 'static,
|
||||
Sitemty<TBT>: FrameType + DeserializeOwned,
|
||||
{
|
||||
pub fn new(
|
||||
patch_it: PreBinnedPatchIterator,
|
||||
channel: Channel,
|
||||
range: BinnedRange,
|
||||
agg_kind: AggKind,
|
||||
cache_usage: CacheUsage,
|
||||
node_config: &NodeConfigCached,
|
||||
disk_stats_every: ByteSize,
|
||||
report_error: bool,
|
||||
) -> Result<Self, Error> {
|
||||
let patches: Vec<_> = patch_it.collect();
|
||||
let mut sp = String::new();
|
||||
if false {
|
||||
// Convert this to a StreamLog message:
|
||||
for (i, p) in patches.iter().enumerate() {
|
||||
use std::fmt::Write;
|
||||
write!(sp, " • patch {:2} {:?}\n", i, p)?;
|
||||
}
|
||||
info!("Using these pre-binned patches:\n{}", sp);
|
||||
}
|
||||
let pmax = patches.len();
|
||||
let inp = futures_util::stream::iter(patches.into_iter().enumerate())
|
||||
.map({
|
||||
let node_config = node_config.clone();
|
||||
move |(pix, patch)| {
|
||||
let query = PreBinnedQuery::new(
|
||||
patch,
|
||||
channel.clone(),
|
||||
agg_kind.clone(),
|
||||
cache_usage.clone(),
|
||||
disk_stats_every.clone(),
|
||||
report_error,
|
||||
);
|
||||
let ret: Pin<Box<dyn Stream<Item = _> + Send>> =
|
||||
match FetchedPreBinned::<TBT>::new(&query, &node_config) {
|
||||
Ok(stream) => Box::pin(stream.map(move |q| (pix, q))),
|
||||
Err(e) => {
|
||||
error!("error from PreBinnedValueFetchedStream::new {:?}", e);
|
||||
Box::pin(futures_util::stream::iter(vec![(pix, Err(e))]))
|
||||
}
|
||||
};
|
||||
ret
|
||||
}
|
||||
})
|
||||
.flatten()
|
||||
.filter_map({
|
||||
let range = range.clone();
|
||||
move |(pix, k)| {
|
||||
let fit_range = range.full_range();
|
||||
let g = match k {
|
||||
Ok(item) => match item {
|
||||
StreamItem::Log(item) => Some(Ok(StreamItem::Log(item))),
|
||||
StreamItem::Stats(item) => Some(Ok(StreamItem::Stats(item))),
|
||||
StreamItem::DataItem(item) => match item {
|
||||
RangeCompletableItem::RangeComplete => {
|
||||
if pix + 1 == pmax {
|
||||
Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
RangeCompletableItem::Data(item) => {
|
||||
match crate::binned::FilterFittingInside::filter_fitting_inside(item, fit_range) {
|
||||
Some(item) => Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(item)))),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
Err(e) => Some(Err(e)),
|
||||
};
|
||||
ready(g)
|
||||
}
|
||||
});
|
||||
|
||||
// TODO TBinnerStream is for T-binning events.
|
||||
// But here, we need to bin bins into bigger bins.
|
||||
// The logic in TBinnerStream is actually the same I think..
|
||||
// Reuse??
|
||||
let inp = TBinnerStream::<_, TBT>::new(inp, range);
|
||||
Ok(Self {
|
||||
inp: Box::pin(inp),
|
||||
_m1: PhantomData,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<TBT> Stream for BinnedFromPreBinned<TBT>
|
||||
where
|
||||
TBT: TimeBinnableType,
|
||||
{
|
||||
type Item = Sitemty<TBT>;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
|
||||
self.inp.poll_next_unpin(cx)
|
||||
}
|
||||
}
|
||||
+29
-22
@@ -1,4 +1,4 @@
|
||||
use crate::agg::binnedt4::TBinnerStream;
|
||||
use crate::agg::binnedt4::{TBinnerStream, TimeBinnableType};
|
||||
use crate::agg::streams::{Appendable, StreamItem};
|
||||
use crate::binned::query::{CacheUsage, PreBinnedQuery};
|
||||
use crate::binned::{
|
||||
@@ -9,7 +9,7 @@ use crate::cache::pbvfs::PreBinnedScalarValueFetchedStream;
|
||||
use crate::cache::{write_pb_cache_min_max_avg_scalar, CacheFileDesc, MergedFromRemotes, WrittenPbCache};
|
||||
use crate::decode::{Endianness, EventValueFromBytes, EventValueShape, NumFromBytes};
|
||||
use crate::frame::makeframe::{make_frame, FrameType};
|
||||
use crate::merge::mergefromremote::MergedFromRemotes2;
|
||||
use crate::merge::mergedfromremotes::MergedFromRemotes2;
|
||||
use crate::raw::EventsQuery;
|
||||
use crate::streamlog::Streamlog;
|
||||
use crate::Sitemty;
|
||||
@@ -40,7 +40,9 @@ where
|
||||
query: PreBinnedQuery,
|
||||
node_config: NodeConfigCached,
|
||||
open_check_local_file: Option<Pin<Box<dyn Future<Output = Result<File, io::Error>> + Send>>>,
|
||||
fut2: Option<Pin<Box<dyn Stream<Item = Sitemty<<ETB as EventsTimeBinner>::Output>> + Send>>>,
|
||||
fut2: Option<
|
||||
Pin<Box<dyn Stream<Item = Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>> + Send>>,
|
||||
>,
|
||||
read_from_cache: bool,
|
||||
cache_written: bool,
|
||||
data_complete: bool,
|
||||
@@ -49,9 +51,15 @@ where
|
||||
errored: bool,
|
||||
completed: bool,
|
||||
streamlog: Streamlog,
|
||||
values: <ETB as EventsTimeBinner>::Output,
|
||||
values: <<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output,
|
||||
write_fut: Option<Pin<Box<dyn Future<Output = Result<WrittenPbCache, Error>> + Send>>>,
|
||||
read_cache_fut: Option<Pin<Box<dyn Future<Output = Sitemty<<ETB as EventsTimeBinner>::Output>> + Send>>>,
|
||||
read_cache_fut: Option<
|
||||
Pin<
|
||||
Box<
|
||||
dyn Future<Output = Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>> + Send,
|
||||
>,
|
||||
>,
|
||||
>,
|
||||
_m1: PhantomData<NTY>,
|
||||
_m2: PhantomData<END>,
|
||||
_m3: PhantomData<EVS>,
|
||||
@@ -84,7 +92,8 @@ where
|
||||
errored: false,
|
||||
completed: false,
|
||||
streamlog: Streamlog::new(node_config.ix as u32),
|
||||
values: <<ETB as EventsTimeBinner>::Output as Appendable>::empty(),
|
||||
// TODO use alias via some trait associated type:
|
||||
values: <<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output as Appendable>::empty(),
|
||||
write_fut: None,
|
||||
read_cache_fut: None,
|
||||
_m1: PhantomData,
|
||||
@@ -97,7 +106,10 @@ where
|
||||
|
||||
fn setup_merged_from_remotes(
|
||||
&mut self,
|
||||
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<<ETB as EventsTimeBinner>::Output>> + Send>>, Error> {
|
||||
) -> Result<
|
||||
Pin<Box<dyn Stream<Item = Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>> + Send>>,
|
||||
Error,
|
||||
> {
|
||||
let evq = EventsQuery {
|
||||
channel: self.query.channel().clone(),
|
||||
range: self.query.patch().patch_range(),
|
||||
@@ -117,24 +129,18 @@ where
|
||||
let range = BinnedRange::covering_range(evq.range.clone(), count as u32)?
|
||||
.ok_or(Error::with_msg("covering_range returns None"))?;
|
||||
let perf_opts = PerfOpts { inmem_bufcap: 512 };
|
||||
|
||||
// TODO copy the MergedFromRemotes and adapt...
|
||||
let s1 = MergedFromRemotes2::<ENP>::new(evq, perf_opts, self.node_config.node_config.cluster.clone());
|
||||
|
||||
// TODO
|
||||
// Go from ENP values to a T-binned stream...
|
||||
// Most of the algo is static same.
|
||||
// What varies: init aggregator for next T-bin.
|
||||
let ret = TBinnerStream::<_, ETB>::new(s1, range);
|
||||
|
||||
//let s1 = todo_convert_stream_to_tbinned_stream(s1, range);
|
||||
let s = MergedFromRemotes2::<ENP>::new(evq, perf_opts, self.node_config.node_config.cluster.clone());
|
||||
let ret = TBinnerStream::<_, <ENP as EventsNodeProcessor>::Output>::new(s, range);
|
||||
Ok(Box::pin(ret))
|
||||
}
|
||||
|
||||
fn setup_from_higher_res_prebinned(
|
||||
&mut self,
|
||||
range: PreBinnedPatchRange,
|
||||
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<<ETB as EventsTimeBinner>::Output>> + Send>>, Error> {
|
||||
) -> Result<
|
||||
Pin<Box<dyn Stream<Item = Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>> + Send>>,
|
||||
Error,
|
||||
> {
|
||||
let g = self.query.patch().bin_t_len();
|
||||
let h = range.grid_spec.bin_t_len();
|
||||
trace!(
|
||||
@@ -216,7 +222,7 @@ where
|
||||
Sitemty<<ENP as EventsNodeProcessor>::Output>: FrameType,
|
||||
<ETB as EventsTimeBinner>::Output: Serialize + ReadableFromFile + 'static,
|
||||
{
|
||||
type Item = Sitemty<<ETB as EventsTimeBinner>::Output>;
|
||||
type Item = Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
|
||||
use Poll::*;
|
||||
@@ -290,7 +296,8 @@ where
|
||||
self.values.len(),
|
||||
);
|
||||
self.streamlog.append(Level::INFO, msg);
|
||||
let emp = <<ETB as EventsTimeBinner>::Output as Appendable>::empty();
|
||||
// TODO use alias vias trait:
|
||||
let emp = <<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output as Appendable>::empty();
|
||||
let values = std::mem::replace(&mut self.values, emp);
|
||||
let fut = write_pb_cache_min_max_avg_scalar(
|
||||
values,
|
||||
@@ -344,7 +351,7 @@ where
|
||||
Ok(file) => {
|
||||
self.read_from_cache = true;
|
||||
let fut =
|
||||
<<ETB as EventsTimeBinner>::Output as ReadableFromFile>::read_from_file(file)?;
|
||||
<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output as ReadableFromFile>::read_from_file(file)?;
|
||||
self.read_cache_fut = Some(Box::pin(fut));
|
||||
continue 'outer;
|
||||
}
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
use crate::agg::binnedt4::{DefaultBinsTimeBinner, DefaultScalarEventsTimeBinner, DefaultSingleXBinTimeBinner};
|
||||
use crate::agg::binnedt4::{
|
||||
DefaultBinsTimeBinner, DefaultScalarEventsTimeBinner, DefaultSingleXBinTimeBinner, TimeBinnableType,
|
||||
};
|
||||
use crate::agg::enp::{Identity, WaveXBinner};
|
||||
use crate::agg::streams::{Appendable, StreamItem};
|
||||
use crate::binned::pbv2::{
|
||||
@@ -41,6 +43,7 @@ where
|
||||
<ETB as EventsTimeBinner>::Output: Serialize + ReadableFromFile + 'static,
|
||||
Sitemty<<ENP as EventsNodeProcessor>::Output>: FrameType + Framable + 'static,
|
||||
Sitemty<<ETB as EventsTimeBinner>::Output>: Framable,
|
||||
Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>: Framable,
|
||||
{
|
||||
// TODO
|
||||
// Currently, this mod uses stuff from pbv2, therefore complete path:
|
||||
|
||||
Reference in New Issue
Block a user