WIP on adding merger impls, it checks

This commit is contained in:
Dominik Werder
2021-06-07 20:36:41 +02:00
parent 5c7262c2c3
commit 908207b71b
9 changed files with 707 additions and 173 deletions
+48 -40
View File
@@ -2,8 +2,12 @@ use crate::agg::enp::XBinnedScalarEvents;
use crate::agg::eventbatch::MinMaxAvgScalarEventBatch;
use crate::agg::scalarbinbatch::MinMaxAvgScalarBinBatch;
use crate::agg::streams::StreamItem;
use crate::binned::{BinsTimeBinner, EventsTimeBinner, MinMaxAvgBins, NumOps, RangeCompletableItem, RangeOverlapInfo};
use crate::binned::{
BinsTimeBinner, EventsTimeBinner, EventsTimeBinnerAggregator, MinMaxAvgAggregator, MinMaxAvgBins, NumOps,
RangeCompletableItem, RangeOverlapInfo, SingleXBinAggregator,
};
use crate::decode::EventValues;
use crate::Sitemty;
use err::Error;
use futures_core::Stream;
use futures_util::StreamExt;
@@ -24,9 +28,10 @@ where
{
type Input = EventValues<NTY>;
type Output = MinMaxAvgBins<NTY>;
type Aggregator = MinMaxAvgAggregator<NTY>;
fn process(inp: Self::Input) -> Self::Output {
todo!()
fn aggregator(range: NanoRange) -> Self::Aggregator {
Self::Aggregator::new(range)
}
}
@@ -39,11 +44,13 @@ where
NTY: NumOps,
{
type Input = XBinnedScalarEvents<NTY>;
// TODO is that output type good enough for now?
// TODO is that output type good enough for now? Maybe better with a new one also
// to distinguish from the earlier one.
type Output = MinMaxAvgBins<NTY>;
type Aggregator = SingleXBinAggregator<NTY>;
fn process(inp: Self::Input) -> Self::Output {
todo!()
fn aggregator(range: NanoRange) -> Self::Aggregator {
Self::Aggregator::new(range)
}
}
@@ -134,54 +141,49 @@ impl Agg3 {
}
}
impl Aggregator3Tdim for Agg3 {
type InputValue = MinMaxAvgScalarEventBatch;
type OutputValue = MinMaxAvgScalarBinBatch;
}
pub struct BinnedT3Stream {
// TODO get rid of box:
inp: Pin<Box<dyn Stream<Item = Result<StreamItem<RangeCompletableItem<MinMaxAvgScalarEventBatch>>, Error>> + Send>>,
//aggtor: Option<<<SK as BinnedStreamKind>::XBinnedEvents as AggregatableTdim<SK>>::Aggregator>,
aggtor: Option<Agg3>,
pub struct TBinnerStream<S, ETB>
where
S: Stream<Item = Sitemty<<ETB as EventsTimeBinner>::Input>> + Send + Unpin + 'static,
ETB: EventsTimeBinner + Send + Unpin + 'static,
{
inp: S,
spec: BinnedRange,
curbin: u32,
left: Option<Poll<Option<Sitemty<<ETB as EventsTimeBinner>::Input>>>>,
aggtor: Option<<ETB as EventsTimeBinner>::Aggregator>,
tmp_agg_results: VecDeque<<<ETB as EventsTimeBinner>::Aggregator as EventsTimeBinnerAggregator>::Output>,
inp_completed: bool,
all_bins_emitted: bool,
range_complete_observed: bool,
range_complete_emitted: bool,
left: Option<Poll<Option<Result<StreamItem<RangeCompletableItem<MinMaxAvgScalarEventBatch>>, Error>>>>,
errored: bool,
completed: bool,
tmp_agg_results: VecDeque<MinMaxAvgScalarBinBatch>,
}
impl BinnedT3Stream {
pub fn new<S>(inp: S, spec: BinnedRange) -> Self
where
S: Stream<Item = Result<StreamItem<RangeCompletableItem<MinMaxAvgScalarEventBatch>>, Error>> + Send + 'static,
{
impl<S, ETB> TBinnerStream<S, ETB>
where
S: Stream<Item = Sitemty<<ETB as EventsTimeBinner>::Input>> + Send + Unpin + 'static,
ETB: EventsTimeBinner,
{
pub fn new(inp: S, spec: BinnedRange) -> Self {
let range = spec.get_range(0);
Self {
inp: Box::pin(inp),
aggtor: Some(Agg3::new(range)),
inp,
spec,
curbin: 0,
left: None,
aggtor: Some(<ETB as EventsTimeBinner>::aggregator(range)),
tmp_agg_results: VecDeque::new(),
inp_completed: false,
all_bins_emitted: false,
range_complete_observed: false,
range_complete_emitted: false,
left: None,
errored: false,
completed: false,
tmp_agg_results: VecDeque::new(),
}
}
fn cur(
&mut self,
cx: &mut Context,
) -> Poll<Option<Result<StreamItem<RangeCompletableItem<MinMaxAvgScalarEventBatch>>, Error>>> {
fn cur(&mut self, cx: &mut Context) -> Poll<Option<Sitemty<<ETB as EventsTimeBinner>::Input>>> {
if let Some(cur) = self.left.take() {
cur
} else if self.inp_completed {
@@ -197,11 +199,13 @@ impl BinnedT3Stream {
let range = self.spec.get_range(self.curbin);
let ret = self
.aggtor
.replace(Agg3::new(range))
.replace(<ETB as EventsTimeBinner>::aggregator(range))
// TODO handle None case, or remove Option if Agg is always present
.unwrap()
.result();
self.tmp_agg_results = VecDeque::from(ret);
// TODO should we accumulate bins before emit? Maybe not, we want to stay responsive.
// Only if the frequency would be high, that would require cpu time checks. Worth it? Measure..
self.tmp_agg_results.push_back(ret);
if self.curbin >= self.spec.count as u32 {
self.all_bins_emitted = true;
}
@@ -209,8 +213,8 @@ impl BinnedT3Stream {
fn handle(
&mut self,
cur: Poll<Option<Result<StreamItem<RangeCompletableItem<MinMaxAvgScalarEventBatch>>, Error>>>,
) -> Option<Poll<Option<Result<StreamItem<RangeCompletableItem<MinMaxAvgScalarBinBatch>>, Error>>>> {
cur: Poll<Option<Sitemty<<ETB as EventsTimeBinner>::Input>>>,
) -> Option<Poll<Option<Sitemty<<ETB as EventsTimeBinner>::Output>>>> {
use Poll::*;
match cur {
Ready(Some(Ok(item))) => match item {
@@ -228,9 +232,9 @@ impl BinnedT3Stream {
None
} else {
let ag = self.aggtor.as_mut().unwrap();
if item.ends_before(ag.range.clone()) {
if item.ends_before(ag.range().clone()) {
None
} else if item.starts_after(ag.range.clone()) {
} else if item.starts_after(ag.range().clone()) {
self.left =
Some(Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(item))))));
self.cycle_current_bin();
@@ -238,7 +242,7 @@ impl BinnedT3Stream {
None
} else {
ag.ingest(&item);
if item.ends_after(ag.range.clone()) {
if item.ends_after(ag.range().clone()) {
self.left =
Some(Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(item))))));
self.cycle_current_bin();
@@ -269,8 +273,12 @@ impl BinnedT3Stream {
}
}
impl Stream for BinnedT3Stream {
type Item = Result<StreamItem<RangeCompletableItem<MinMaxAvgScalarBinBatch>>, Error>;
impl<S, ETB> Stream for TBinnerStream<S, ETB>
where
S: Stream<Item = Sitemty<<ETB as EventsTimeBinner>::Input>> + Send + Unpin + 'static,
ETB: EventsTimeBinner + Send + Unpin + 'static,
{
type Item = Sitemty<<ETB as EventsTimeBinner>::Output>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
use Poll::*;
+135 -3
View File
@@ -1,5 +1,7 @@
use crate::binned::{EventsNodeProcessor, NumOps};
use crate::agg::streams::Appendable;
use crate::binned::{EventsNodeProcessor, NumOps, PushableIndex, RangeOverlapInfo, WithLen, WithTimestamps};
use crate::decode::EventValues;
use netpod::NanoRange;
use serde::de::DeserializeOwned;
use serde::{Deserialize, Serialize};
use std::marker::PhantomData;
@@ -29,6 +31,83 @@ pub struct XBinnedScalarEvents<NTY> {
xbincount: Vec<u32>,
}
impl<NTY> XBinnedScalarEvents<NTY> {
    /// Creates a batch that holds no events: every per-event column starts
    /// out as an empty vector.
    pub fn empty() -> Self {
        Self {
            tss: Vec::new(),
            mins: Vec::new(),
            maxs: Vec::new(),
            avgs: Vec::new(),
            xbincount: Vec::new(),
        }
    }
}
impl<NTY> WithLen for XBinnedScalarEvents<NTY> {
    /// Number of events in the batch, taken from the timestamp column.
    fn len(&self) -> usize {
        let Self { tss, .. } = self;
        tss.len()
    }
}
impl<NTY> WithTimestamps for XBinnedScalarEvents<NTY> {
    /// Timestamp of the event at position `ix`.
    ///
    /// # Panics
    ///
    /// Panics if `ix` is out of bounds for the timestamp column.
    fn ts(&self, ix: usize) -> u64 {
        let stamps = &self.tss;
        stamps[ix]
    }
}
impl<NTY> RangeOverlapInfo for XBinnedScalarEvents<NTY> {
    /// True when the last event lies strictly before `range.beg`.
    ///
    /// An empty batch reports `true`: it contributes nothing to the range.
    fn ends_before(&self, range: NanoRange) -> bool {
        self.tss.last().map_or(true, |&ts| ts < range.beg)
    }

    /// True when the last event lies at or after `range.end`.
    ///
    /// An empty batch has no event beyond the range, so it reports `false`
    /// instead of panicking.
    fn ends_after(&self, range: NanoRange) -> bool {
        self.tss.last().map_or(false, |&ts| ts >= range.end)
    }

    /// True when the first event lies at or after `range.end`.
    ///
    /// An empty batch has no first event, so it reports `false` instead of
    /// panicking.
    fn starts_after(&self, range: NanoRange) -> bool {
        self.tss.first().map_or(false, |&ts| ts >= range.end)
    }
}
impl<NTY> PushableIndex for XBinnedScalarEvents<NTY>
where
    NTY: NumOps,
{
    /// Copies the single event at position `ix` of `src` onto the end of
    /// `self`, column by column.
    ///
    /// # Panics
    ///
    /// Panics if `ix` is out of bounds for any column of `src`.
    fn push_index(&mut self, src: &Self, ix: usize) {
        let Self {
            tss,
            xbincount,
            mins,
            maxs,
            avgs,
        } = src;
        self.tss.push(tss[ix]);
        self.xbincount.push(xbincount[ix]);
        self.mins.push(mins[ix]);
        self.maxs.push(maxs[ix]);
        self.avgs.push(avgs[ix]);
    }
}
impl<NTY> Appendable for XBinnedScalarEvents<NTY>
where
    NTY: NumOps,
{
    /// Returns a batch without events.
    fn empty() -> Self {
        // Inherent associated functions take precedence over trait ones, so
        // this calls the inherent `XBinnedScalarEvents::empty`, not itself.
        Self::empty()
    }

    /// Appends every event of `src` to the end of `self`, keeping the
    /// columns aligned.
    fn append(&mut self, src: &Self) {
        self.tss.extend(src.tss.iter().copied());
        self.xbincount.extend(src.xbincount.iter().copied());
        self.mins.extend(src.mins.iter().cloned());
        self.maxs.extend(src.maxs.iter().cloned());
        self.avgs.extend(src.avgs.iter().copied());
    }
}
/// Type-level handle for X-binning waveform events with element type `NTY`.
///
/// The struct stores no data; its only field is a `PhantomData` marker.
// NOTE(review): the impl that follows appears to map `Vec<NTY>` waveforms to
// `XBinnedScalarEvents<NTY>`; its header is not fully visible here — confirm.
pub struct WaveXBinner<NTY> {
// Zero-sized marker that ties the otherwise unused parameter `NTY` to the type.
_m1: PhantomData<NTY>,
}
@@ -40,7 +119,60 @@ where
type Input = Vec<NTY>;
type Output = XBinnedScalarEvents<NTY>;
fn process(_inp: EventValues<Self::Input>) -> Self::Output {
todo!()
/// Reduces every waveform event to scalar statistics over its samples.
///
/// For each event this records the minimum and maximum sample, the mean of
/// all non-NaN samples as `f32` (`NaN` when no such sample exists), and the
/// number of samples that were reduced. Timestamps are moved over from the
/// input unchanged.
///
/// # Panics
///
/// Panics if an event carries no samples, or if `inp.values` has fewer
/// entries than `inp.tss`.
fn process(inp: EventValues<Self::Input>) -> Self::Output {
    let nev = inp.tss.len();
    let mut ret = XBinnedScalarEvents {
        tss: inp.tss,
        xbincount: Vec::with_capacity(nev),
        mins: Vec::with_capacity(nev),
        maxs: Vec::with_capacity(nev),
        avgs: Vec::with_capacity(nev),
    };
    for i1 in 0..nev {
        let vals = &inp.values[i1];
        let mut min = None;
        let mut max = None;
        let mut sum = 0f32;
        let mut count = 0;
        for &v in vals.iter() {
            // Keep the current extreme unless the new sample strictly beats it.
            min = Some(match min {
                Some(cur) if !(v < cur) => cur,
                _ => v,
            });
            max = Some(match max {
                Some(cur) if !(v > cur) => cur,
                _ => v,
            });
            // NaN samples are left out of the mean so they do not poison it.
            let vf = v.as_();
            if !vf.is_nan() {
                sum += vf;
                count += 1;
            }
        }
        // Record how many samples were collapsed for THIS event. The previous
        // code pushed the batch's event count, which is the same for every row.
        ret.xbincount.push(vals.len() as u32);
        // TODO while X-binning I expect values, otherwise it is illegal input.
        ret.mins
            .push(min.expect("X-binning requires at least one value per event"));
        ret.maxs
            .push(max.expect("X-binning requires at least one value per event"));
        if count == 0 {
            ret.avgs.push(f32::NAN);
        } else {
            ret.avgs.push(sum / count as f32);
        }
    }
    ret
}
}