This commit is contained in:
Dominik Werder
2021-06-14 23:10:14 +02:00
parent bb6d853b78
commit bebce14f56
9 changed files with 881 additions and 67 deletions

View File

@@ -23,13 +23,14 @@ use futures_util::{FutureExt, StreamExt};
use netpod::log::*;
use netpod::timeunits::SEC;
use netpod::{
BinnedRange, ByteOrder, NanoRange, NodeConfigCached, PerfOpts, PreBinnedPatchIterator, PreBinnedPatchRange,
ScalarType, Shape,
AggKind, BinnedRange, ByteOrder, NanoRange, NodeConfigCached, PerfOpts, PreBinnedPatchIterator,
PreBinnedPatchRange, ScalarType, Shape,
};
use num_traits::{AsPrimitive, Bounded, Zero};
use num_traits::{AsPrimitive, Bounded, Float, Zero};
use parse::channelconfig::{extract_matching_config_entry, read_local_config, MatchingConfigEntry};
use serde::de::DeserializeOwned;
use serde::{Deserialize, Serialize, Serializer};
use std::fmt;
use std::fmt::Debug;
use std::future::Future;
use std::marker::PhantomData;
@@ -122,8 +123,13 @@ where
range: query.range().clone(),
agg_kind: query.agg_kind().clone(),
};
let x_bin_count = if let AggKind::DimXBinsN(n) = query.agg_kind() {
*n as usize
} else {
0
};
let s = MergedFromRemotes::<ENP>::new(evq, perf_opts, node_config.node_config.cluster.clone());
let s = TBinnerStream::<_, <ENP as EventsNodeProcessor>::Output>::new(s, range);
let s = TBinnerStream::<_, <ENP as EventsNodeProcessor>::Output>::new(s, range, x_bin_count);
let ret = BinnedResponseStat {
stream: Box::pin(s),
bin_count,
@@ -770,13 +776,47 @@ pub trait NumOps:
+ Serialize
+ DeserializeOwned
{
fn min_or_nan() -> Self;
fn max_or_nan() -> Self;
fn is_nan(&self) -> bool;
}
impl<T> NumOps for T where
T: Send + Unpin + Debug + Zero + AsPrimitive<f32> + Bounded + PartialOrd + SubFrId + Serialize + DeserializeOwned
{
fn tmp() {}
macro_rules! impl_num_ops {
($ty:ident, $min_or_nan:ident, $max_or_nan:ident, $is_nan:ident) => {
impl NumOps for $ty {
fn min_or_nan() -> Self {
$ty::$min_or_nan
}
fn max_or_nan() -> Self {
$ty::$max_or_nan
}
fn is_nan(&self) -> bool {
$is_nan(self)
}
}
};
}
fn is_nan_int<T>(x: &T) -> bool {
false
}
fn is_nan_float<T: Float>(x: &T) -> bool {
x.is_nan()
}
impl_num_ops!(u8, MIN, MAX, is_nan_int);
impl_num_ops!(u16, MIN, MAX, is_nan_int);
impl_num_ops!(u32, MIN, MAX, is_nan_int);
impl_num_ops!(u64, MIN, MAX, is_nan_int);
impl_num_ops!(i8, MIN, MAX, is_nan_int);
impl_num_ops!(i16, MIN, MAX, is_nan_int);
impl_num_ops!(i32, MIN, MAX, is_nan_int);
impl_num_ops!(i64, MIN, MAX, is_nan_int);
impl_num_ops!(f32, NAN, NAN, is_nan_float);
impl_num_ops!(f64, NAN, NAN, is_nan_float);
pub trait EventsDecoder {
type Output;
fn ingest(&mut self, event: &[u8]);
@@ -786,7 +826,8 @@ pub trait EventsDecoder {
pub trait EventsNodeProcessor: Send + Unpin {
type Input;
type Output: Send + Unpin + DeserializeOwned + WithTimestamps + TimeBinnableType;
fn process(inp: EventValues<Self::Input>) -> Self::Output;
fn create(shape: Shape) -> Self;
fn process(&self, inp: EventValues<Self::Input>) -> Self::Output;
}
pub trait TimeBins: Send + Unpin + WithLen + Appendable + FilterFittingInside {
@@ -799,16 +840,17 @@ pub struct MinMaxAvgBins<NTY> {
pub ts1s: Vec<u64>,
pub ts2s: Vec<u64>,
pub counts: Vec<u64>,
// TODO get rid of Option:
pub mins: Vec<Option<NTY>>,
pub maxs: Vec<Option<NTY>>,
pub avgs: Vec<Option<f32>>,
}
impl<NTY> std::fmt::Debug for MinMaxAvgBins<NTY>
impl<NTY> fmt::Debug for MinMaxAvgBins<NTY>
where
NTY: std::fmt::Debug,
NTY: fmt::Debug,
{
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(
fmt,
"MinMaxAvgBins count {} ts1s {:?} ts2s {:?} counts {:?} mins {:?} maxs {:?} avgs {:?}",
@@ -951,7 +993,7 @@ where
type Output = MinMaxAvgBins<NTY>;
type Aggregator = MinMaxAvgBinsAggregator<NTY>;
fn aggregator(range: NanoRange) -> Self::Aggregator {
fn aggregator(range: NanoRange, bin_count: usize) -> Self::Aggregator {
Self::Aggregator::new(range)
}
}
@@ -1103,7 +1145,8 @@ impl<NTY> EventValuesAggregator<NTY> {
Self {
range,
count: 0,
min: None,
// TODO get rid of Option
min: err::todoval(),
max: None,
sumc: 0,
sum: 0f32,
@@ -1282,3 +1325,400 @@ pub enum RangeCompletableItem<T> {
RangeComplete,
Data(T),
}
#[derive(Clone, Serialize, Deserialize)]
pub struct MinMaxAvgWaveBins<NTY> {
pub ts1s: Vec<u64>,
pub ts2s: Vec<u64>,
pub counts: Vec<u64>,
pub mins: Vec<Option<Vec<NTY>>>,
pub maxs: Vec<Option<Vec<NTY>>>,
pub avgs: Vec<Option<Vec<f32>>>,
}
impl<NTY> fmt::Debug for MinMaxAvgWaveBins<NTY>
where
NTY: fmt::Debug,
{
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(
fmt,
"MinMaxAvgBins count {} ts1s {:?} ts2s {:?} counts {:?} mins {:?} maxs {:?} avgs {:?}",
self.ts1s.len(),
self.ts1s.iter().map(|k| k / SEC).collect::<Vec<_>>(),
self.ts2s.iter().map(|k| k / SEC).collect::<Vec<_>>(),
self.counts,
self.mins,
self.maxs,
self.avgs,
)
}
}
impl<NTY> MinMaxAvgWaveBins<NTY> {
pub fn empty() -> Self {
Self {
ts1s: vec![],
ts2s: vec![],
counts: vec![],
mins: vec![],
maxs: vec![],
avgs: vec![],
}
}
}
impl<NTY> FitsInside for MinMaxAvgWaveBins<NTY> {
fn fits_inside(&self, range: NanoRange) -> Fits {
if self.ts1s.is_empty() {
Fits::Empty
} else {
let t1 = *self.ts1s.first().unwrap();
let t2 = *self.ts2s.last().unwrap();
if t2 <= range.beg {
Fits::Lower
} else if t1 >= range.end {
Fits::Greater
} else if t1 < range.beg && t2 > range.end {
Fits::PartlyLowerAndGreater
} else if t1 < range.beg {
Fits::PartlyLower
} else if t2 > range.end {
Fits::PartlyGreater
} else {
Fits::Inside
}
}
}
}
impl<NTY> FilterFittingInside for MinMaxAvgWaveBins<NTY> {
fn filter_fitting_inside(self, fit_range: NanoRange) -> Option<Self> {
match self.fits_inside(fit_range) {
Fits::Inside | Fits::PartlyGreater | Fits::PartlyLower | Fits::PartlyLowerAndGreater => Some(self),
_ => None,
}
}
}
impl<NTY> RangeOverlapInfo for MinMaxAvgWaveBins<NTY> {
fn ends_before(&self, range: NanoRange) -> bool {
match self.ts2s.last() {
Some(&ts) => ts <= range.beg,
None => true,
}
}
fn ends_after(&self, range: NanoRange) -> bool {
match self.ts2s.last() {
Some(&ts) => ts > range.end,
None => panic!(),
}
}
fn starts_after(&self, range: NanoRange) -> bool {
match self.ts1s.first() {
Some(&ts) => ts >= range.end,
None => panic!(),
}
}
}
impl<NTY> TimeBins for MinMaxAvgWaveBins<NTY>
where
NTY: NumOps,
{
fn ts1s(&self) -> &Vec<u64> {
&self.ts1s
}
fn ts2s(&self) -> &Vec<u64> {
&self.ts2s
}
}
impl<NTY> WithLen for MinMaxAvgWaveBins<NTY> {
fn len(&self) -> usize {
self.ts1s.len()
}
}
impl<NTY> Appendable for MinMaxAvgWaveBins<NTY>
where
NTY: NumOps,
{
fn empty() -> Self {
Self::empty()
}
fn append(&mut self, src: &Self) {
self.ts1s.extend_from_slice(&src.ts1s);
self.ts2s.extend_from_slice(&src.ts2s);
self.counts.extend_from_slice(&src.counts);
self.mins.extend_from_slice(&src.mins);
self.maxs.extend_from_slice(&src.maxs);
self.avgs.extend_from_slice(&src.avgs);
}
}
impl<NTY> ReadableFromFile for MinMaxAvgWaveBins<NTY>
where
NTY: NumOps,
{
// TODO this function is not needed in the trait:
fn read_from_file(file: File) -> Result<ReadPbv<Self>, Error> {
Ok(ReadPbv::new(file))
}
fn from_buf(buf: &[u8]) -> Result<Self, Error> {
let dec = serde_cbor::from_slice(&buf)?;
Ok(dec)
}
}
impl<NTY> TimeBinnableType for MinMaxAvgWaveBins<NTY>
where
NTY: NumOps,
{
type Output = MinMaxAvgWaveBins<NTY>;
type Aggregator = MinMaxAvgWaveBinsAggregator<NTY>;
fn aggregator(range: NanoRange, x_bin_count: usize) -> Self::Aggregator {
Self::Aggregator::new(range, x_bin_count)
}
}
impl<NTY> ToJsonResult for Sitemty<MinMaxAvgWaveBins<NTY>>
where
NTY: NumOps,
{
fn to_json_result(&self) -> Result<Box<dyn ToJsonBytes>, Error> {
Ok(Box::new(serde_json::Value::String(format!(
"MinMaxAvgBins/non-json-item"
))))
}
}
pub struct MinMaxAvgWaveBinsCollected<NTY> {
_m1: PhantomData<NTY>,
}
impl<NTY> MinMaxAvgWaveBinsCollected<NTY> {
pub fn new() -> Self {
Self { _m1: PhantomData }
}
}
#[derive(Serialize)]
pub struct MinMaxAvgWaveBinsCollectedResult<NTY> {
ts0: u64,
tsoff: Vec<u64>,
//ts_bin_edges: Vec<IsoDateTime>,
counts: Vec<u64>,
mins: Vec<Option<Vec<NTY>>>,
maxs: Vec<Option<Vec<NTY>>>,
avgs: Vec<Option<Vec<f32>>>,
#[serde(skip_serializing_if = "Bool::is_false", rename = "finalisedRange")]
finalised_range: bool,
#[serde(skip_serializing_if = "Zero::is_zero", rename = "missingBins")]
missing_bins: u32,
#[serde(skip_serializing_if = "Option::is_none", rename = "continueAt")]
//continue_at: Option<IsoDateTime>,
continue_at: Option<u64>,
}
pub struct MinMaxAvgWaveBinsCollector<NTY> {
bin_count_exp: u32,
timed_out: bool,
range_complete: bool,
vals: MinMaxAvgWaveBins<NTY>,
_m1: PhantomData<NTY>,
}
impl<NTY> MinMaxAvgWaveBinsCollector<NTY> {
pub fn new(bin_count_exp: u32) -> Self {
Self {
bin_count_exp,
timed_out: false,
range_complete: false,
vals: MinMaxAvgWaveBins::<NTY>::empty(),
_m1: PhantomData,
}
}
}
impl<NTY> WithLen for MinMaxAvgWaveBinsCollector<NTY>
where
NTY: NumOps + Serialize,
{
fn len(&self) -> usize {
self.vals.ts1s.len()
}
}
impl<NTY> Collector for MinMaxAvgWaveBinsCollector<NTY>
where
NTY: NumOps + Serialize,
{
type Input = MinMaxAvgWaveBins<NTY>;
type Output = MinMaxAvgWaveBinsCollectedResult<NTY>;
fn ingest(&mut self, src: &Self::Input) {
Appendable::append(&mut self.vals, src);
}
fn set_range_complete(&mut self) {
self.range_complete = true;
}
fn set_timed_out(&mut self) {
self.timed_out = true;
}
fn result(self) -> Result<Self::Output, Error> {
let ts0 = self.vals.ts1s.first().map_or(0, |k| *k / SEC);
let bin_count = self.vals.ts1s.len() as u32;
let mut tsoff: Vec<_> = self.vals.ts1s.iter().map(|k| *k - ts0 * SEC).collect();
if let Some(&k) = self.vals.ts2s.last() {
tsoff.push(k - ts0 * SEC);
}
let tsoff = tsoff;
let _iso: Vec<_> = tsoff
.iter()
.map(|&k| IsoDateTime(Utc.timestamp_nanos(k as i64)))
.collect();
let continue_at = if self.vals.ts1s.len() < self.bin_count_exp as usize {
match tsoff.last() {
Some(k) => Some(k.clone()),
None => Err(Error::with_msg("partial_content but no bin in result"))?,
}
} else {
None
};
let ret = MinMaxAvgWaveBinsCollectedResult {
ts0,
tsoff,
counts: self.vals.counts,
mins: self.vals.mins,
maxs: self.vals.maxs,
avgs: self.vals.avgs,
finalised_range: self.range_complete,
missing_bins: self.bin_count_exp - bin_count,
continue_at,
};
Ok(ret)
}
}
impl<NTY> Collectable for MinMaxAvgWaveBins<NTY>
where
NTY: NumOps + Serialize,
{
type Collector = MinMaxAvgWaveBinsCollector<NTY>;
fn new_collector(bin_count_exp: u32) -> Self::Collector {
Self::Collector::new(bin_count_exp)
}
}
pub struct MinMaxAvgWaveBinsAggregator<NTY> {
range: NanoRange,
count: u64,
min: Vec<NTY>,
max: Vec<NTY>,
sum: Vec<f32>,
sumc: u64,
}
impl<NTY> MinMaxAvgWaveBinsAggregator<NTY>
where
NTY: NumOps,
{
pub fn new(range: NanoRange, x_bin_count: usize) -> Self {
Self {
range,
count: 0,
min: vec![NTY::min_or_nan(); x_bin_count],
max: vec![NTY::max_or_nan(); x_bin_count],
sum: vec![0f32; x_bin_count],
sumc: 0,
}
}
}
impl<NTY> TimeBinnableTypeAggregator for MinMaxAvgWaveBinsAggregator<NTY>
where
NTY: NumOps,
{
type Input = MinMaxAvgWaveBins<NTY>;
type Output = MinMaxAvgWaveBins<NTY>;
fn range(&self) -> &NanoRange {
&self.range
}
fn ingest(&mut self, item: &Self::Input) {
for i1 in 0..item.ts1s.len() {
if item.ts2s[i1] <= self.range.beg {
continue;
} else if item.ts1s[i1] >= self.range.end {
continue;
} else {
// the input can contain bins where no events did fall into.
match &item.mins[i1] {
None => {}
Some(inp) => {
for (a, b) in self.min.iter_mut().zip(inp.iter()) {
if *b < *a {
*a = *b;
}
}
}
}
match &item.maxs[i1] {
None => {}
Some(inp) => {
for (a, b) in self.max.iter_mut().zip(inp.iter()) {
if *b > *a {
*a = *b;
}
}
}
}
match &item.avgs[i1] {
None => {}
Some(inp) => {
for (a, b) in self.sum.iter_mut().zip(inp.iter()) {
*a += *b;
}
}
}
self.sumc += 1;
self.count += item.counts[i1];
}
}
}
fn result(self) -> Self::Output {
if self.sumc == 0 {
Self::Output {
ts1s: vec![self.range.beg],
ts2s: vec![self.range.end],
counts: vec![self.count],
mins: vec![None],
maxs: vec![None],
avgs: vec![None],
}
} else {
let avg = self.sum.iter().map(|j| *j / self.sumc as f32).collect();
Self::Output {
ts1s: vec![self.range.beg],
ts2s: vec![self.range.end],
counts: vec![self.count],
mins: vec![Some(self.min)],
maxs: vec![Some(self.max)],
avgs: vec![Some(avg)],
}
}
}
}