Add alternative time binner

This commit is contained in:
Dominik Werder
2022-11-18 12:59:55 +01:00
parent eebf8665ce
commit b3225ae4c1
17 changed files with 1283 additions and 177 deletions

View File

@@ -5,6 +5,7 @@ use crate::{Empty, Events, ScalarOps, WithLen};
use crate::{TimeBinnable, TimeBinnableType, TimeBinnableTypeAggregator, TimeBinner};
use err::Error;
use netpod::log::*;
use netpod::timeunits::SEC;
use netpod::NanoRange;
use serde::{Deserialize, Serialize};
use std::any::Any;
@@ -53,15 +54,25 @@ where
NTY: fmt::Debug,
{
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(
fmt,
"count {} ts {:?} .. {:?} vals {:?} .. {:?}",
self.tss.len(),
self.tss.front(),
self.tss.back(),
self.values.front(),
self.values.back(),
)
if true {
write!(
fmt,
"EventsDim0 {{ count {} ts {:?} vals {:?} }}",
self.tss.len(),
self.tss.iter().map(|x| x / SEC).collect::<Vec<_>>(),
self.values,
)
} else {
write!(
fmt,
"EventsDim0 {{ count {} ts {:?} .. {:?} vals {:?} .. {:?} }}",
self.tss.len(),
self.tss.front().map(|x| x / SEC),
self.tss.back().map(|x| x / SEC),
self.values.front(),
self.values.back(),
)
}
}
}
@@ -539,7 +550,8 @@ impl<NTY: ScalarOps> Events for EventsDim0<NTY> {
}
fn take_new_events_until_ts(&mut self, ts_end: u64) -> Box<dyn Events> {
let n1 = self.tss.iter().take_while(|&&x| x < ts_end).count();
// TODO improve the search
let n1 = self.tss.iter().take_while(|&&x| x <= ts_end).count();
let tss = self.tss.drain(..n1).collect();
let pulses = self.pulses.drain(..n1).collect();
let values = self.values.drain(..n1).collect();
@@ -547,6 +559,32 @@ impl<NTY: ScalarOps> Events for EventsDim0<NTY> {
Box::new(ret)
}
/// Moves the leading events out of `self` into a newly created container.
///
/// The number of events moved is the length of the leading run of timestamps
/// that are `<= ts_end`. The same number of entries is drained from `tss`,
/// `pulses` and `values`, so the three columns stay aligned.
fn move_into_fresh(&mut self, ts_end: u64) -> Box<dyn Events> {
    // TODO improve the search
    let count = self.tss.iter().take_while(|&&ts| ts <= ts_end).count();
    let moved = Self {
        tss: self.tss.drain(..count).collect(),
        pulses: self.pulses.drain(..count).collect(),
        values: self.values.drain(..count).collect(),
    };
    Box::new(moved)
}
/// Appends the leading events of `self` (timestamp `<= ts_end`) to `tgt`.
///
/// `tgt` must hold the same concrete type as `self`. If the downcast fails,
/// nothing is moved, a message is written to stderr and `Err(())` is returned.
fn move_into_existing(&mut self, tgt: &mut Box<dyn Events>, ts_end: u64) -> Result<(), ()> {
    // TODO as_any and as_any_mut are declared on unrelated traits. Simplify.
    let dst = match tgt.as_any_mut().downcast_mut::<Self>() {
        Some(dst) => dst,
        None => {
            eprintln!("downcast to EventsDim0 FAILED");
            return Err(());
        }
    };
    // TODO improve the search
    let count = self.tss.iter().take_while(|&&ts| ts <= ts_end).count();
    // TODO make it harder to forget new members when the struct may get modified in the future
    dst.tss.extend(self.tss.drain(..count));
    dst.pulses.extend(self.pulses.drain(..count));
    dst.values.extend(self.values.drain(..count));
    Ok(())
}
/// Returns the first timestamp in `tss`, or `None` when there are no events.
fn ts_min(&self) -> Option<u64> {
    self.tss.front().copied()
}

View File

@@ -1,10 +1,14 @@
pub mod binsdim0;
pub mod eventsdim0;
pub mod merger;
pub mod merger_cev;
pub mod streams;
#[cfg(test)]
pub mod test;
pub mod testgen;
pub mod timebin;
use crate::streams::Collector;
use chrono::{DateTime, TimeZone, Utc};
use futures_util::FutureExt;
use futures_util::Stream;
@@ -14,6 +18,7 @@ use items::RangeCompletableItem;
use items::Sitemty;
use items::StreamItem;
use items::SubFrId;
use merger_cev::MergeableCev;
use netpod::log::*;
use netpod::timeunits::*;
use netpod::{AggKind, NanoRange, ScalarType, Shape};
@@ -27,8 +32,6 @@ use std::time::Instant;
use streams::Collectable;
use streams::ToJsonResult;
use crate::streams::Collector;
/// Returns `true` exactly when `*x` is `false`.
///
/// Takes the flag by reference so the function can be used where a
/// `fn(&bool) -> bool` predicate is expected.
pub fn bool_is_false(x: &bool) -> bool {
    !*x
}
@@ -276,6 +279,8 @@ pub trait TimeBinner: Send {
fn set_range_complete(&mut self);
}
// TODO remove the Any bound. Factor out into custom AsAny trait.
/// Provides a time-binned representation of the implementing type.
/// In contrast to `TimeBinnableType` this is meant for trait objects.
pub trait TimeBinnable: fmt::Debug + WithLen + RangeOverlapInfo + Any + Send {
@@ -287,6 +292,8 @@ pub trait TimeBinnable: fmt::Debug + WithLen + RangeOverlapInfo + Any + Send {
fn to_box_to_json_result(&self) -> Box<dyn ToJsonResult>;
}
// TODO can I remove the Any bound?
/// Container of some form of events, for use as trait object.
pub trait Events: fmt::Debug + Any + Collectable + TimeBinnable + Send + erased_serde::Serialize {
fn as_time_binnable(&self) -> &dyn TimeBinnable;
@@ -296,6 +303,8 @@ pub trait Events: fmt::Debug + Any + Collectable + TimeBinnable + Send + erased_
fn ts_min(&self) -> Option<u64>;
fn ts_max(&self) -> Option<u64>;
fn take_new_events_until_ts(&mut self, ts_end: u64) -> Box<dyn Events>;
fn move_into_fresh(&mut self, ts_end: u64) -> Box<dyn Events>;
fn move_into_existing(&mut self, tgt: &mut Box<dyn Events>, ts_end: u64) -> Result<(), ()>;
fn clone_dyn(&self) -> Box<dyn Events>;
fn partial_eq_dyn(&self, other: &dyn Events) -> bool;
fn serde_id(&self) -> &'static str;
@@ -304,6 +313,35 @@ pub trait Events: fmt::Debug + Any + Collectable + TimeBinnable + Send + erased_
erased_serde::serialize_trait_object!(Events);
impl crate::merger::Mergeable for Box<dyn Events> {
fn len(&self) -> usize {
self.as_ref().len()
}
fn ts_min(&self) -> Option<u64> {
self.as_ref().ts_min()
}
fn ts_max(&self) -> Option<u64> {
self.as_ref().ts_max()
}
fn is_compatible_target(&self, _tgt: &Self) -> bool {
// TODO currently unused
todo!()
}
fn move_into_fresh(&mut self, ts_end: u64) -> Self {
self.as_mut().move_into_fresh(ts_end)
}
fn move_into_existing(&mut self, tgt: &mut Self, ts_end: u64) -> Result<(), merger::MergeError> {
self.as_mut()
.move_into_existing(tgt, ts_end)
.map_err(|()| merger::MergeError::NotCompatible)
}
}
impl PartialEq for Box<dyn Events> {
fn eq(&self, other: &Self) -> bool {
Events::partial_eq_dyn(self.as_ref(), other.as_ref())
@@ -319,7 +357,7 @@ impl WithLen for EventsCollector {
}
impl Collector for EventsCollector {
fn ingest(&mut self, src: &mut dyn Collectable) {
fn ingest(&mut self, _src: &mut dyn Collectable) {
todo!()
}
@@ -534,6 +572,7 @@ pub fn empty_binned_dyn(scalar_type: &ScalarType, shape: &Shape, agg_kind: &AggK
}
}
// TODO maybe rename to ChannelStatus?
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum ConnStatus {
Connect,
@@ -546,20 +585,9 @@ pub struct ConnStatusEvent {
pub status: ConnStatus,
}
trait MergableEvents: Any {
fn ts_min(&self) -> Option<u64>;
fn ts_max(&self) -> Option<u64>;
}
impl<T: MergableEvents> MergableEvents for Box<T> {
fn ts_min(&self) -> Option<u64> {
eprintln!("TODO MergableEvents for Box<T>");
err::todoval()
}
fn ts_max(&self) -> Option<u64> {
eprintln!("TODO MergableEvents for Box<T>");
err::todoval()
impl ConnStatusEvent {
pub fn new(ts: u64, status: ConnStatus) -> Self {
Self { ts, status }
}
}
@@ -717,7 +745,7 @@ impl PartialEq for ChannelEvents {
}
}
impl MergableEvents for ChannelEvents {
impl MergeableCev for ChannelEvents {
fn ts_min(&self) -> Option<u64> {
use ChannelEvents::*;
match self {
@@ -732,6 +760,179 @@ impl MergableEvents for ChannelEvents {
}
}
impl crate::merger::Mergeable for ChannelEvents {
fn len(&self) -> usize {
match self {
ChannelEvents::Events(k) => k.len(),
ChannelEvents::Status(_) => 1,
}
}
fn ts_min(&self) -> Option<u64> {
match self {
ChannelEvents::Events(k) => k.ts_min(),
ChannelEvents::Status(k) => Some(k.ts),
}
}
fn ts_max(&self) -> Option<u64> {
match self {
ChannelEvents::Events(k) => k.ts_max(),
ChannelEvents::Status(k) => Some(k.ts),
}
}
fn is_compatible_target(&self, tgt: &Self) -> bool {
use ChannelEvents::*;
match self {
Events(_) => {
// TODO better to delegate this to inner type?
if let Events(_) = tgt {
true
} else {
false
}
}
Status(_) => {
// TODO better to delegate this to inner type?
if let Status(_) = tgt {
true
} else {
false
}
}
}
}
fn move_into_fresh(&mut self, ts_end: u64) -> Self {
match self {
ChannelEvents::Events(k) => ChannelEvents::Events(k.move_into_fresh(ts_end)),
ChannelEvents::Status(k) => ChannelEvents::Status(k.clone()),
}
}
fn move_into_existing(&mut self, tgt: &mut Self, ts_end: u64) -> Result<(), merger::MergeError> {
match self {
ChannelEvents::Events(k) => match tgt {
ChannelEvents::Events(tgt) => k.move_into_existing(tgt, ts_end),
ChannelEvents::Status(_) => Err(merger::MergeError::NotCompatible),
},
ChannelEvents::Status(_) => match tgt {
ChannelEvents::Events(_) => Err(merger::MergeError::NotCompatible),
ChannelEvents::Status(_) => Err(merger::MergeError::Full),
},
}
}
}
/// `Collectable` for `ChannelEvents`; creating a collector is not implemented yet.
impl Collectable for ChannelEvents {
    fn new_collector(&self) -> Box<dyn Collector> {
        // Neither variant has a collector so far.
        match self {
            Self::Events(_) | Self::Status(_) => todo!(),
        }
    }

    fn as_any_mut(&mut self) -> &mut dyn Any {
        self
    }
}
/// Time binner for `ChannelEvents`.
///
/// Wraps an inner binner which is only created once the first
/// `ChannelEvents::Events` item is ingested.
pub struct ChannelEventsTimeBinner {
// TODO `ConnStatus` contains all the changes that can happen to a connection, but
// here we would rather require a simplified current state for binning purpose.
// Passed on to the inner binner when it gets created.
edges: Vec<u64>,
// Passed on to the inner binner when it gets created.
do_time_weight: bool,
// Set at construction time; shown in the `Debug` output.
conn_state: ConnStatus,
// `None` until the first events item has been ingested.
binner: Option<Box<dyn crate::TimeBinner>>,
}
/// Debug output shows only the connection state.
impl fmt::Debug for ChannelEventsTimeBinner {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut dbg = f.debug_struct("ChannelEventsTimeBinner");
        dbg.field("conn_state", &self.conn_state);
        dbg.finish()
    }
}
/// Bins `ChannelEvents` by forwarding event batches to an inner binner.
///
/// The inner binner is created from the first `Events` item that arrives.
/// Until then every forwarding method is a no-op: zero bins ready, `None`
/// from `bins_ready`.
impl crate::timebin::TimeBinner for ChannelEventsTimeBinner {
    type Input = ChannelEvents;
    type Output = Box<dyn crate::TimeBinned>;

    /// Feeds one item into the binner. Status items are only logged for now.
    fn ingest(&mut self, item: &mut Self::Input) {
        match item {
            ChannelEvents::Events(events) => {
                // Split the borrow so the closure can read `edges` and
                // `do_time_weight` while `binner` is borrowed mutably.
                let Self {
                    edges,
                    do_time_weight,
                    binner,
                    ..
                } = self;
                // Create the inner binner on first use, then always ingest.
                let binner = binner.get_or_insert_with(|| events.time_binner_new(edges.clone(), *do_time_weight));
                binner.ingest(events.as_time_binnable());
            }
            ChannelEvents::Status(item) => {
                warn!("TODO consider channel status in time binning {item:?}");
            }
        }
    }

    fn set_range_complete(&mut self) {
        if let Some(binner) = self.binner.as_mut() {
            binner.set_range_complete();
        }
    }

    fn bins_ready_count(&self) -> usize {
        self.binner.as_ref().map_or(0, |binner| binner.bins_ready_count())
    }

    fn bins_ready(&mut self) -> Option<Self::Output> {
        self.binner.as_mut().and_then(|binner| binner.bins_ready())
    }

    fn push_in_progress(&mut self, push_empty: bool) {
        if let Some(binner) = self.binner.as_mut() {
            binner.push_in_progress(push_empty);
        }
    }

    fn cycle(&mut self) {
        if let Some(binner) = self.binner.as_mut() {
            binner.cycle();
        }
    }
}
/// Creates the `ChannelEventsTimeBinner` for a `ChannelEvents` value.
impl crate::timebin::TimeBinnable for ChannelEvents {
    type TimeBinner = ChannelEventsTimeBinner;

    /// The inner binner always starts out unset. The initial connection state
    /// is `Connect` for an events item, otherwise the status carried by `self`.
    fn time_binner_new(&self, edges: Vec<u64>, do_time_weight: bool) -> Self::TimeBinner {
        let conn_state = match self {
            Self::Events(_) => ConnStatus::Connect,
            Self::Status(event) => event.status.clone(),
        };
        ChannelEventsTimeBinner {
            edges,
            do_time_weight,
            conn_state,
            binner: None,
        }
    }
}
// TODO do this with some blanket impl:
impl Collectable for Box<dyn Collectable> {
fn new_collector(&self) -> Box<dyn streams::Collector> {

View File

@@ -1,5 +1,6 @@
use crate::{ChannelEvents, Error, MergableEvents};
use crate::Error;
use futures_util::{Stream, StreamExt};
use items::sitem_data;
use items::{RangeCompletableItem, Sitemty, StreamItem};
use netpod::log::*;
use std::fmt;
@@ -7,50 +8,126 @@ use std::ops::ControlFlow;
use std::pin::Pin;
use std::task::{Context, Poll};
type MergeInp = Pin<Box<dyn Stream<Item = Sitemty<ChannelEvents>> + Send>>;
// Trace helper that currently expands to nothing: the first arm matches any
// token sequence, so the `eprintln!` arm below it is never selected.
// NOTE(review): presumably the two arms are swapped by hand to enable output — confirm.
#[allow(unused)]
macro_rules! trace2 {
($($arg:tt)*) => ();
($($arg:tt)*) => (eprintln!($($arg)*));
}
pub struct ChannelEventsMerger {
inps: Vec<Option<MergeInp>>,
items: Vec<Option<ChannelEvents>>,
// Trace helper that currently expands to nothing: the first arm matches any
// token sequence, so the `eprintln!` arm below it is never selected.
// NOTE(review): presumably the two arms are swapped by hand to enable output — confirm.
#[allow(unused)]
macro_rules! trace3 {
($($arg:tt)*) => ();
($($arg:tt)*) => (eprintln!($($arg)*));
}
// Trace helper that currently expands to nothing: the first arm matches any
// token sequence, so the `eprintln!` arm below it is never selected.
// NOTE(review): presumably the two arms are swapped by hand to enable output — confirm.
#[allow(unused)]
macro_rules! trace4 {
($($arg:tt)*) => ();
($($arg:tt)*) => (eprintln!($($arg)*));
}
/// Reason why a `Mergeable::move_into_existing` call did not move anything.
#[derive(Debug)]
pub enum MergeError {
/// Source and target cannot be combined.
NotCompatible,
/// The target cannot take more entries.
Full,
}
/// Converts a `MergeError` into the general error type, using the `Debug`
/// rendering of the variant as the message.
impl From<MergeError> for err::Error {
    fn from(e: MergeError) -> Self {
        let msg = format!("{e:?}");
        msg.into()
    }
}
/// A timestamped batch which `Merger` can split and append to an output batch.
pub trait Mergeable<Rhs = Self>: fmt::Debug + Unpin {
/// Number of entries in the batch.
fn len(&self) -> usize;
/// Lowest timestamp in the batch, `None` if there is none.
fn ts_min(&self) -> Option<u64>;
/// Highest timestamp in the batch, `None` if there is none.
fn ts_max(&self) -> Option<u64>;
// TODO remove, useless.
fn is_compatible_target(&self, tgt: &Rhs) -> bool;
// TODO rename to `append_*` to make it clear that they simply append, but not re-sort.
/// Moves entries up to `ts_end` out of `self` into a new batch.
fn move_into_fresh(&mut self, ts_end: u64) -> Rhs;
/// Moves entries up to `ts_end` out of `self` and appends them to `tgt`.
fn move_into_existing(&mut self, tgt: &mut Rhs, ts_end: u64) -> Result<(), MergeError>;
}
type MergeInp<T> = Pin<Box<dyn Stream<Item = Sitemty<T>> + Send>>;
/// Merges several input streams of `Mergeable` batches into one stream.
pub struct Merger<T> {
// One entry per input; an entry becomes `None` once its stream has ended.
inps: Vec<Option<MergeInp<T>>>,
// The current, not yet fully consumed batch of each input.
items: Vec<Option<T>>,
// Output batch that is being filled.
out: Option<T>,
// Set when a batch could not be appended to `out`; forces `out` to be emitted.
do_clear_out: bool,
// `out` is emitted once its length reaches this value.
out_max_len: usize,
// Set when an input signaled range-complete.
range_complete: bool,
// NOTE(review): `done`, `done2`, `done3` and `complete` look like successive
// shutdown stages of `poll_next` — confirm against the `Stream` impl.
done: bool,
done2: bool,
done3: bool,
complete: bool,
}
impl fmt::Debug for ChannelEventsMerger {
/// Debug output of the merger state. The input streams themselves are not
/// printed; only whether each input slot is still occupied.
impl<T> fmt::Debug for Merger<T>
where
    T: Mergeable,
{
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        let inps_present: Vec<bool> = self.inps.iter().map(Option::is_some).collect();
        let mut dbg = fmt.debug_struct(std::any::type_name::<Self>());
        dbg.field("inps", &inps_present);
        dbg.field("items", &self.items);
        dbg.field("out_max_len", &self.out_max_len);
        dbg.field("range_complete", &self.range_complete);
        dbg.field("done", &self.done);
        dbg.field("done2", &self.done2);
        dbg.field("done3", &self.done3);
        dbg.finish()
    }
}
impl ChannelEventsMerger {
pub fn new(inps: Vec<MergeInp>) -> Self {
impl<T> Merger<T>
where
T: Mergeable,
{
pub fn new(inps: Vec<MergeInp<T>>, out_max_len: usize) -> Self {
let n = inps.len();
Self {
done: false,
done2: false,
complete: false,
inps: inps.into_iter().map(|x| Some(x)).collect(),
items: (0..n).into_iter().map(|_| None).collect(),
out: None,
do_clear_out: false,
out_max_len,
range_complete: false,
done: false,
done2: false,
done3: false,
complete: false,
}
}
fn process(mut self: Pin<&mut Self>, _cx: &mut Context) -> Result<ControlFlow<ChannelEvents>, Error> {
/// Moves everything from `src` into the output batch by using the largest
/// possible timestamp as upper bound.
fn take_into_output_all(&mut self, src: &mut T) -> Result<(), MergeError> {
// TODO optimize the case when some large batch should be added to some existing small batch already in out.
// TODO maybe use two output slots?
self.take_into_output_upto(src, u64::MAX)
}
/// Moves the entries of `src` up to timestamp `upto` into the output batch.
///
/// If no output batch exists yet, a new one is created from `src`, which
/// cannot fail. Otherwise the entries are appended to the existing batch and
/// the result of that append is returned.
fn take_into_output_upto(&mut self, src: &mut T, upto: u64) -> Result<(), MergeError> {
    // TODO optimize the case when some large batch should be added to some existing small batch already in out.
    // TODO maybe use two output slots?
    match self.out.as_mut() {
        Some(out) => src.move_into_existing(out, upto),
        None => {
            trace2!("move into fresh");
            self.out = Some(src.move_into_fresh(upto));
            Ok(())
        }
    }
}
fn process(mut self: Pin<&mut Self>, _cx: &mut Context) -> Result<ControlFlow<()>, Error> {
use ControlFlow::*;
let mut tslows = [None, None];
for (i1, itemopt) in self.items.iter_mut().enumerate() {
if let Some(item) = itemopt {
let t1 = item.ts_min();
if let Some(t1) = t1 {
if let Some(t1) = item.ts_min() {
if let Some((_, a)) = tslows[0] {
if t1 < a {
tslows[1] = tslows[0];
@@ -70,58 +147,72 @@ impl ChannelEventsMerger {
tslows[0] = Some((i1, t1));
}
} else {
match item {
ChannelEvents::Events(_) => {
trace!("events item without ts min discovered {item:?}");
itemopt.take();
return Ok(Continue(()));
}
ChannelEvents::Status(_) => {
return Err(format!("channel status without timestamp").into());
}
}
// the item seems empty.
trace2!("empty item, something to do here?");
*itemopt = None;
return Ok(Continue(()));
}
}
}
trace4!("tslows {tslows:?}");
if let Some((il0, _tl0)) = tslows[0] {
if let Some((_il1, tl1)) = tslows[1] {
// There is a second input, take only up to the second highest timestamp
let item = self.items[il0].as_mut().unwrap();
match item {
ChannelEvents::Events(item) => {
if let Some(th0) = item.ts_max() {
if th0 < tl1 {
let ret = self.items[il0].take().unwrap();
Ok(Break(ret))
} else {
let ritem = item.take_new_events_until_ts(tl1);
if item.len() == 0 {
// TODO should never be here
self.items[il0] = None;
}
Ok(Break(ChannelEvents::Events(ritem)))
if let Some(th0) = item.ts_max() {
if th0 <= tl1 {
// Can take the whole item
let mut item = self.items[il0].take().unwrap();
trace3!("Take all from item {item:?}");
match self.take_into_output_all(&mut item) {
Ok(()) => Ok(Break(())),
Err(MergeError::Full) | Err(MergeError::NotCompatible) => {
// TODO count for stats
trace3!("Put item back");
self.items[il0] = Some(item);
self.do_clear_out = true;
Ok(Break(()))
}
}
} else {
// Take only up to the lowest ts of the second-lowest input
let mut item = self.items[il0].take().unwrap();
trace3!("Take up to {tl1} from item {item:?}");
match self.take_into_output_upto(&mut item, tl1) {
Ok(()) => {
if item.len() == 0 {
// TODO should never be here because we should have taken the whole item
Err(format!("Should have taken the whole item instead").into())
} else {
self.items[il0] = Some(item);
Ok(Break(()))
}
}
Err(MergeError::Full) | Err(MergeError::NotCompatible) => {
// TODO count for stats
trace3!("Put item back");
self.items[il0] = Some(item);
self.do_clear_out = true;
Ok(Break(()))
}
} else {
// TODO should never be here because ts-max should always exist here.
let ritem = item.take_new_events_until_ts(tl1);
if item.len() == 0 {}
Ok(Break(ChannelEvents::Events(ritem)))
}
}
ChannelEvents::Status(_) => {
let ret = self.items[il0].take().unwrap();
Ok(Break(ret))
}
} else {
// TODO should never be here because ts-max should always exist here.
Err(format!("selected input without max ts").into())
}
} else {
let item = self.items[il0].as_mut().unwrap();
match item {
ChannelEvents::Events(_) => {
let ret = self.items[il0].take().unwrap();
Ok(Break(ret))
}
ChannelEvents::Status(_) => {
let ret = self.items[il0].take().unwrap();
Ok(Break(ret))
// No other input, take the whole item
let mut item = self.items[il0].take().unwrap();
trace3!("Take all from item (no other input) {item:?}");
match self.take_into_output_all(&mut item) {
Ok(()) => Ok(Break(())),
Err(_) => {
// TODO count for stats
trace3!("Put item back");
self.items[il0] = Some(item);
self.do_clear_out = true;
Ok(Break(()))
}
}
}
@@ -131,6 +222,7 @@ impl ChannelEventsMerger {
}
fn refill(mut self: Pin<&mut Self>, cx: &mut Context) -> ControlFlow<Poll<Error>> {
trace4!("refill");
use ControlFlow::*;
use Poll::*;
let mut has_pending = false;
@@ -138,6 +230,7 @@ impl ChannelEventsMerger {
let item = &self.items[i1];
if item.is_none() {
while let Some(inp) = &mut self.inps[i1] {
trace4!("refill while");
match inp.poll_next_unpin(cx) {
Ready(Some(Ok(k))) => {
match k {
@@ -149,22 +242,6 @@ impl ChannelEventsMerger {
eprintln!("TODO inp RangeComplete which does not fill slot");
}
RangeCompletableItem::Data(k) => {
match &k {
ChannelEvents::Events(events) => {
if events.len() == 0 {
warn!("empty events item {events:?}");
} else {
trace!(
"\nrefilled with events {}\nREFILLED\n{:?}\n\n",
events.len(),
events
);
}
}
ChannelEvents::Status(_) => {
eprintln!("TODO inp Status which does not fill slot");
}
}
self.items[i1] = Some(k);
break;
}
@@ -186,6 +263,8 @@ impl ChannelEventsMerger {
}
}
}
} else {
trace4!("refill inp {} has {}", i1, item.as_ref().unwrap().len());
}
}
if has_pending {
@@ -195,17 +274,13 @@ impl ChannelEventsMerger {
}
}
fn poll2(mut self: Pin<&mut Self>, cx: &mut Context) -> ControlFlow<Poll<Option<Result<ChannelEvents, Error>>>> {
fn poll3(
mut self: Pin<&mut Self>,
cx: &mut Context,
has_pending: bool,
) -> ControlFlow<Poll<Option<Result<T, Error>>>> {
use ControlFlow::*;
use Poll::*;
let mut has_pending = false;
match Self::refill(Pin::new(&mut self), cx) {
Break(Ready(e)) => return Break(Ready(Some(Err(e)))),
Break(Pending) => {
has_pending = true;
}
Continue(()) => {}
}
let ninps = self.inps.iter().filter(|a| a.is_some()).count();
let nitems = self.items.iter().filter(|a| a.is_some()).count();
let nitemsmissing = self
@@ -214,6 +289,7 @@ impl ChannelEventsMerger {
.zip(self.items.iter())
.filter(|(a, b)| a.is_some() && b.is_none())
.count();
trace3!("ninps {ninps} nitems {nitems} nitemsmissing {nitemsmissing}");
if ninps == 0 && nitems == 0 {
self.done = true;
Break(Ready(None))
@@ -226,33 +302,74 @@ impl ChannelEventsMerger {
}
} else {
match Self::process(Pin::new(&mut self), cx) {
Ok(Break(item)) => Break(Ready(Some(Ok(item)))),
Ok(Continue(())) => Continue(()),
Ok(Break(())) => {
if let Some(o) = self.out.as_ref() {
// A good threshold varies according to scalar type and shape.
// TODO replace this magic number by a bound on the bytes estimate.
if o.len() >= self.out_max_len || self.do_clear_out {
trace3!("decide to output");
self.do_clear_out = false;
Break(Ready(Some(Ok(self.out.take().unwrap()))))
} else {
trace4!("output not yet");
Continue(())
}
} else {
trace3!("no output candidate");
Continue(())
}
}
Ok(Continue(())) => {
trace2!("process returned with Continue");
Continue(())
}
Err(e) => Break(Ready(Some(Err(e)))),
}
}
}
/// Refills the empty item slots, then hands over to `poll3`.
///
/// An error from `refill` is returned right away. Otherwise `poll3` is told
/// whether any input was still pending.
fn poll2(mut self: Pin<&mut Self>, cx: &mut Context) -> ControlFlow<Poll<Option<Result<T, Error>>>> {
    use ControlFlow::*;
    use Poll::*;
    let has_pending = match Self::refill(Pin::new(&mut self), cx) {
        Break(Ready(e)) => return Break(Ready(Some(Err(e)))),
        Break(Pending) => true,
        Continue(()) => false,
    };
    Self::poll3(self, cx, has_pending)
}
}
impl Stream for ChannelEventsMerger {
type Item = Sitemty<ChannelEvents>;
impl<T> Stream for Merger<T>
where
T: Mergeable,
{
type Item = Sitemty<T>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
use Poll::*;
const NAME: &str = "ChannelEventsMerger";
const NAME: &str = "Merger_mergeable";
let span = span!(Level::TRACE, NAME);
let _spanguard = span.enter();
loop {
trace3!("{NAME} poll");
break if self.complete {
panic!("poll after complete");
} else if self.done2 {
} else if self.done3 {
self.complete = true;
Ready(None)
} else if self.done2 {
self.done3 = true;
if self.range_complete {
warn!("TODO emit range complete only if all inputs signaled complete");
trace!("{NAME} emit RangeComplete");
Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
} else {
continue;
}
} else if self.done {
self.done2 = true;
if self.range_complete {
trace!("MERGER EMITTING ChannelEvents::RangeComplete");
Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
if let Some(out) = self.out.take() {
Ready(Some(sitem_data(out)))
} else {
continue;
}
@@ -260,18 +377,7 @@ impl Stream for ChannelEventsMerger {
match Self::poll2(self.as_mut(), cx) {
ControlFlow::Continue(()) => continue,
ControlFlow::Break(k) => match k {
Ready(Some(Ok(ChannelEvents::Events(item)))) => {
trace!("\n\nMERGER EMITTING\n{:?}\n\n", item);
Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(
ChannelEvents::Events(item),
)))))
}
Ready(Some(Ok(ChannelEvents::Status(item)))) => {
trace!("\n\nMERGER EMITTING\n{:?}\n\n", item);
Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(
ChannelEvents::Status(item),
)))))
}
Ready(Some(Ok(item))) => Ready(Some(sitem_data(item))),
Ready(Some(Err(e))) => {
self.done = true;
Ready(Some(Err(e.into())))

295
items_2/src/merger_cev.rs Normal file
View File

@@ -0,0 +1,295 @@
use crate::{ChannelEvents, Error};
use futures_util::{Stream, StreamExt};
use items::{RangeCompletableItem, Sitemty, StreamItem};
use netpod::log::*;
use std::any::Any;
use std::fmt;
use std::ops::ControlFlow;
use std::pin::Pin;
use std::task::{Context, Poll};
/// Timestamp bounds of an item, as needed by `ChannelEventsMerger`.
pub trait MergeableCev: Any {
/// Lowest timestamp of the item, `None` if there is none.
fn ts_min(&self) -> Option<u64>;
/// Highest timestamp of the item, `None` if there is none.
fn ts_max(&self) -> Option<u64>;
}
type MergeInp = Pin<Box<dyn Stream<Item = Sitemty<ChannelEvents>> + Send>>;
/// Merges several streams of `ChannelEvents` into one stream.
pub struct ChannelEventsMerger {
// One entry per input; an entry becomes `None` once its stream has ended.
inps: Vec<Option<MergeInp>>,
// The current, not yet fully consumed item of each input.
items: Vec<Option<ChannelEvents>>,
// Set as soon as any input has signaled range-complete.
range_complete: bool,
// Set when the inputs are exhausted or an error has been emitted.
done: bool,
// Set after the optional range-complete item has been handled.
done2: bool,
// Set after the final `None` was returned; polling again panics.
complete: bool,
}
/// Debug output of the merger state. The input streams themselves are not
/// printed; only whether each input slot is still occupied.
impl fmt::Debug for ChannelEventsMerger {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        let inps_present: Vec<bool> = self.inps.iter().map(Option::is_some).collect();
        let mut dbg = fmt.debug_struct(std::any::type_name::<Self>());
        dbg.field("inps", &inps_present);
        dbg.field("items", &self.items);
        dbg.field("range_complete", &self.range_complete);
        dbg.field("done", &self.done);
        dbg.field("done2", &self.done2);
        dbg.finish()
    }
}
impl ChannelEventsMerger {
/// Creates a merger over the given inputs.
///
/// Every input starts with an empty item slot and all state flags cleared.
pub fn new(inps: Vec<MergeInp>) -> Self {
    let slot_count = inps.len();
    Self {
        inps: inps.into_iter().map(Some).collect(),
        items: (0..slot_count).map(|_| None).collect(),
        range_complete: false,
        done: false,
        done2: false,
        complete: false,
    }
}
/// Chooses the next piece of output from the buffered items.
///
/// Finds the two buffered items with the lowest minimum timestamps. The item
/// with the lowest one is emitted, either whole or only its leading part up to
/// the minimum timestamp of the runner-up.
///
/// Returns `Break(item)` with the output, `Continue(())` after an events item
/// without timestamps was dropped, and `Err` when a status item has no
/// timestamp or when no buffered item was found at all.
fn process(mut self: Pin<&mut Self>, _cx: &mut Context) -> Result<ControlFlow<ChannelEvents>, Error> {
use ControlFlow::*;
// tslows[0] and tslows[1] hold (slot index, min timestamp) of the lowest and second-lowest item.
let mut tslows = [None, None];
for (i1, itemopt) in self.items.iter_mut().enumerate() {
if let Some(item) = itemopt {
let t1 = item.ts_min();
if let Some(t1) = t1 {
if let Some((_, a)) = tslows[0] {
if t1 < a {
// New lowest: the previous lowest becomes the runner-up.
tslows[1] = tslows[0];
tslows[0] = Some((i1, t1));
} else {
if let Some((_, b)) = tslows[1] {
if t1 < b {
tslows[1] = Some((i1, t1));
} else {
// nothing to do
}
} else {
tslows[1] = Some((i1, t1));
}
}
} else {
tslows[0] = Some((i1, t1));
}
} else {
match item {
ChannelEvents::Events(_) => {
// Drop the item and let the caller run another round.
trace!("events item without ts min discovered {item:?}");
itemopt.take();
return Ok(Continue(()));
}
ChannelEvents::Status(_) => {
return Err(format!("channel status without timestamp").into());
}
}
}
}
}
if let Some((il0, _tl0)) = tslows[0] {
if let Some((_il1, tl1)) = tslows[1] {
// A second item exists: output from the lowest item must be limited by `tl1`.
let item = self.items[il0].as_mut().unwrap();
match item {
ChannelEvents::Events(item) => {
if let Some(th0) = item.ts_max() {
if th0 < tl1 {
// The whole item lies before the runner-up, emit all of it.
let ret = self.items[il0].take().unwrap();
Ok(Break(ret))
} else {
// Split off the leading part, limited by `tl1`.
let ritem = item.take_new_events_until_ts(tl1);
if item.len() == 0 {
// TODO should never be here
self.items[il0] = None;
}
Ok(Break(ChannelEvents::Events(ritem)))
}
} else {
// TODO should never be here because ts-max should always exist here.
let ritem = item.take_new_events_until_ts(tl1);
if item.len() == 0 {}
Ok(Break(ChannelEvents::Events(ritem)))
}
}
ChannelEvents::Status(_) => {
// A status item is always emitted as a whole.
let ret = self.items[il0].take().unwrap();
Ok(Break(ret))
}
}
} else {
// Only one item is buffered: emit it whole, whichever variant it is.
let item = self.items[il0].as_mut().unwrap();
match item {
ChannelEvents::Events(_) => {
let ret = self.items[il0].take().unwrap();
Ok(Break(ret))
}
ChannelEvents::Status(_) => {
let ret = self.items[il0].take().unwrap();
Ok(Break(ret))
}
}
}
} else {
Err(format!("after low ts search nothing found").into())
}
}
/// Polls every input whose item slot is empty until the slot is filled, the
/// input ends, or the input reports `Pending`.
///
/// Range-complete, log and stats items do not fill a slot, so the same input
/// is polled again after them. An ended input is set to `None`.
///
/// Returns `Break(Ready(e))` on the first input error, `Break(Pending)` if at
/// least one input was pending, and `Continue(())` otherwise.
fn refill(mut self: Pin<&mut Self>, cx: &mut Context) -> ControlFlow<Poll<Error>> {
    use ControlFlow::*;
    use Poll::*;
    let mut has_pending = false;
    for i1 in 0..self.inps.len() {
        let item = &self.items[i1];
        if item.is_none() {
            while let Some(inp) = &mut self.inps[i1] {
                match inp.poll_next_unpin(cx) {
                    Ready(Some(Ok(k))) => {
                        match k {
                            StreamItem::DataItem(k) => match k {
                                RangeCompletableItem::RangeComplete => {
                                    trace!("--------------------- ChannelEvents::RangeComplete \n======================");
                                    // TODO track range complete for all inputs, it's only complete if all inputs are complete.
                                    self.range_complete = true;
                                    eprintln!("TODO inp RangeComplete which does not fill slot");
                                }
                                RangeCompletableItem::Data(k) => {
                                    match &k {
                                        ChannelEvents::Events(events) => {
                                            if events.len() == 0 {
                                                warn!("empty events item {events:?}");
                                            } else {
                                                trace!(
                                                    "\nrefilled with events {}\nREFILLED\n{:?}\n\n",
                                                    events.len(),
                                                    events
                                                );
                                            }
                                        }
                                        ChannelEvents::Status(_) => {
                                            eprintln!("TODO inp Status which does not fill slot");
                                        }
                                    }
                                    // The slot is filled, go on with the next input.
                                    self.items[i1] = Some(k);
                                    break;
                                }
                            },
                            StreamItem::Log(_) => {
                                eprintln!("TODO inp Log which does not fill slot");
                            }
                            StreamItem::Stats(_) => {
                                eprintln!("TODO inp Stats which does not fill slot");
                            }
                        }
                    }
                    Ready(Some(Err(e))) => return Break(Ready(e.into())),
                    Ready(None) => {
                        // Input ended; clearing it also ends the `while let` loop.
                        self.inps[i1] = None;
                    }
                    Pending => {
                        has_pending = true;
                        // Stop polling this input. The poll above has registered the
                        // waker; without the `break` the same pending input would be
                        // polled again at once in a busy loop.
                        break;
                    }
                }
            }
        }
    }
    if has_pending {
        Break(Pending)
    } else {
        Continue(())
    }
}
/// One merge step: refill the empty slots, then decide what to do.
///
/// * `Break(Ready(None))` when all inputs have ended and no item is buffered.
/// * `Break(Pending)` when a live input has no buffered item and is pending.
/// * An error when a live input has no buffered item although nothing is pending.
/// * Otherwise the result of `process`.
fn poll2(mut self: Pin<&mut Self>, cx: &mut Context) -> ControlFlow<Poll<Option<Result<ChannelEvents, Error>>>> {
    use ControlFlow::*;
    use Poll::*;
    let has_pending = match Self::refill(Pin::new(&mut self), cx) {
        Break(Ready(e)) => return Break(Ready(Some(Err(e)))),
        Break(Pending) => true,
        Continue(()) => false,
    };
    let ninps = self.inps.iter().filter(|a| a.is_some()).count();
    let nitems = self.items.iter().filter(|a| a.is_some()).count();
    // Live inputs whose slot is still empty.
    let nitemsmissing = self
        .inps
        .iter()
        .zip(self.items.iter())
        .filter(|(a, b)| a.is_some() && b.is_none())
        .count();
    if ninps == 0 && nitems == 0 {
        self.done = true;
        return Break(Ready(None));
    }
    if nitemsmissing != 0 {
        return if has_pending {
            Break(Pending)
        } else {
            let e = Error::from(format!("missing but no pending"));
            Break(Ready(Some(Err(e))))
        };
    }
    match Self::process(Pin::new(&mut self), cx) {
        Ok(Break(item)) => Break(Ready(Some(Ok(item)))),
        Ok(Continue(())) => Continue(()),
        Err(e) => Break(Ready(Some(Err(e)))),
    }
}
}
/// Stream of merged `ChannelEvents`.
///
/// After the inputs are exhausted or an error was emitted, one range-complete
/// item is emitted if any input signaled range-complete, and then `None`.
/// Polling again after that `None` panics.
impl Stream for ChannelEventsMerger {
type Item = Sitemty<ChannelEvents>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
use Poll::*;
const NAME: &str = "ChannelEventsMerger";
let span = span!(Level::TRACE, NAME);
let _spanguard = span.enter();
// Each round either breaks out with the value to return or continues with the next state.
loop {
break if self.complete {
panic!("poll after complete");
} else if self.done2 {
// Final stage: end the stream.
self.complete = true;
Ready(None)
} else if self.done {
// Data is finished: emit the range-complete marker if one was seen.
self.done2 = true;
if self.range_complete {
trace!("MERGER EMITTING ChannelEvents::RangeComplete");
Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
} else {
continue;
}
} else {
match Self::poll2(self.as_mut(), cx) {
ControlFlow::Continue(()) => continue,
ControlFlow::Break(k) => match k {
// Both variants are wrapped the same way; the trace output shows the inner value.
Ready(Some(Ok(ChannelEvents::Events(item)))) => {
trace!("\n\nMERGER EMITTING\n{:?}\n\n", item);
Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(
ChannelEvents::Events(item),
)))))
}
Ready(Some(Ok(ChannelEvents::Status(item)))) => {
trace!("\n\nMERGER EMITTING\n{:?}\n\n", item);
Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(
ChannelEvents::Status(item),
)))))
}
Ready(Some(Err(e))) => {
// An error ends the data phase.
self.done = true;
Ready(Some(Err(e.into())))
}
Ready(None) => {
self.done = true;
continue;
}
Pending => Pending,
},
}
};
}
}
}

View File

@@ -1,16 +1,128 @@
use crate::binsdim0::BinsDim0CollectedResult;
use crate::eventsdim0::EventsDim0;
use crate::merger::ChannelEventsMerger;
use crate::merger::{Mergeable, Merger};
use crate::merger_cev::ChannelEventsMerger;
use crate::testgen::make_some_boxed_d0_f32;
use crate::{binned_collected, runfut, ChannelEvents, Empty, Events, IsoDateTime};
use crate::{ConnStatus, ConnStatusEvent, Error};
use chrono::{TimeZone, Utc};
use futures_util::StreamExt;
use items::{RangeCompletableItem, Sitemty, StreamItem};
use futures_util::{stream, StreamExt};
use items::{sitem_data, RangeCompletableItem, Sitemty, StreamItem};
use netpod::log::*;
use netpod::timeunits::*;
use netpod::{AggKind, BinnedRange, NanoRange, ScalarType, Shape};
use std::time::Duration;
// Moves events out of a batch in two steps and checks that the target ends up
// equal to the original batch while the source is empty.
#[test]
fn items_move_events() {
let evs = make_some_boxed_d0_f32(10, SEC, SEC, 0, 1846713782);
let v0 = ChannelEvents::Events(evs);
let mut v1 = v0.clone();
eprintln!("{v1:?}");
eprintln!("{}", v1.len());
// First step with a very small upper bound.
// NOTE(review): presumably nothing is moved here because all timestamps are at least `SEC` — confirm.
let mut v2 = v1.move_into_fresh(4);
eprintln!("{}", v1.len());
eprintln!("{}", v2.len());
// Second step moves everything that is left.
v1.move_into_existing(&mut v2, u64::MAX).unwrap();
eprintln!("{}", v1.len());
eprintln!("{}", v2.len());
eprintln!("{v1:?}");
eprintln!("{v2:?}");
assert_eq!(v1.len(), 0);
assert_eq!(v2.len(), 10);
assert_eq!(v2, v0);
}
// Smoke test: merges two event streams and prints every output item.
// It has no assertions; it only checks that the merger runs to the end.
#[test]
fn items_merge_00() {
let fut = async {
use crate::merger::Merger;
// Both inputs use a step of 2 s; the first starts at 1 s, the second at 2 s.
let evs0 = make_some_boxed_d0_f32(10, SEC * 1, SEC * 2, 0, 1846713782);
let evs1 = make_some_boxed_d0_f32(10, SEC * 2, SEC * 2, 0, 828764893);
let v0 = ChannelEvents::Events(evs0);
let v1 = ChannelEvents::Events(evs1);
let stream0 = Box::pin(stream::iter(vec![sitem_data(v0)]));
let stream1 = Box::pin(stream::iter(vec![sitem_data(v1)]));
let mut merger = Merger::new(vec![stream0, stream1], 8);
while let Some(item) = merger.next().await {
eprintln!("{item:?}");
}
Ok(())
};
runfut(fut).unwrap();
}
// Merges two event streams and one status stream and checks that no entry is
// lost: 10 + 10 events plus 3 status items, each counting as one entry.
#[test]
fn items_merge_01() {
let fut = async {
use crate::merger::Merger;
let evs0 = make_some_boxed_d0_f32(10, SEC * 1, SEC * 2, 0, 1846713782);
let evs1 = make_some_boxed_d0_f32(10, SEC * 2, SEC * 2, 0, 828764893);
let v0 = ChannelEvents::Events(evs0);
let v1 = ChannelEvents::Events(evs1);
let v2 = ChannelEvents::Status(ConnStatusEvent::new(MS * 100, ConnStatus::Connect));
let v3 = ChannelEvents::Status(ConnStatusEvent::new(MS * 2300, ConnStatus::Disconnect));
let v4 = ChannelEvents::Status(ConnStatusEvent::new(MS * 2800, ConnStatus::Connect));
let stream0 = Box::pin(stream::iter(vec![sitem_data(v0)]));
let stream1 = Box::pin(stream::iter(vec![sitem_data(v1)]));
let stream2 = Box::pin(stream::iter(vec![sitem_data(v2), sitem_data(v3), sitem_data(v4)]));
let mut merger = Merger::new(vec![stream0, stream1, stream2], 8);
let mut total_event_count = 0;
while let Some(item) = merger.next().await {
eprintln!("{item:?}");
let item = item?;
// Only data items add to the count.
match item {
StreamItem::DataItem(item) => match item {
RangeCompletableItem::RangeComplete => {}
RangeCompletableItem::Data(item) => {
total_event_count += item.len();
}
},
StreamItem::Log(_) => {}
StreamItem::Stats(_) => {}
}
}
assert_eq!(total_event_count, 23);
Ok(())
};
runfut(fut).unwrap();
}
// Same setup as `items_merge_01` but with 100 events per event stream:
// 100 + 100 events plus 3 status items give 203 entries.
#[test]
fn items_merge_02() {
let fut = async {
let evs0 = make_some_boxed_d0_f32(100, SEC * 1, SEC * 2, 0, 1846713782);
let evs1 = make_some_boxed_d0_f32(100, SEC * 2, SEC * 2, 0, 828764893);
let v0 = ChannelEvents::Events(evs0);
let v1 = ChannelEvents::Events(evs1);
let v2 = ChannelEvents::Status(ConnStatusEvent::new(MS * 100, ConnStatus::Connect));
let v3 = ChannelEvents::Status(ConnStatusEvent::new(MS * 2300, ConnStatus::Disconnect));
let v4 = ChannelEvents::Status(ConnStatusEvent::new(MS * 2800, ConnStatus::Connect));
let stream0 = Box::pin(stream::iter(vec![sitem_data(v0)]));
let stream1 = Box::pin(stream::iter(vec![sitem_data(v1)]));
let stream2 = Box::pin(stream::iter(vec![sitem_data(v2), sitem_data(v3), sitem_data(v4)]));
let mut merger = Merger::new(vec![stream0, stream1, stream2], 8);
let mut total_event_count = 0;
while let Some(item) = merger.next().await {
eprintln!("{item:?}");
let item = item.unwrap();
// Only data items add to the count.
match item {
StreamItem::DataItem(item) => match item {
RangeCompletableItem::RangeComplete => {}
RangeCompletableItem::Data(item) => {
total_event_count += item.len();
}
},
StreamItem::Log(_) => {}
StreamItem::Stats(_) => {}
}
}
assert_eq!(total_event_count, 203);
Ok(())
};
runfut(fut).unwrap();
}
#[test]
fn merge01() {
let fut = async {

23
items_2/src/testgen.rs Normal file
View File

@@ -0,0 +1,23 @@
use crate::eventsdim0::EventsDim0;
use crate::{Empty, Events};
#[allow(unused)]
fn xorshift32(state: u32) -> u32 {
let mut x = state;
x ^= x << 13;
x ^= x >> 17;
x ^= x << 5;
x
}
pub fn make_some_boxed_d0_f32(n: usize, t0: u64, tstep: u64, tmask: u64, seed: u32) -> Box<dyn Events> {
let mut vstate = seed;
let mut events = EventsDim0::empty();
for i in 0..n {
vstate = xorshift32(vstate);
let ts = t0 + i as u64 * tstep + (vstate as u64 & tmask);
let value = i as f32 * 100. + vstate as f32 / u32::MAX as f32 / 10.;
events.push(ts, ts, value);
}
Box::new(events.clone())
}

29
items_2/src/timebin.rs Normal file
View File

@@ -0,0 +1,29 @@
use std::fmt;
/// Statically typed time binner: consumes `Input` items and produces `Output` bins.
pub trait TimeBinner: fmt::Debug + Unpin {
/// Type of the items which can be ingested.
type Input;
/// Type of the container in which ready bins are handed out.
type Output;
/// Feeds one item into the binner.
fn ingest(&mut self, item: &mut Self::Input);
/// Tells the binner that the input range is complete.
fn set_range_complete(&mut self);
/// Number of bins which are ready to be taken.
fn bins_ready_count(&self) -> usize;
/// Hands out the ready bins, if any.
fn bins_ready(&mut self) -> Option<Self::Output>;
/// If there is a bin in progress with non-zero count, push it to the result set.
/// With push_empty == true, a bin in progress is pushed even if it contains no counts.
fn push_in_progress(&mut self, push_empty: bool);
/// Implies `Self::push_in_progress` but in addition, pushes a zero-count bin if the call
/// to `push_in_progress` did not change the result count, as long as edges are left.
/// The next call to `Self::bins_ready_count` must return one higher count than before.
fn cycle(&mut self);
}
/// A type which can create a time binner that accepts values of this type.
pub trait TimeBinnable: fmt::Debug + Sized {
/// The binner type; its `Input` is the implementing type itself.
type TimeBinner: TimeBinner<Input = Self>;
/// Creates a new binner for the given bin `edges`.
/// `do_time_weight` is handed to the binner.
fn time_binner_new(&self, edges: Vec<u64>, do_time_weight: bool) -> Self::TimeBinner;
}