Add alternative time binner
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
use crate::{ChannelEvents, Error, MergableEvents};
|
||||
use crate::Error;
|
||||
use futures_util::{Stream, StreamExt};
|
||||
use items::sitem_data;
|
||||
use items::{RangeCompletableItem, Sitemty, StreamItem};
|
||||
use netpod::log::*;
|
||||
use std::fmt;
|
||||
@@ -7,50 +8,126 @@ use std::ops::ControlFlow;
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
type MergeInp = Pin<Box<dyn Stream<Item = Sitemty<ChannelEvents>> + Send>>;
|
||||
#[allow(unused)]
|
||||
macro_rules! trace2 {
|
||||
($($arg:tt)*) => ();
|
||||
($($arg:tt)*) => (eprintln!($($arg)*));
|
||||
}
|
||||
|
||||
pub struct ChannelEventsMerger {
|
||||
inps: Vec<Option<MergeInp>>,
|
||||
items: Vec<Option<ChannelEvents>>,
|
||||
#[allow(unused)]
|
||||
macro_rules! trace3 {
|
||||
($($arg:tt)*) => ();
|
||||
($($arg:tt)*) => (eprintln!($($arg)*));
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
macro_rules! trace4 {
|
||||
($($arg:tt)*) => ();
|
||||
($($arg:tt)*) => (eprintln!($($arg)*));
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum MergeError {
|
||||
NotCompatible,
|
||||
Full,
|
||||
}
|
||||
|
||||
impl From<MergeError> for err::Error {
|
||||
fn from(e: MergeError) -> Self {
|
||||
format!("{e:?}").into()
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Mergeable<Rhs = Self>: fmt::Debug + Unpin {
|
||||
fn len(&self) -> usize;
|
||||
fn ts_min(&self) -> Option<u64>;
|
||||
fn ts_max(&self) -> Option<u64>;
|
||||
// TODO remove, useless.
|
||||
fn is_compatible_target(&self, tgt: &Rhs) -> bool;
|
||||
|
||||
// TODO rename to `append_*` to make it clear that they simply append, but not re-sort.
|
||||
fn move_into_fresh(&mut self, ts_end: u64) -> Rhs;
|
||||
fn move_into_existing(&mut self, tgt: &mut Rhs, ts_end: u64) -> Result<(), MergeError>;
|
||||
}
|
||||
|
||||
type MergeInp<T> = Pin<Box<dyn Stream<Item = Sitemty<T>> + Send>>;
|
||||
|
||||
pub struct Merger<T> {
|
||||
inps: Vec<Option<MergeInp<T>>>,
|
||||
items: Vec<Option<T>>,
|
||||
out: Option<T>,
|
||||
do_clear_out: bool,
|
||||
out_max_len: usize,
|
||||
range_complete: bool,
|
||||
done: bool,
|
||||
done2: bool,
|
||||
done3: bool,
|
||||
complete: bool,
|
||||
}
|
||||
|
||||
impl fmt::Debug for ChannelEventsMerger {
|
||||
impl<T> fmt::Debug for Merger<T>
|
||||
where
|
||||
T: Mergeable,
|
||||
{
|
||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
let inps: Vec<_> = self.inps.iter().map(|x| x.is_some()).collect();
|
||||
fmt.debug_struct(std::any::type_name::<Self>())
|
||||
.field("inps", &inps)
|
||||
.field("items", &self.items)
|
||||
.field("out_max_len", &self.out_max_len)
|
||||
.field("range_complete", &self.range_complete)
|
||||
.field("done", &self.done)
|
||||
.field("done2", &self.done2)
|
||||
.field("done3", &self.done3)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl ChannelEventsMerger {
|
||||
pub fn new(inps: Vec<MergeInp>) -> Self {
|
||||
impl<T> Merger<T>
|
||||
where
|
||||
T: Mergeable,
|
||||
{
|
||||
pub fn new(inps: Vec<MergeInp<T>>, out_max_len: usize) -> Self {
|
||||
let n = inps.len();
|
||||
Self {
|
||||
done: false,
|
||||
done2: false,
|
||||
complete: false,
|
||||
inps: inps.into_iter().map(|x| Some(x)).collect(),
|
||||
items: (0..n).into_iter().map(|_| None).collect(),
|
||||
out: None,
|
||||
do_clear_out: false,
|
||||
out_max_len,
|
||||
range_complete: false,
|
||||
done: false,
|
||||
done2: false,
|
||||
done3: false,
|
||||
complete: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn process(mut self: Pin<&mut Self>, _cx: &mut Context) -> Result<ControlFlow<ChannelEvents>, Error> {
|
||||
fn take_into_output_all(&mut self, src: &mut T) -> Result<(), MergeError> {
|
||||
// TODO optimize the case when some large batch should be added to some existing small batch already in out.
|
||||
// TODO maybe use two output slots?
|
||||
self.take_into_output_upto(src, u64::MAX)
|
||||
}
|
||||
|
||||
fn take_into_output_upto(&mut self, src: &mut T, upto: u64) -> Result<(), MergeError> {
|
||||
// TODO optimize the case when some large batch should be added to some existing small batch already in out.
|
||||
// TODO maybe use two output slots?
|
||||
if self.out.is_none() {
|
||||
trace2!("move into fresh");
|
||||
self.out = Some(src.move_into_fresh(upto));
|
||||
Ok(())
|
||||
} else {
|
||||
let out = self.out.as_mut().unwrap();
|
||||
src.move_into_existing(out, upto)
|
||||
}
|
||||
}
|
||||
|
||||
fn process(mut self: Pin<&mut Self>, _cx: &mut Context) -> Result<ControlFlow<()>, Error> {
|
||||
use ControlFlow::*;
|
||||
let mut tslows = [None, None];
|
||||
for (i1, itemopt) in self.items.iter_mut().enumerate() {
|
||||
if let Some(item) = itemopt {
|
||||
let t1 = item.ts_min();
|
||||
if let Some(t1) = t1 {
|
||||
if let Some(t1) = item.ts_min() {
|
||||
if let Some((_, a)) = tslows[0] {
|
||||
if t1 < a {
|
||||
tslows[1] = tslows[0];
|
||||
@@ -70,58 +147,72 @@ impl ChannelEventsMerger {
|
||||
tslows[0] = Some((i1, t1));
|
||||
}
|
||||
} else {
|
||||
match item {
|
||||
ChannelEvents::Events(_) => {
|
||||
trace!("events item without ts min discovered {item:?}");
|
||||
itemopt.take();
|
||||
return Ok(Continue(()));
|
||||
}
|
||||
ChannelEvents::Status(_) => {
|
||||
return Err(format!("channel status without timestamp").into());
|
||||
}
|
||||
}
|
||||
// the item seems empty.
|
||||
trace2!("empty item, something to do here?");
|
||||
*itemopt = None;
|
||||
return Ok(Continue(()));
|
||||
}
|
||||
}
|
||||
}
|
||||
trace4!("tslows {tslows:?}");
|
||||
if let Some((il0, _tl0)) = tslows[0] {
|
||||
if let Some((_il1, tl1)) = tslows[1] {
|
||||
// There is a second input, take only up to the second highest timestamp
|
||||
let item = self.items[il0].as_mut().unwrap();
|
||||
match item {
|
||||
ChannelEvents::Events(item) => {
|
||||
if let Some(th0) = item.ts_max() {
|
||||
if th0 < tl1 {
|
||||
let ret = self.items[il0].take().unwrap();
|
||||
Ok(Break(ret))
|
||||
} else {
|
||||
let ritem = item.take_new_events_until_ts(tl1);
|
||||
if item.len() == 0 {
|
||||
// TODO should never be here
|
||||
self.items[il0] = None;
|
||||
}
|
||||
Ok(Break(ChannelEvents::Events(ritem)))
|
||||
if let Some(th0) = item.ts_max() {
|
||||
if th0 <= tl1 {
|
||||
// Can take the whole item
|
||||
let mut item = self.items[il0].take().unwrap();
|
||||
trace3!("Take all from item {item:?}");
|
||||
match self.take_into_output_all(&mut item) {
|
||||
Ok(()) => Ok(Break(())),
|
||||
Err(MergeError::Full) | Err(MergeError::NotCompatible) => {
|
||||
// TODO count for stats
|
||||
trace3!("Put item back");
|
||||
self.items[il0] = Some(item);
|
||||
self.do_clear_out = true;
|
||||
Ok(Break(()))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Take only up to the lowest ts of the second-lowest input
|
||||
let mut item = self.items[il0].take().unwrap();
|
||||
trace3!("Take up to {tl1} from item {item:?}");
|
||||
match self.take_into_output_upto(&mut item, tl1) {
|
||||
Ok(()) => {
|
||||
if item.len() == 0 {
|
||||
// TODO should never be here because we should have taken the whole item
|
||||
Err(format!("Should have taken the whole item instead").into())
|
||||
} else {
|
||||
self.items[il0] = Some(item);
|
||||
Ok(Break(()))
|
||||
}
|
||||
}
|
||||
Err(MergeError::Full) | Err(MergeError::NotCompatible) => {
|
||||
// TODO count for stats
|
||||
trace3!("Put item back");
|
||||
self.items[il0] = Some(item);
|
||||
self.do_clear_out = true;
|
||||
Ok(Break(()))
|
||||
}
|
||||
} else {
|
||||
// TODO should never be here because ts-max should always exist here.
|
||||
let ritem = item.take_new_events_until_ts(tl1);
|
||||
if item.len() == 0 {}
|
||||
Ok(Break(ChannelEvents::Events(ritem)))
|
||||
}
|
||||
}
|
||||
ChannelEvents::Status(_) => {
|
||||
let ret = self.items[il0].take().unwrap();
|
||||
Ok(Break(ret))
|
||||
}
|
||||
} else {
|
||||
// TODO should never be here because ts-max should always exist here.
|
||||
Err(format!("selected input without max ts").into())
|
||||
}
|
||||
} else {
|
||||
let item = self.items[il0].as_mut().unwrap();
|
||||
match item {
|
||||
ChannelEvents::Events(_) => {
|
||||
let ret = self.items[il0].take().unwrap();
|
||||
Ok(Break(ret))
|
||||
}
|
||||
ChannelEvents::Status(_) => {
|
||||
let ret = self.items[il0].take().unwrap();
|
||||
Ok(Break(ret))
|
||||
// No other input, take the whole item
|
||||
let mut item = self.items[il0].take().unwrap();
|
||||
trace3!("Take all from item (no other input) {item:?}");
|
||||
match self.take_into_output_all(&mut item) {
|
||||
Ok(()) => Ok(Break(())),
|
||||
Err(_) => {
|
||||
// TODO count for stats
|
||||
trace3!("Put item back");
|
||||
self.items[il0] = Some(item);
|
||||
self.do_clear_out = true;
|
||||
Ok(Break(()))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -131,6 +222,7 @@ impl ChannelEventsMerger {
|
||||
}
|
||||
|
||||
fn refill(mut self: Pin<&mut Self>, cx: &mut Context) -> ControlFlow<Poll<Error>> {
|
||||
trace4!("refill");
|
||||
use ControlFlow::*;
|
||||
use Poll::*;
|
||||
let mut has_pending = false;
|
||||
@@ -138,6 +230,7 @@ impl ChannelEventsMerger {
|
||||
let item = &self.items[i1];
|
||||
if item.is_none() {
|
||||
while let Some(inp) = &mut self.inps[i1] {
|
||||
trace4!("refill while");
|
||||
match inp.poll_next_unpin(cx) {
|
||||
Ready(Some(Ok(k))) => {
|
||||
match k {
|
||||
@@ -149,22 +242,6 @@ impl ChannelEventsMerger {
|
||||
eprintln!("TODO inp RangeComplete which does not fill slot");
|
||||
}
|
||||
RangeCompletableItem::Data(k) => {
|
||||
match &k {
|
||||
ChannelEvents::Events(events) => {
|
||||
if events.len() == 0 {
|
||||
warn!("empty events item {events:?}");
|
||||
} else {
|
||||
trace!(
|
||||
"\nrefilled with events {}\nREFILLED\n{:?}\n\n",
|
||||
events.len(),
|
||||
events
|
||||
);
|
||||
}
|
||||
}
|
||||
ChannelEvents::Status(_) => {
|
||||
eprintln!("TODO inp Status which does not fill slot");
|
||||
}
|
||||
}
|
||||
self.items[i1] = Some(k);
|
||||
break;
|
||||
}
|
||||
@@ -186,6 +263,8 @@ impl ChannelEventsMerger {
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
trace4!("refill inp {} has {}", i1, item.as_ref().unwrap().len());
|
||||
}
|
||||
}
|
||||
if has_pending {
|
||||
@@ -195,17 +274,13 @@ impl ChannelEventsMerger {
|
||||
}
|
||||
}
|
||||
|
||||
fn poll2(mut self: Pin<&mut Self>, cx: &mut Context) -> ControlFlow<Poll<Option<Result<ChannelEvents, Error>>>> {
|
||||
fn poll3(
|
||||
mut self: Pin<&mut Self>,
|
||||
cx: &mut Context,
|
||||
has_pending: bool,
|
||||
) -> ControlFlow<Poll<Option<Result<T, Error>>>> {
|
||||
use ControlFlow::*;
|
||||
use Poll::*;
|
||||
let mut has_pending = false;
|
||||
match Self::refill(Pin::new(&mut self), cx) {
|
||||
Break(Ready(e)) => return Break(Ready(Some(Err(e)))),
|
||||
Break(Pending) => {
|
||||
has_pending = true;
|
||||
}
|
||||
Continue(()) => {}
|
||||
}
|
||||
let ninps = self.inps.iter().filter(|a| a.is_some()).count();
|
||||
let nitems = self.items.iter().filter(|a| a.is_some()).count();
|
||||
let nitemsmissing = self
|
||||
@@ -214,6 +289,7 @@ impl ChannelEventsMerger {
|
||||
.zip(self.items.iter())
|
||||
.filter(|(a, b)| a.is_some() && b.is_none())
|
||||
.count();
|
||||
trace3!("ninps {ninps} nitems {nitems} nitemsmissing {nitemsmissing}");
|
||||
if ninps == 0 && nitems == 0 {
|
||||
self.done = true;
|
||||
Break(Ready(None))
|
||||
@@ -226,33 +302,74 @@ impl ChannelEventsMerger {
|
||||
}
|
||||
} else {
|
||||
match Self::process(Pin::new(&mut self), cx) {
|
||||
Ok(Break(item)) => Break(Ready(Some(Ok(item)))),
|
||||
Ok(Continue(())) => Continue(()),
|
||||
Ok(Break(())) => {
|
||||
if let Some(o) = self.out.as_ref() {
|
||||
// A good threshold varies according to scalar type and shape.
|
||||
// TODO replace this magic number by a bound on the bytes estimate.
|
||||
if o.len() >= self.out_max_len || self.do_clear_out {
|
||||
trace3!("decide to output");
|
||||
self.do_clear_out = false;
|
||||
Break(Ready(Some(Ok(self.out.take().unwrap()))))
|
||||
} else {
|
||||
trace4!("output not yet");
|
||||
Continue(())
|
||||
}
|
||||
} else {
|
||||
trace3!("no output candidate");
|
||||
Continue(())
|
||||
}
|
||||
}
|
||||
Ok(Continue(())) => {
|
||||
trace2!("process returned with Continue");
|
||||
Continue(())
|
||||
}
|
||||
Err(e) => Break(Ready(Some(Err(e)))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn poll2(mut self: Pin<&mut Self>, cx: &mut Context) -> ControlFlow<Poll<Option<Result<T, Error>>>> {
|
||||
use ControlFlow::*;
|
||||
use Poll::*;
|
||||
match Self::refill(Pin::new(&mut self), cx) {
|
||||
Break(Ready(e)) => Break(Ready(Some(Err(e)))),
|
||||
Break(Pending) => Self::poll3(self, cx, true),
|
||||
Continue(()) => Self::poll3(self, cx, false),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Stream for ChannelEventsMerger {
|
||||
type Item = Sitemty<ChannelEvents>;
|
||||
impl<T> Stream for Merger<T>
|
||||
where
|
||||
T: Mergeable,
|
||||
{
|
||||
type Item = Sitemty<T>;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
|
||||
use Poll::*;
|
||||
const NAME: &str = "ChannelEventsMerger";
|
||||
const NAME: &str = "Merger_mergeable";
|
||||
let span = span!(Level::TRACE, NAME);
|
||||
let _spanguard = span.enter();
|
||||
loop {
|
||||
trace3!("{NAME} poll");
|
||||
break if self.complete {
|
||||
panic!("poll after complete");
|
||||
} else if self.done2 {
|
||||
} else if self.done3 {
|
||||
self.complete = true;
|
||||
Ready(None)
|
||||
} else if self.done2 {
|
||||
self.done3 = true;
|
||||
if self.range_complete {
|
||||
warn!("TODO emit range complete only if all inputs signaled complete");
|
||||
trace!("{NAME} emit RangeComplete");
|
||||
Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
} else if self.done {
|
||||
self.done2 = true;
|
||||
if self.range_complete {
|
||||
trace!("MERGER EMITTING ChannelEvents::RangeComplete");
|
||||
Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
|
||||
if let Some(out) = self.out.take() {
|
||||
Ready(Some(sitem_data(out)))
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
@@ -260,18 +377,7 @@ impl Stream for ChannelEventsMerger {
|
||||
match Self::poll2(self.as_mut(), cx) {
|
||||
ControlFlow::Continue(()) => continue,
|
||||
ControlFlow::Break(k) => match k {
|
||||
Ready(Some(Ok(ChannelEvents::Events(item)))) => {
|
||||
trace!("\n\nMERGER EMITTING\n{:?}\n\n", item);
|
||||
Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(
|
||||
ChannelEvents::Events(item),
|
||||
)))))
|
||||
}
|
||||
Ready(Some(Ok(ChannelEvents::Status(item)))) => {
|
||||
trace!("\n\nMERGER EMITTING\n{:?}\n\n", item);
|
||||
Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(
|
||||
ChannelEvents::Status(item),
|
||||
)))))
|
||||
}
|
||||
Ready(Some(Ok(item))) => Ready(Some(sitem_data(item))),
|
||||
Ready(Some(Err(e))) => {
|
||||
self.done = true;
|
||||
Ready(Some(Err(e.into())))
|
||||
|
||||
Reference in New Issue
Block a user