WIP on collector

This commit is contained in:
Dominik Werder
2022-11-18 16:01:35 +01:00
parent d57aa5474e
commit 7cdf5975b9
7 changed files with 228 additions and 101 deletions

23
items_2/src/collect.rs Normal file
View File

@@ -0,0 +1,23 @@
use crate::Error;
use std::fmt;
/// Interface of a collector: it is fed items of type `Input` one at a time
/// and produces a single `Output` (or an `Error`) on request.
///
/// Every implementor must also implement `fmt::Debug`.
pub trait Collector: fmt::Debug {
/// Type of the items accepted by `ingest`.
type Input;
/// Type of the value produced by `result`.
type Output;
/// Returns a length as `usize`.
/// NOTE(review): presumably the amount of data ingested so far; confirm against implementors.
fn len(&self) -> usize;
/// Hands `item` to the collector. The item is passed by mutable reference,
/// so an implementation is allowed to modify it.
fn ingest(&mut self, item: &mut Self::Input);
/// Notifies the collector of the "range complete" condition.
fn set_range_complete(&mut self);
/// Notifies the collector of the "timed out" condition.
fn set_timed_out(&mut self);
/// Produces the collected output, or an `Error`. Takes `&mut self`, so an
/// implementation may change its internal state while producing the result.
fn result(&mut self) -> Result<Self::Output, Error>;
}
/// A type that knows which `Collector` is able to collect it.
///
/// Every implementor must also implement `fmt::Debug`.
pub trait Collectable: fmt::Debug {
/// The matching collector type; its `Input` must be the implementing type itself.
type Collector: Collector<Input = Self>;
/// Creates a collector for values of this type.
fn new_collector(&self) -> Self::Collector;
}

View File

@@ -122,6 +122,7 @@ impl<NTY: ScalarOps> TimeBinnableType for EventsDim0<NTY> {
}
}
#[derive(Debug)]
pub struct EventsDim0Collector<NTY> {
vals: EventsDim0<NTY>,
range_complete: bool,
@@ -219,6 +220,32 @@ impl<NTY: ScalarOps> CollectableType for EventsDim0<NTY> {
}
}
impl<NTY: ScalarOps> crate::collect::Collector for EventsDim0Collector<NTY> {
type Input = EventsDim0<NTY>;
// TODO the output probably needs to be different to accommodate also range-complete, continue-at, etc
type Output = EventsDim0CollectorOutput<NTY>;
fn len(&self) -> usize {
self.vals.len()
}
fn ingest(&mut self, item: &mut Self::Input) {
CollectorType::ingest(self, item)
}
fn set_range_complete(&mut self) {
CollectorType::set_range_complete(self)
}
fn set_timed_out(&mut self) {
CollectorType::set_timed_out(self)
}
fn result(&mut self) -> Result<Self::Output, crate::Error> {
CollectorType::result(self).map_err(Into::into)
}
}
pub struct EventsDim0Aggregator<NTY> {
range: NanoRange,
count: u64,
@@ -809,3 +836,11 @@ impl<NTY: ScalarOps> TimeBinner for EventsDim0TimeBinner<NTY> {
self.range_complete = true;
}
}
impl<NTY: ScalarOps> crate::collect::Collectable for EventsDim0<NTY> {
type Collector;
fn new_collector(&self) -> Self::Collector {
todo!()
}
}

View File

@@ -1,4 +1,5 @@
pub mod binsdim0;
pub mod collect;
pub mod eventsdim0;
pub mod merger;
pub mod merger_cev;
@@ -365,15 +366,17 @@ impl PartialEq for Box<dyn Events> {
}
}
struct EventsCollector {}
// TODO remove
struct EventsCollector2 {}
impl WithLen for EventsCollector {
impl WithLen for EventsCollector2 {
fn len(&self) -> usize {
todo!()
}
}
impl Collector for EventsCollector {
// TODO remove
impl Collector for EventsCollector2 {
fn ingest(&mut self, _src: &mut dyn Collectable) {
todo!()
}
@@ -391,9 +394,10 @@ impl Collector for EventsCollector {
}
}
// TODO remove
impl Collectable for Box<dyn Events> {
fn new_collector(&self) -> Box<dyn Collector> {
Box::new(EventsCollector {})
Box::new(EventsCollector2 {})
}
fn as_any_mut(&mut self) -> &mut dyn Any {
@@ -737,7 +741,8 @@ mod serde_channel_events {
#[cfg(test)]
mod test_channel_events_serde {
use super::ChannelEvents;
use crate::{eventsdim0::EventsDim0, Empty};
use crate::eventsdim0::EventsDim0;
use crate::Empty;
#[test]
fn channel_events() {
@@ -950,6 +955,50 @@ impl crate::timebin::TimeBinnable for ChannelEvents {
}
}
/// Collector for boxed `Events` trait objects.
///
/// Work in progress: the only state is a placeholder slot.
#[derive(Debug)]
pub struct EventsCollector {
    // Placeholder slot; for now it can only hold a boxed unit value.
    coll: Option<Box<()>>,
}

impl EventsCollector {
    /// Creates a collector with its placeholder slot populated.
    pub fn new() -> Self {
        // The field is an `Option`, so the boxed value must be wrapped in `Some`;
        // assigning the bare `Box` does not type-check.
        Self {
            coll: Some(Box::new(())),
        }
    }
}
/// `crate::collect::Collector` implementation for `EventsCollector`.
///
/// Both the input and the output are boxed `Events` trait objects.
/// Every method is still a `todo!()` stub and therefore panics when called.
impl crate::collect::Collector for EventsCollector {
type Input = Box<dyn Events>;
type Output = Box<dyn Events>;
/// Stub: panics via `todo!()`.
fn len(&self) -> usize {
todo!()
}
/// Stub: panics via `todo!()`; `item` is not used yet.
fn ingest(&mut self, item: &mut Self::Input) {
todo!()
}
/// Stub: panics via `todo!()`.
fn set_range_complete(&mut self) {
todo!()
}
/// Stub: panics via `todo!()`.
fn set_timed_out(&mut self) {
todo!()
}
/// Stub: panics via `todo!()`.
fn result(&mut self) -> Result<Self::Output, Error> {
todo!()
}
}
impl crate::collect::Collectable for Box<dyn Events> {
type Collector = EventsCollector;
fn new_collector(&self) -> Self::Collector {
Collectable::new_collector(self)
}
}
// TODO do this with some blanket impl:
impl Collectable for Box<dyn Collectable> {
fn new_collector(&self) -> Box<dyn streams::Collector> {
@@ -994,7 +1043,7 @@ fn flush_binned(
}
}
// TODO handle status information.
// TODO remove
pub async fn binned_collected(
scalar_type: ScalarType,
shape: Shape,

View File

@@ -11,6 +11,8 @@ pub trait CollectorType: Send + Unpin + WithLen {
fn ingest(&mut self, src: &mut Self::Input);
fn set_range_complete(&mut self);
fn set_timed_out(&mut self);
// TODO use this crate's Error instead:
fn result(&mut self) -> Result<Self::Output, Error>;
}

View File

@@ -1,106 +1,120 @@
use err::Error;
use futures_util::{Stream, StreamExt};
use items::{RangeCompletableItem, Sitemty, StreamItem};
use items_2::streams::{Collectable, Collector};
use items_2::collect::{Collectable, Collector};
use netpod::log::*;
use serde_json::Value as JsonValue;
use std::fmt;
use std::time::Duration;
use std::time::{Duration, Instant};
// This is meant to work with trait object event containers (crate items_2)
// Debug-print helper with a built-in off switch.
// An invocation whose first token is the identifier `D` matches the first arm and expands to nothing;
// any other invocation is forwarded unchanged to `eprintln!`.
#[allow(unused)]
macro_rules! trace2 {
(D$($arg:tt)*) => ();
($($arg:tt)*) => (eprintln!($($arg)*));
}
// TODO rename, it is also used for binned:
pub async fn collect_plain_events_json<T, S>(stream: S, timeout: Duration, events_max: u64) -> Result<JsonValue, Error>
// Debug-print helper with a built-in off switch.
// An invocation whose first token is the identifier `D` matches the first arm and expands to nothing;
// any other invocation is forwarded unchanged to `eprintln!`.
#[allow(unused)]
macro_rules! trace3 {
(D$($arg:tt)*) => ();
($($arg:tt)*) => (eprintln!($($arg)*));
}
// Debug-print helper with a built-in off switch.
// An invocation whose first token is the identifier `D` matches the first arm and expands to nothing;
// any other invocation is forwarded unchanged to `eprintln!`.
#[allow(unused)]
macro_rules! trace4 {
(D$($arg:tt)*) => ();
($($arg:tt)*) => (eprintln!($($arg)*));
}
pub async fn collect<T, S>(
stream: S,
deadline: Instant,
events_max: u64,
) -> Result<<<T as Collectable>::Collector as Collector>::Output, Error>
where
S: Stream<Item = Sitemty<T>> + Unpin,
T: Collectable + fmt::Debug,
{
let deadline = tokio::time::Instant::now() + timeout;
// TODO in general a Collector does not need to know about the expected number of bins.
// It would make more sense for some specific Collector kind to know.
// Therefore introduce finer grained types.
let mut collector: Option<Box<dyn Collector>> = None;
let mut i1 = 0;
let mut collector: Option<<T as Collectable>::Collector> = None;
let mut stream = stream;
let deadline = deadline.into();
let mut range_complete = false;
let mut total_duration = Duration::ZERO;
loop {
let item = if i1 == 0 {
stream.next().await
} else {
if false {
None
} else {
match tokio::time::timeout_at(deadline, stream.next()).await {
Ok(k) => k,
Err(_) => {
eprintln!("TODO [smc3j3rwha732ru8wcnfgi]");
err::todo();
//collector.set_timed_out();
None
}
let item = match tokio::time::timeout_at(deadline, stream.next()).await {
Ok(Some(k)) => k,
Ok(None) => break,
Err(_e) => {
if let Some(coll) = collector.as_mut() {
coll.set_timed_out();
} else {
eprintln!("TODO [861a95813]");
err::todo();
}
break;
}
};
match item {
Some(item) => {
match item {
Ok(item) => match item {
StreamItem::Log(item) => {
trace!("collect_plain_events_json log {:?}", item);
Ok(item) => match item {
StreamItem::DataItem(item) => match item {
RangeCompletableItem::RangeComplete => {
range_complete = true;
if let Some(coll) = collector.as_mut() {
coll.set_range_complete();
} else {
eprintln!("TODO [7cc0fca8f]");
err::todo();
}
StreamItem::Stats(item) => {
use items::StatsItem;
use netpod::DiskStats;
match item {
// TODO factor and simplify the stats collection:
StatsItem::EventDataReadStats(_) => {}
StatsItem::RangeFilterStats(_) => {}
StatsItem::DiskStats(item) => match item {
DiskStats::OpenStats(k) => {
total_duration += k.duration;
}
DiskStats::SeekStats(k) => {
total_duration += k.duration;
}
DiskStats::ReadStats(k) => {
total_duration += k.duration;
}
DiskStats::ReadExactStats(k) => {
total_duration += k.duration;
}
},
}
}
RangeCompletableItem::Data(mut item) => {
eprintln!("COLLECTOR INGEST ITEM");
if collector.is_none() {
let c = item.new_collector();
collector = Some(c);
}
StreamItem::DataItem(item) => match item {
RangeCompletableItem::RangeComplete => {
eprintln!("TODO [73jdfcgf947d]");
err::todo();
//collector.set_range_complete();
let coll = collector.as_mut().unwrap();
coll.ingest(&mut item);
if coll.len() as u64 >= events_max {
break;
}
}
},
StreamItem::Log(item) => {
trace!("Log {:?}", item);
}
StreamItem::Stats(item) => {
trace!("Stats {:?}", item);
use items::StatsItem;
use netpod::DiskStats;
match item {
// TODO factor and simplify the stats collection:
StatsItem::EventDataReadStats(_) => {}
StatsItem::RangeFilterStats(_) => {}
StatsItem::DiskStats(item) => match item {
DiskStats::OpenStats(k) => {
total_duration += k.duration;
}
RangeCompletableItem::Data(item) => {
eprintln!("TODO [nx298nu98venusfc8]");
err::todo();
//collector.ingest(&item);
i1 += 1;
if i1 >= events_max {
break;
}
DiskStats::SeekStats(k) => {
total_duration += k.duration;
}
DiskStats::ReadStats(k) => {
total_duration += k.duration;
}
DiskStats::ReadExactStats(k) => {
total_duration += k.duration;
}
},
},
Err(e) => {
// TODO Need to use some flags to get good enough error message for remote user.
Err(e)?;
}
};
}
},
Err(e) => {
// TODO Need to use some flags to get good enough error message for remote user.
Err(e)?;
}
None => break,
}
}
let _ = range_complete;
let res = collector
.ok_or_else(|| Error::with_msg_no_trace(format!("no collector created")))?
.result()?;
let ret = serde_json::to_value(&res)?;
debug!("Total duration: {:?}", total_duration);
Ok(ret)
Ok(res)
}

View File

@@ -1,15 +1,15 @@
#[cfg(test)]
mod collect;
#[cfg(test)]
mod timebin;
use err::Error;
use futures_util::{stream, Stream, StreamExt};
use futures_util::{stream, Stream};
use items::{sitem_data, Sitemty};
use items_2::eventsdim0::EventsDim0;
use items_2::merger_cev::ChannelEventsMerger;
use items_2::{ChannelEvents, Empty};
use netpod::timeunits::SEC;
use std::pin::Pin;
use std::time::Duration;
type BoxedEventStream = Pin<Box<dyn Stream<Item = Sitemty<ChannelEvents>> + Send>>;
@@ -41,24 +41,6 @@ fn empty_input() -> Result<(), Error> {
Err(Error::with_msg_no_trace("TODO"))
}
// Builds a `ChannelEventsMerger` over the two inputs returned by
// `inmem_test_events_d0_i32_00` and `inmem_test_events_d0_i32_01`, prints every
// merged item to stderr and then passes the merger on to `collect_plain_events_json`.
// NOTE(review): the `while let` loop only ends once `merged.next()` yields `None`,
// so the collector is handed an already-drained stream — confirm that is intended.
#[test]
fn merge_channel_events() -> Result<(), Error> {
let fut = async {
let inp0 = inmem_test_events_d0_i32_00();
let inp1 = inmem_test_events_d0_i32_01();
let mut merged = ChannelEventsMerger::new(vec![inp0, inp1]);
while let Some(item) = merged.next().await {
eprintln!("item {item:?}");
}
// Limits handed to the collector: a 4000 ms timeout and at most 10000 events.
let timeout = Duration::from_millis(4000);
let events_max = 10000;
// TODO add event collection
// The collected value is bound but not inspected; only a failure is propagated via `?`.
let collected = crate::collect::collect_plain_events_json(merged, timeout, events_max).await?;
Ok(())
};
runfut(fut)
}
#[test]
fn merge_mergeable_00() -> Result<(), Error> {
let fut = async {

View File

@@ -0,0 +1,22 @@
use crate::test::runfut;
use err::Error;
use futures_util::stream;
use items::sitem_data;
use items_2::testgen::make_some_boxed_d0_f32;
use netpod::timeunits::SEC;
use std::time::{Duration, Instant};
/// Feeds two generated event batches through `crate::collect::collect` and
/// prints the collected result to stderr. The test fails only if collecting
/// returns an error; the result itself is not asserted on.
#[test]
fn collect_channel_events() -> Result<(), Error> {
    runfut(async {
        // Two batches built with the same generator, differing in the second
        // and the last argument.
        let batches = vec![
            sitem_data(make_some_boxed_d0_f32(20, SEC * 10, SEC * 1, 0, 28736487)),
            sitem_data(make_some_boxed_d0_f32(20, SEC * 30, SEC * 1, 0, 882716583)),
        ];
        let input = stream::iter(batches);
        // Give the collector 4000 ms from now and a cap of 10000 events.
        let deadline = Instant::now() + Duration::from_millis(4000);
        let events_max = 10000;
        let res = crate::collect::collect(input, deadline, events_max).await?;
        eprintln!("collected result: {res:?}");
        Ok(())
    })
}