WIP refactor data event pipeline

This commit is contained in:
Dominik Werder
2022-11-15 16:16:16 +01:00
parent fb78f1887e
commit eebf8665ce
24 changed files with 800 additions and 180 deletions

View File

@@ -99,6 +99,10 @@ impl LogItem {
/// Item type flowing through the event pipeline: a data/log/stats stream item,
/// or an error.
pub type Sitemty<T> = Result<StreamItem<RangeCompletableItem<T>>, Error>;

/// Wrap a plain value as a successful `Sitemty` data item.
pub fn sitem_data<X>(x: X) -> Sitemty<X> {
    let data = RangeCompletableItem::Data(x);
    Ok(StreamItem::DataItem(data))
}
// Zero-sized visitor type; the serde `Visitor` impl follows below.
struct VisitLevel;
impl<'de> Visitor<'de> for VisitLevel {

View File

@@ -1,6 +1,12 @@
use crate::{RangeCompletableItem, Sitemty, StreamItem, WithLen};
use err::Error;
use futures_util::{Stream, StreamExt};
use netpod::log::*;
use serde::Serialize;
use serde_json::Value as JsonValue;
use std::fmt;
use std::time::Duration;
use tokio::time::timeout_at;
pub trait Collector: Send + Unpin + WithLen {
type Input: Collectable;
@@ -45,3 +51,98 @@ impl ToJsonResult for Sitemty<serde_json::Value> {
}
}
}
// TODO rename, it is also used for binned:
/// Drain `stream` into a fresh `Collector` and serialize the result to JSON.
///
/// The first item is awaited without a deadline; once at least one data item
/// has been ingested, each subsequent `next()` is bounded by `deadline`
/// (`now + timeout`, measured at entry). Hitting the deadline marks the
/// collector as timed out and ends collection. Collection also ends when the
/// stream is exhausted or after `events_max` data items have been ingested.
///
/// * `bin_count_exp` — expected bin count, forwarded to the collector
///   (see TODO below: not every collector needs this).
/// * `events_max` — upper bound on ingested data items.
/// * `do_log` — if set, emit a debug line for each `StreamItem::Log`.
///
/// Disk-stats durations are summed and logged at the end. The first `Err`
/// item on the stream aborts collection and is returned to the caller.
pub async fn collect_plain_events_json<T, S>(
    stream: S,
    timeout: Duration,
    bin_count_exp: u32,
    events_max: u64,
    do_log: bool,
) -> Result<JsonValue, Error>
where
    S: Stream<Item = Sitemty<T>> + Unpin,
    T: Collectable + fmt::Debug,
{
    let deadline = tokio::time::Instant::now() + timeout;
    // TODO in general a Collector does not need to know about the expected number of bins.
    // It would make more sense for some specific Collector kind to know.
    // Therefore introduce finer grained types.
    let mut collector = <T as Collectable>::new_collector(bin_count_exp);
    let mut i1 = 0;
    let mut stream = stream;
    let mut total_duration = Duration::ZERO;
    loop {
        let item = if i1 == 0 {
            // Never time out while waiting for the very first item.
            stream.next().await
        } else {
            match timeout_at(deadline, stream.next()).await {
                Ok(k) => k,
                Err(_) => {
                    collector.set_timed_out();
                    None
                }
            }
        };
        match item {
            None => break,
            Some(Ok(StreamItem::Log(item))) => {
                if do_log {
                    debug!("collect_plain_events_json log {:?}", item);
                }
            }
            Some(Ok(StreamItem::Stats(item))) => {
                use crate::StatsItem;
                use netpod::DiskStats;
                match item {
                    // TODO factor and simplify the stats collection:
                    StatsItem::EventDataReadStats(_) => {}
                    StatsItem::RangeFilterStats(_) => {}
                    StatsItem::DiskStats(item) => match item {
                        DiskStats::OpenStats(k) => {
                            total_duration += k.duration;
                        }
                        DiskStats::SeekStats(k) => {
                            total_duration += k.duration;
                        }
                        DiskStats::ReadStats(k) => {
                            total_duration += k.duration;
                        }
                        DiskStats::ReadExactStats(k) => {
                            total_duration += k.duration;
                        }
                    },
                }
            }
            Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))) => {
                collector.set_range_complete();
            }
            Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(item)))) => {
                collector.ingest(&item);
                i1 += 1;
                if i1 >= events_max {
                    break;
                }
            }
            Some(Err(e)) => {
                // TODO Need to use some flags to get good enough error message for remote user.
                return Err(e);
            }
        }
    }
    let ret = serde_json::to_value(collector.result()?)?;
    debug!("Total duration: {:?}", total_duration);
    Ok(ret)
}