This commit is contained in:
Dominik Werder
2021-06-25 12:56:42 +02:00
parent 7e0a540aa6
commit 4b2048c103
20 changed files with 433 additions and 286 deletions

View File

@@ -82,7 +82,30 @@ where
Poll::Ready(None)
} else {
let inp_poll_span = span!(Level::TRACE, "into_t_inp_poll");
inp_poll_span.in_scope(|| self.inp.poll_next_unpin(cx))
let t = inp_poll_span.in_scope(|| self.inp.poll_next_unpin(cx));
if false {
// TODO collect as stats:
use Poll::*;
match &t {
Ready(item) => match item {
Some(item) => match item {
Ok(item) => match item {
StreamItem::DataItem(item) => match item {
RangeCompletableItem::Data(item) => {
info!("time binner got batch len {}", item.len());
}
_ => {}
},
_ => {}
},
_ => {}
},
_ => {}
},
_ => {}
}
}
t
}
}

View File

@@ -1,4 +1,4 @@
use crate::eventblobs::EventBlobsComplete;
use crate::eventblobs::EventChunkerMultifile;
use crate::eventchunker::EventChunkerConf;
use netpod::timeunits::*;
use netpod::{ByteOrder, ByteSize, Channel, ChannelConfig, NanoRange, Nanos, Node, ScalarType, Shape};
@@ -53,7 +53,7 @@ async fn agg_x_dim_0_inner() {
let ts2 = ts1 + HOUR * 24;
let range = NanoRange { beg: ts1, end: ts2 };
let event_chunker_conf = EventChunkerConf::new(ByteSize::kb(1024));
let fut1 = EventBlobsComplete::new(
let fut1 = EventChunkerMultifile::new(
range.clone(),
query.channel_config.clone(),
node.clone(),
@@ -102,7 +102,7 @@ async fn agg_x_dim_1_inner() {
let ts2 = ts1 + HOUR * 24;
let range = NanoRange { beg: ts1, end: ts2 };
let event_chunker_conf = EventChunkerConf::new(ByteSize::kb(1024));
let fut1 = super::eventblobs::EventBlobsComplete::new(
let fut1 = super::eventblobs::EventChunkerMultifile::new(
range.clone(),
query.channel_config.clone(),
node.clone(),

View File

@@ -137,6 +137,7 @@ impl ChannelExecFunction for BinnedBinaryChannelExec {
shape,
self.query.agg_kind().clone(),
self.query.cache_usage().clone(),
self.query.disk_io_buffer_size(),
&self.node_config,
self.query.disk_stats_every().clone(),
self.query.report_error(),
@@ -156,6 +157,7 @@ impl ChannelExecFunction for BinnedBinaryChannelExec {
channel: self.query.channel().clone(),
range: self.query.range().clone(),
agg_kind: self.query.agg_kind().clone(),
disk_io_buffer_size: self.query.disk_io_buffer_size(),
};
let x_bin_count = x_bin_count(&shape, self.query.agg_kind());
let s = MergedFromRemotes::<ENP>::new(evq, perf_opts, self.node_config.node_config.cluster.clone());
@@ -403,6 +405,7 @@ impl ChannelExecFunction for BinnedJsonChannelExec {
shape,
self.query.agg_kind().clone(),
self.query.cache_usage().clone(),
self.query.disk_io_buffer_size(),
&self.node_config,
self.query.disk_stats_every().clone(),
self.query.report_error(),
@@ -423,6 +426,7 @@ impl ChannelExecFunction for BinnedJsonChannelExec {
channel: self.query.channel().clone(),
range: self.query.range().clone(),
agg_kind: self.query.agg_kind().clone(),
disk_io_buffer_size: self.query.disk_io_buffer_size(),
};
let x_bin_count = x_bin_count(&shape, self.query.agg_kind());
let s = MergedFromRemotes::<ENP>::new(evq, perf_opts, self.node_config.node_config.cluster.clone());

View File

@@ -168,6 +168,7 @@ where
shape: Shape,
agg_kind: AggKind,
cache_usage: CacheUsage,
disk_io_buffer_size: usize,
node_config: &NodeConfigCached,
disk_stats_every: ByteSize,
report_error: bool,
@@ -193,6 +194,7 @@ where
channel.clone(),
agg_kind.clone(),
cache_usage.clone(),
disk_io_buffer_size,
disk_stats_every.clone(),
report_error,
);

View File

@@ -114,6 +114,7 @@ where
channel: self.query.channel().clone(),
range: self.query.patch().patch_range(),
agg_kind: self.query.agg_kind().clone(),
disk_io_buffer_size: self.query.disk_io_buffer_size(),
};
if self.query.patch().patch_t_len() % self.query.patch().bin_t_len() != 0 {
let msg = format!(
@@ -172,6 +173,7 @@ where
let s = futures_util::stream::iter(patch_it)
.map({
let q2 = self.query.clone();
let disk_io_buffer_size = self.query.disk_io_buffer_size();
let disk_stats_every = self.query.disk_stats_every().clone();
let report_error = self.query.report_error();
move |patch| {
@@ -180,6 +182,7 @@ where
q2.channel().clone(),
q2.agg_kind().clone(),
q2.cache_usage().clone(),
disk_io_buffer_size,
disk_stats_every.clone(),
report_error,
);

View File

@@ -16,6 +16,7 @@ pub struct PreBinnedQuery {
agg_kind: AggKind,
channel: Channel,
cache_usage: CacheUsage,
disk_io_buffer_size: usize,
disk_stats_every: ByteSize,
report_error: bool,
}
@@ -26,6 +27,7 @@ impl PreBinnedQuery {
channel: Channel,
agg_kind: AggKind,
cache_usage: CacheUsage,
disk_io_buffer_size: usize,
disk_stats_every: ByteSize,
report_error: bool,
) -> Self {
@@ -34,6 +36,7 @@ impl PreBinnedQuery {
agg_kind,
channel,
cache_usage,
disk_io_buffer_size,
disk_stats_every,
report_error,
}
@@ -68,6 +71,11 @@ impl PreBinnedQuery {
channel: channel_from_pairs(&pairs)?,
agg_kind: agg_kind_from_binning_scheme(&pairs).unwrap_or(AggKind::DimXBins1),
cache_usage: CacheUsage::from_pairs(&pairs)?,
disk_io_buffer_size: pairs
.get("diskIoBufferSize")
.map_or("4096", |k| k)
.parse()
.map_err(|e| Error::with_msg(format!("can not parse diskIoBufferSize {:?}", e)))?,
disk_stats_every: ByteSize::kb(disk_stats_every),
report_error: pairs
.get("reportError")
@@ -107,6 +115,10 @@ impl PreBinnedQuery {
/// Returns a clone of the cache usage setting stored on this query.
pub fn cache_usage(&self) -> CacheUsage {
self.cache_usage.clone()
}
/// Returns the disk I/O buffer size stored on this query.
///
/// NOTE(review): the unit is presumably bytes (URL parsing falls back to
/// "4096" when `diskIoBufferSize` is absent) — confirm against the reader.
pub fn disk_io_buffer_size(&self) -> usize {
self.disk_io_buffer_size
}
}
impl AppendToUrl for PreBinnedQuery {
@@ -117,6 +129,7 @@ impl AppendToUrl for PreBinnedQuery {
g.append_pair("channelBackend", &self.channel.backend);
g.append_pair("channelName", &self.channel.name);
g.append_pair("cacheUsage", &format!("{}", self.cache_usage.query_param_value()));
g.append_pair("diskIoBufferSize", &format!("{}", self.disk_io_buffer_size));
g.append_pair("diskStatsEveryKb", &format!("{}", self.disk_stats_every.bytes() / 1024));
g.append_pair("reportError", &format!("{}", self.report_error()));
}
@@ -181,6 +194,7 @@ pub struct BinnedQuery {
bin_count: u32,
agg_kind: AggKind,
cache_usage: CacheUsage,
disk_io_buffer_size: usize,
disk_stats_every: ByteSize,
report_error: bool,
timeout: Duration,
@@ -196,6 +210,7 @@ impl BinnedQuery {
bin_count,
agg_kind,
cache_usage: CacheUsage::Use,
disk_io_buffer_size: 1024 * 4,
disk_stats_every: ByteSize(1024 * 1024 * 4),
report_error: false,
timeout: Duration::from_millis(2000),
@@ -228,6 +243,10 @@ impl BinnedQuery {
&self.disk_stats_every
}
/// Returns the disk I/O buffer size stored on this query.
///
/// NOTE(review): the unit is presumably bytes — confirm against the code
/// that consumes this value.
pub fn disk_io_buffer_size(&self) -> usize {
self.disk_io_buffer_size
}
/// Returns the `report_error` flag stored on this query.
pub fn report_error(&self) -> bool {
self.report_error
}
@@ -255,6 +274,10 @@ impl BinnedQuery {
/// Replaces the timeout stored on this query with `k`.
pub fn set_timeout(&mut self, k: Duration) {
self.timeout = k;
}
/// Replaces the disk I/O buffer size stored on this query with `k`.
///
/// No validation is performed here; `k` is stored as given.
pub fn set_disk_io_buffer_size(&mut self, k: usize) {
self.disk_io_buffer_size = k;
}
}
impl HasBackend for BinnedQuery {
@@ -291,6 +314,11 @@ impl FromUrl for BinnedQuery {
.map_err(|e| Error::with_msg(format!("can not parse binCount {:?}", e)))?,
agg_kind: agg_kind_from_binning_scheme(&pairs).unwrap_or(AggKind::DimXBins1),
cache_usage: CacheUsage::from_pairs(&pairs)?,
disk_io_buffer_size: pairs
.get("diskIoBufferSize")
.map_or("4096", |k| k)
.parse()
.map_err(|e| Error::with_msg(format!("can not parse diskIoBufferSize {:?}", e)))?,
disk_stats_every: ByteSize::kb(disk_stats_every),
report_error: pairs
.get("reportError")
@@ -342,6 +370,7 @@ impl AppendToUrl for BinnedQuery {
}
{
let mut g = url.query_pairs_mut();
g.append_pair("diskIoBufferSize", &format!("{}", self.disk_io_buffer_size));
g.append_pair("diskStatsEveryKb", &format!("{}", self.disk_stats_every.bytes() / 1024));
g.append_pair("timeout", &format!("{}", self.timeout.as_millis()));
g.append_pair("abortAfterBinCount", &format!("{}", self.abort_after_bin_count));

View File

@@ -209,15 +209,17 @@ pub struct PlainEvents {
channel: Channel,
range: NanoRange,
agg_kind: AggKind,
disk_io_buffer_size: usize,
node_config: NodeConfigCached,
}
impl PlainEvents {
pub fn new(channel: Channel, range: NanoRange, node_config: NodeConfigCached) -> Self {
pub fn new(channel: Channel, range: NanoRange, disk_io_buffer_size: usize, node_config: NodeConfigCached) -> Self {
Self {
channel,
range,
agg_kind: AggKind::Plain,
disk_io_buffer_size,
node_config,
}
}
@@ -254,6 +256,7 @@ impl ChannelExecFunction for PlainEvents {
channel: self.channel,
range: self.range,
agg_kind: self.agg_kind,
disk_io_buffer_size: self.disk_io_buffer_size,
};
let s = MergedFromRemotes::<Identity<NTY>>::new(evq, perf_opts, self.node_config.node_config.cluster);
let s = s.map(|item| Box::new(item) as Box<dyn Framable>);
@@ -269,6 +272,7 @@ pub struct PlainEventsJson {
channel: Channel,
range: NanoRange,
agg_kind: AggKind,
disk_io_buffer_size: usize,
timeout: Duration,
node_config: NodeConfigCached,
do_log: bool,
@@ -278,6 +282,7 @@ impl PlainEventsJson {
pub fn new(
channel: Channel,
range: NanoRange,
disk_io_buffer_size: usize,
timeout: Duration,
node_config: NodeConfigCached,
do_log: bool,
@@ -286,6 +291,7 @@ impl PlainEventsJson {
channel,
range,
agg_kind: AggKind::Plain,
disk_io_buffer_size,
timeout,
node_config,
do_log,
@@ -404,6 +410,7 @@ impl ChannelExecFunction for PlainEventsJson {
channel: self.channel,
range: self.range,
agg_kind: self.agg_kind,
disk_io_buffer_size: self.disk_io_buffer_size,
};
let s = MergedFromRemotes::<ENP>::new(evq, perf_opts, self.node_config.node_config.cluster);
let f = collect_plain_events_json(s, self.timeout, 0, self.do_log);

View File

@@ -6,7 +6,7 @@ use crate::binned::{
Bool, EventValuesAggregator, EventsNodeProcessor, FilterFittingInside, MinMaxAvgBins, NumOps, PushableIndex,
RangeCompletableItem, RangeOverlapInfo, ReadPbv, ReadableFromFile, WithLen, WithTimestamps,
};
use crate::eventblobs::EventBlobsComplete;
use crate::eventblobs::EventChunkerMultifile;
use crate::eventchunker::EventFull;
use err::Error;
use futures_core::Stream;
@@ -422,7 +422,7 @@ where
EVS: EventValueShape<NTY, END>,
{
evs: EVS,
event_blobs: EventBlobsComplete,
event_blobs: EventChunkerMultifile,
completed: bool,
errored: bool,
_m1: PhantomData<NTY>,
@@ -436,7 +436,7 @@ where
END: Endianness,
EVS: EventValueShape<NTY, END> + EventValueFromBytes<NTY, END>,
{
pub fn new(evs: EVS, event_blobs: EventBlobsComplete) -> Self {
pub fn new(evs: EVS, event_blobs: EventChunkerMultifile) -> Self {
Self {
evs,
event_blobs,
@@ -450,6 +450,8 @@ where
fn decode(&mut self, ev: &EventFull) -> Result<EventValues<<EVS as EventValueFromBytes<NTY, END>>::Output>, Error> {
let mut ret = EventValues::empty();
ret.tss.reserve(ev.tss.len());
ret.values.reserve(ev.tss.len());
for i1 in 0..ev.tss.len() {
// TODO check that dtype, event endianness and event shape match our static
// expectation about the data in this channel.

View File

@@ -15,7 +15,7 @@ use std::sync::atomic::AtomicU64;
use std::sync::Arc;
use std::task::{Context, Poll};
pub struct EventBlobsComplete {
pub struct EventChunkerMultifile {
channel_config: ChannelConfig,
file_chan: async_channel::Receiver<Result<OpenedFile, Error>>,
evs: Option<EventChunker>,
@@ -30,7 +30,7 @@ pub struct EventBlobsComplete {
node_ix: usize,
}
impl EventBlobsComplete {
impl EventChunkerMultifile {
pub fn new(
range: NanoRange,
channel_config: ChannelConfig,
@@ -56,7 +56,7 @@ impl EventBlobsComplete {
}
}
impl Stream for EventBlobsComplete {
impl Stream for EventChunkerMultifile {
type Item = Result<StreamItem<RangeCompletableItem<EventFull>>, Error>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {

View File

@@ -11,15 +11,17 @@ use url::Url;
pub struct PlainEventsBinaryQuery {
channel: Channel,
range: NanoRange,
disk_io_buffer_size: usize,
report_error: bool,
timeout: Duration,
}
impl PlainEventsBinaryQuery {
pub fn new(channel: Channel, range: NanoRange) -> Self {
pub fn new(channel: Channel, range: NanoRange, disk_io_buffer_size: usize) -> Self {
Self {
channel,
range,
disk_io_buffer_size,
report_error: false,
timeout: Duration::from_millis(10000),
}
@@ -35,6 +37,11 @@ impl PlainEventsBinaryQuery {
end: end_date.parse::<DateTime<Utc>>()?.to_nanos(),
},
channel: channel_from_pairs(&pairs)?,
disk_io_buffer_size: pairs
.get("diskIoBufferSize")
.map_or("4096", |k| k)
.parse()
.map_err(|e| Error::with_msg(format!("can not parse diskIoBufferSize {:?}", e)))?,
report_error: pairs
.get("reportError")
.map_or("false", |k| k)
@@ -58,6 +65,10 @@ impl PlainEventsBinaryQuery {
&self.channel
}
/// Returns the disk I/O buffer size stored on this query.
///
/// NOTE(review): the unit is presumably bytes (URL parsing falls back to
/// "4096" when `diskIoBufferSize` is absent) — confirm against the reader.
pub fn disk_io_buffer_size(&self) -> usize {
self.disk_io_buffer_size
}
/// Returns the `report_error` flag stored on this query.
pub fn report_error(&self) -> bool {
self.report_error
}
@@ -85,6 +96,7 @@ impl AppendToUrl for PlainEventsBinaryQuery {
"endDate",
&Utc.timestamp_nanos(self.range.end as i64).format(date_fmt).to_string(),
);
g.append_pair("diskIoBufferSize", &format!("{}", self.disk_io_buffer_size));
g.append_pair("timeout", &format!("{}", self.timeout.as_millis()));
}
}
@@ -94,16 +106,18 @@ impl AppendToUrl for PlainEventsBinaryQuery {
pub struct PlainEventsJsonQuery {
channel: Channel,
range: NanoRange,
disk_io_buffer_size: usize,
report_error: bool,
timeout: Duration,
do_log: bool,
}
impl PlainEventsJsonQuery {
pub fn new(channel: Channel, range: NanoRange, do_log: bool) -> Self {
pub fn new(channel: Channel, range: NanoRange, disk_io_buffer_size: usize, do_log: bool) -> Self {
Self {
channel,
range,
disk_io_buffer_size,
report_error: false,
timeout: Duration::from_millis(10000),
do_log,
@@ -120,6 +134,11 @@ impl PlainEventsJsonQuery {
end: end_date.parse::<DateTime<Utc>>()?.to_nanos(),
},
channel: channel_from_pairs(&pairs)?,
disk_io_buffer_size: pairs
.get("diskIoBufferSize")
.map_or("4096", |k| k)
.parse()
.map_err(|e| Error::with_msg(format!("can not parse diskIoBufferSize {:?}", e)))?,
report_error: pairs
.get("reportError")
.map_or("false", |k| k)
@@ -158,6 +177,10 @@ impl PlainEventsJsonQuery {
self.report_error
}
/// Returns the disk I/O buffer size stored on this query.
///
/// NOTE(review): the unit is presumably bytes (URL parsing falls back to
/// "4096" when `diskIoBufferSize` is absent) — confirm against the reader.
pub fn disk_io_buffer_size(&self) -> usize {
self.disk_io_buffer_size
}
/// Returns the timeout stored on this query.
pub fn timeout(&self) -> Duration {
self.timeout
}
@@ -183,6 +206,7 @@ impl PlainEventsJsonQuery {
"endDate",
&Utc.timestamp_nanos(self.range.end as i64).format(date_fmt).to_string(),
);
g.append_pair("diskIoBufferSize", &format!("{}", self.disk_io_buffer_size));
g.append_pair("timeout", &format!("{}", self.timeout.as_millis()));
g.append_pair("doLog", &format!("{}", self.do_log));
}

View File

@@ -189,8 +189,6 @@ where
}
if lowest_ix == usize::MAX {
if self.batch.len() != 0 {
//let k = std::mem::replace(&mut self.batch, MinMaxAvgScalarEventBatch::empty());
//let ret = MinMaxAvgScalarEventBatchStreamItem::Values(k);
let emp = <<ENP as EventsNodeProcessor>::Output>::empty();
let ret = std::mem::replace(&mut self.batch, emp);
self.data_emit_complete = true;

View File

@@ -30,6 +30,7 @@ pub struct RawEventsQuery {
pub channel: Channel,
pub range: NanoRange,
pub agg_kind: AggKind,
pub disk_io_buffer_size: usize,
}
#[derive(Serialize, Deserialize)]

View File

@@ -5,7 +5,7 @@ use crate::decode::{
BigEndian, Endianness, EventValueFromBytes, EventValueShape, EventValuesDim0Case, EventValuesDim1Case,
EventsDecodedStream, LittleEndian, NumFromBytes,
};
use crate::eventblobs::EventBlobsComplete;
use crate::eventblobs::EventChunkerMultifile;
use crate::eventchunker::EventChunkerConf;
use crate::frame::inmem::InMemoryFrameAsyncReadStream;
use crate::frame::makeframe::{decode_frame, make_frame, make_term_frame, Framable};
@@ -99,7 +99,7 @@ impl<E: Into<Error>> From<(E, OwnedWriteHalf)> for ConnErr {
fn make_num_pipeline_stream_evs<NTY, END, EVS, ENP>(
event_value_shape: EVS,
events_node_proc: ENP,
event_blobs: EventBlobsComplete,
event_blobs: EventChunkerMultifile,
) -> Pin<Box<dyn Stream<Item = Box<dyn Framable>> + Send>>
where
NTY: NumOps + NumFromBytes<NTY, END> + 'static,
@@ -286,16 +286,13 @@ async fn events_conn_handler_inner_try(
array: entry.is_array,
compression: entry.is_compressed,
};
// TODO use a requested buffer size
let buffer_size = 1024 * 4;
let event_chunker_conf = EventChunkerConf::new(ByteSize::kb(1024));
let event_blobs = EventBlobsComplete::new(
let event_blobs = EventChunkerMultifile::new(
range.clone(),
channel_config.clone(),
node_config.node.clone(),
node_config.ix,
buffer_size,
evq.disk_io_buffer_size,
event_chunker_conf,
);
let shape = entry.to_shape().unwrap();