Refactoring

Dominik Werder
2022-06-15 14:27:38 +02:00
parent 7063842c4c
commit 9e3395bf13
25 changed files with 429 additions and 421 deletions

View File

@@ -112,16 +112,12 @@ impl ChannelExecFunction for BinnedBinaryChannelExec {
debug!(
"BinnedBinaryChannelExec no covering range for prebinned, merge from remotes instead {range:?}"
);
// TODO let BinnedQuery provide the DiskIoTune.
let mut disk_io_tune = netpod::DiskIoTune::default();
disk_io_tune.read_buffer_len = self.query.disk_io_buffer_size() as usize;
let evq = RawEventsQuery {
channel: self.query.channel().clone(),
range: self.query.range().clone(),
agg_kind: self.query.agg_kind().clone(),
disk_io_tune,
do_decompress: true,
};
// TODO let BinnedQuery provide the DiskIoTune and pass to RawEventsQuery:
let evq = RawEventsQuery::new(
self.query.channel().clone(),
self.query.range().clone(),
self.query.agg_kind().clone(),
);
let x_bin_count = x_bin_count(&shape, self.query.agg_kind());
let s = MergedFromRemotes::<ENP>::new(evq, perf_opts, self.node_config.node_config.cluster.clone());
let s = TBinnerStream::<_, <ENP as EventsNodeProcessor>::Output>::new(
@@ -369,16 +365,12 @@ impl ChannelExecFunction for BinnedJsonChannelExec {
}
Ok(None) => {
debug!("BinnedJsonChannelExec no covering range for prebinned, merge from remotes instead {range:?}");
// TODO let BinnedQuery provide the DiskIoTune.
let mut disk_io_tune = netpod::DiskIoTune::default();
disk_io_tune.read_buffer_len = self.query.disk_io_buffer_size() as usize;
let evq = RawEventsQuery {
channel: self.query.channel().clone(),
range: self.query.range().clone(),
agg_kind: self.query.agg_kind().clone(),
disk_io_tune,
do_decompress: true,
};
// TODO let BinnedQuery provide the DiskIoTune and pass to RawEventsQuery:
let evq = RawEventsQuery::new(
self.query.channel().clone(),
self.query.range().clone(),
self.query.agg_kind().clone(),
);
let x_bin_count = x_bin_count(&shape, self.query.agg_kind());
let s = MergedFromRemotes::<ENP>::new(evq, perf_opts, self.node_config.node_config.cluster.clone());
let s = TBinnerStream::<_, <ENP as EventsNodeProcessor>::Output>::new(

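The recurring change in this file swaps a hand-written RawEventsQuery struct literal for a constructor call. Below is a minimal sketch of what RawEventsQuery::new presumably does, inferred only from the removed literals (default DiskIoTune, do_decompress set to true); the real definition lives in netpod and is not part of this diff.

// Hedged sketch of the constructor introduced by this refactoring; the exact
// defaults are assumed from the removed struct literals and may differ from
// the real netpod query type.
use netpod::{AggKind, Channel, DiskIoTune, NanoRange};

pub struct RawEventsQuery {
    pub channel: Channel,
    pub range: NanoRange,
    pub agg_kind: AggKind,
    pub disk_io_tune: DiskIoTune,
    pub do_decompress: bool,
}

impl RawEventsQuery {
    pub fn new(channel: Channel, range: NanoRange, agg_kind: AggKind) -> Self {
        Self {
            channel,
            range,
            agg_kind,
            // Callers no longer set the read buffer length here; per the TODOs
            // above, BinnedQuery should eventually provide the DiskIoTune.
            disk_io_tune: DiskIoTune::default(),
            do_decompress: true,
        }
    }
}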
View File

@@ -103,16 +103,12 @@ where
Pin<Box<dyn Stream<Item = Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>> + Send>>,
Error,
> {
// TODO let PreBinnedQuery provide the tune:
let mut disk_io_tune = netpod::DiskIoTune::default();
disk_io_tune.read_buffer_len = self.query.disk_io_buffer_size();
let evq = RawEventsQuery {
channel: self.query.channel().clone(),
range: self.query.patch().patch_range(),
agg_kind: self.query.agg_kind().clone(),
disk_io_tune,
do_decompress: true,
};
// TODO let PreBinnedQuery provide the tune and pass to RawEventsQuery:
let evq = RawEventsQuery::new(
self.query.channel().clone(),
self.query.patch().patch_range(),
self.query.agg_kind().clone(),
);
if self.query.patch().patch_t_len() % self.query.patch().bin_t_len() != 0 {
let msg = format!(
"Patch length inconsistency {} {}",
@@ -219,6 +215,20 @@ where
}
Ok(())
}
fn poll_open_check_local_file(
self: &mut Self,
_fut: Pin<Box<dyn Future<Output = Result<File, io::Error>> + Send>>,
) -> (
Poll<Option<Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>>>,
Pin<Box<dyn Future<Output = Result<File, io::Error>> + Send>>,
) {
todo!()
}
fn _check_for_existing_cached_data(&mut self) -> Result<(), Error> {
todo!()
}
}
impl<NTY, END, EVS, ENP> Stream for PreBinnedValueStream<NTY, END, EVS, ENP>
@@ -367,6 +377,10 @@ where
}
Pending => Pending,
}
} else if let Some(fut) = self.open_check_local_file.take() {
let (res, fut) = Self::poll_open_check_local_file(&mut self, fut);
self.open_check_local_file = Some(fut);
res
} else if let Some(fut) = self.open_check_local_file.as_mut() {
match fut.poll_unpin(cx) {
Ready(item) => {

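The new branch above takes the boxed future out of self.open_check_local_file, hands it to a helper, and stores it back for the next poll. The following is a standalone sketch of that take-and-restore polling pattern, with simplified names and item types that are not the crate's own.

// Simplified stand-in for the stream state: only the field relevant to the
// pattern is kept, and the item type is reduced to a plain Result.
use futures_util::FutureExt;
use std::future::Future;
use std::pin::Pin;
use std::task::{Context, Poll};

type BoxedFut<T> = Pin<Box<dyn Future<Output = T> + Send>>;

struct LocalFileCheck {
    open_check_local_file: Option<BoxedFut<Result<u64, std::io::Error>>>,
}

impl LocalFileCheck {
    fn poll_check(&mut self, cx: &mut Context<'_>) -> Poll<Option<Result<u64, std::io::Error>>> {
        if let Some(mut fut) = self.open_check_local_file.take() {
            match fut.poll_unpin(cx) {
                Poll::Ready(res) => Poll::Ready(Some(res)),
                Poll::Pending => {
                    // Not ready yet: put the future back so the next poll resumes the same one.
                    self.open_check_local_file = Some(fut);
                    Poll::Pending
                }
            }
        } else {
            Poll::Ready(None)
        }
    }
}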
View File

@@ -77,6 +77,8 @@ impl AsyncRead for HttpBodyAsAsyncRead {
}
}
// For file-based caching, this determines the node where the cache file is located.
// No longer needed for scylla-based caching.
pub fn node_ix_for_patch(patch_coord: &PreBinnedPatchCoord, channel: &Channel, cluster: &Cluster) -> u32 {
let mut hash = tiny_keccak::Sha3::v256();
hash.update(channel.backend.as_bytes());
@@ -158,6 +160,7 @@ pub struct WrittenPbCache {
pub duration: Duration,
}
// TODO only used for old archiver
pub async fn write_pb_cache_min_max_avg_scalar<T>(
values: T,
patch: PreBinnedPatchCoord,

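node_ix_for_patch above pins each pre-binned patch to a node by hashing channel and patch identity. Only the Sha3 setup and the backend update are visible in this hunk; the sketch below assumes how such a function typically completes (which fields get hashed and the modulo reduction are guesses, not the crate's code).

// Hedged sketch of deterministic node selection via hashing, as in node_ix_for_patch.
use tiny_keccak::Hasher;

fn node_ix_sketch(backend: &str, channel_name: &str, patch_bytes: &[u8], node_count: u32) -> u32 {
    let mut hash = tiny_keccak::Sha3::v256();
    hash.update(backend.as_bytes());
    hash.update(channel_name.as_bytes());
    hash.update(patch_bytes);
    let mut out = [0u8; 32];
    hash.finalize(&mut out);
    // Reduce the digest to a node index; every node computes the same value,
    // so the cache file for a given patch always lands on the same node.
    u32::from_le_bytes([out[0], out[1], out[2], out[3]]) % node_count
}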
View File

@@ -3,6 +3,7 @@ use crate::decode::{
BigEndian, Endianness, EventValueFromBytes, EventValueShape, EventValuesDim0Case, EventValuesDim1Case,
LittleEndian, NumFromBytes,
};
use crate::events::PlainEventsQuery;
use crate::merge::mergedfromremotes::MergedFromRemotes;
use bytes::Bytes;
use err::Error;
@@ -18,9 +19,7 @@ use items::{
};
use netpod::log::*;
use netpod::query::RawEventsQuery;
use netpod::{
AggKind, ByteOrder, Channel, ChannelConfigQuery, NanoRange, NodeConfigCached, PerfOpts, ScalarType, Shape,
};
use netpod::{AggKind, ByteOrder, Channel, NanoRange, NodeConfigCached, PerfOpts, ScalarType, Shape};
use serde::de::DeserializeOwned;
use serde_json::Value as JsonValue;
use std::fmt::Debug;
@@ -195,8 +194,8 @@ where
pub async fn channel_exec<F>(
f: F,
channel: &Channel,
range: &NanoRange,
_channel: &Channel,
_range: &NanoRange,
scalar_type: ScalarType,
shape: Shape,
agg_kind: AggKind,
@@ -221,17 +220,15 @@ pub struct PlainEvents {
channel: Channel,
range: NanoRange,
agg_kind: AggKind,
disk_io_buffer_size: usize,
node_config: NodeConfigCached,
}
impl PlainEvents {
pub fn new(channel: Channel, range: NanoRange, disk_io_buffer_size: usize, node_config: NodeConfigCached) -> Self {
pub fn new(channel: Channel, range: NanoRange, node_config: NodeConfigCached) -> Self {
Self {
channel,
range,
agg_kind: AggKind::Plain,
disk_io_buffer_size,
node_config,
}
}
@@ -265,16 +262,8 @@ impl ChannelExecFunction for PlainEvents {
let _ = byte_order;
let _ = event_value_shape;
let perf_opts = PerfOpts { inmem_bufcap: 4096 };
// TODO let upstream provide DiskIoTune
let mut disk_io_tune = netpod::DiskIoTune::default();
disk_io_tune.read_buffer_len = self.disk_io_buffer_size;
let evq = RawEventsQuery {
channel: self.channel,
range: self.range,
agg_kind: self.agg_kind,
disk_io_tune,
do_decompress: true,
};
// TODO let upstream provide DiskIoTune and pass in RawEventsQuery:
let evq = RawEventsQuery::new(self.channel, self.range, self.agg_kind);
let s = MergedFromRemotes::<Identity<NTY>>::new(evq, perf_opts, self.node_config.node_config.cluster);
let s = s.map(|item| Box::new(item) as Box<dyn Framable>);
Ok(Box::pin(s))
@@ -286,10 +275,10 @@ impl ChannelExecFunction for PlainEvents {
}
pub struct PlainEventsJson {
query: PlainEventsQuery,
channel: Channel,
range: NanoRange,
agg_kind: AggKind,
disk_io_buffer_size: usize,
timeout: Duration,
node_config: NodeConfigCached,
events_max: u64,
@@ -298,19 +287,19 @@ pub struct PlainEventsJson {
impl PlainEventsJson {
pub fn new(
query: PlainEventsQuery,
channel: Channel,
range: NanoRange,
disk_io_buffer_size: usize,
timeout: Duration,
node_config: NodeConfigCached,
events_max: u64,
do_log: bool,
) -> Self {
Self {
query,
channel,
range,
agg_kind: AggKind::Plain,
disk_io_buffer_size,
timeout,
node_config,
events_max,
@@ -373,6 +362,7 @@ where
}
}
StreamItem::Stats(item) => match item {
// TODO factor and simplify the stats collection:
items::StatsItem::EventDataReadStats(_) => {}
items::StatsItem::RangeFilterStats(_) => {}
items::StatsItem::DiskStats(item) => match item {
@@ -422,10 +412,10 @@ impl ChannelExecFunction for PlainEventsJson {
fn exec<NTY, END, EVS, ENP>(
self,
byte_order: END,
_byte_order: END,
_scalar_type: ScalarType,
_shape: Shape,
event_value_shape: EVS,
_event_value_shape: EVS,
_events_node_proc: ENP,
) -> Result<Self::Output, Error>
where
@@ -443,19 +433,11 @@ impl ChannelExecFunction for PlainEventsJson {
Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>:
FrameType + Framable + DeserializeOwned,
{
let _ = byte_order;
let _ = event_value_shape;
let perf_opts = PerfOpts { inmem_bufcap: 4096 };
// TODO let upstream provide DiskIoTune
let mut disk_io_tune = netpod::DiskIoTune::default();
disk_io_tune.read_buffer_len = self.disk_io_buffer_size;
let evq = RawEventsQuery {
channel: self.channel,
range: self.range,
agg_kind: self.agg_kind,
disk_io_tune,
do_decompress: true,
};
// TODO let upstream provide DiskIoTune and set in RawEventsQuery.
let mut evq = RawEventsQuery::new(self.channel, self.range, self.agg_kind);
evq.do_test_main_error = self.query.do_test_main_error();
evq.do_test_stream_error = self.query.do_test_stream_error();
let s = MergedFromRemotes::<ENP>::new(evq, perf_opts, self.node_config.node_config.cluster);
let f = collect_plain_events_json(s, self.timeout, 0, self.events_max, self.do_log);
let f = FutureExt::map(f, |item| match item {

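After the constructor call, the executor copies the two fault-injection flags from the incoming PlainEventsQuery onto the RawEventsQuery. Below is a compressed, hypothetical version of that wiring; the struct definitions are reduced to the flags involved and are not the crate's real types.

// Hypothetical minimal types; the real PlainEventsQuery and RawEventsQuery
// carry many more fields.
struct PlainEventsQuery {
    do_test_main_error: bool,
    do_test_stream_error: bool,
}

#[derive(Default)]
struct RawEventsQuery {
    do_test_main_error: bool,
    do_test_stream_error: bool,
}

fn wire_test_flags(q: &PlainEventsQuery) -> RawEventsQuery {
    let mut evq = RawEventsQuery::default();
    // Forward the switches so a test request can force an error either in the
    // main future or inside the merged event stream.
    evq.do_test_main_error = q.do_test_main_error;
    evq.do_test_stream_error = q.do_test_stream_error;
    evq
}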
View File

@@ -7,101 +7,7 @@ use url::Url;
// TODO move this query type out of this `binned` mod
#[derive(Clone, Debug)]
pub struct PlainEventsBinaryQuery {
channel: Channel,
range: NanoRange,
disk_io_buffer_size: usize,
report_error: bool,
timeout: Duration,
}
impl PlainEventsBinaryQuery {
pub fn new(channel: Channel, range: NanoRange, disk_io_buffer_size: usize) -> Self {
Self {
channel,
range,
disk_io_buffer_size,
report_error: false,
timeout: Duration::from_millis(10000),
}
}
pub fn from_url(url: &Url) -> Result<Self, Error> {
let pairs = get_url_query_pairs(url);
let beg_date = pairs.get("begDate").ok_or(Error::with_msg("missing begDate"))?;
let end_date = pairs.get("endDate").ok_or(Error::with_msg("missing endDate"))?;
let ret = Self {
channel: channel_from_pairs(&pairs)?,
range: NanoRange {
beg: beg_date.parse::<DateTime<Utc>>()?.to_nanos(),
end: end_date.parse::<DateTime<Utc>>()?.to_nanos(),
},
disk_io_buffer_size: pairs
.get("diskIoBufferSize")
.map_or("4096", |k| k)
.parse()
.map_err(|e| Error::with_msg(format!("can not parse diskIoBufferSize {:?}", e)))?,
report_error: pairs
.get("reportError")
.map_or("false", |k| k)
.parse()
.map_err(|e| Error::with_msg(format!("can not parse reportError {:?}", e)))?,
timeout: pairs
.get("timeout")
.map_or("10000", |k| k)
.parse::<u64>()
.map(|k| Duration::from_millis(k))
.map_err(|e| Error::with_msg(format!("can not parse timeout {:?}", e)))?,
};
Ok(ret)
}
pub fn range(&self) -> &NanoRange {
&self.range
}
pub fn channel(&self) -> &Channel {
&self.channel
}
pub fn disk_io_buffer_size(&self) -> usize {
self.disk_io_buffer_size
}
pub fn report_error(&self) -> bool {
self.report_error
}
pub fn timeout(&self) -> Duration {
self.timeout
}
pub fn set_timeout(&mut self, k: Duration) {
self.timeout = k;
}
}
impl AppendToUrl for PlainEventsBinaryQuery {
fn append_to_url(&self, url: &mut Url) {
let date_fmt = "%Y-%m-%dT%H:%M:%S.%3fZ";
channel_append_to_url(url, &self.channel);
let mut g = url.query_pairs_mut();
g.append_pair(
"begDate",
&Utc.timestamp_nanos(self.range.beg as i64).format(date_fmt).to_string(),
);
g.append_pair(
"endDate",
&Utc.timestamp_nanos(self.range.end as i64).format(date_fmt).to_string(),
);
g.append_pair("diskIoBufferSize", &format!("{}", self.disk_io_buffer_size));
g.append_pair("timeout", &format!("{}", self.timeout.as_millis()));
}
}
// TODO move this query type out of this `binned` mod
#[derive(Clone, Debug)]
pub struct PlainEventsJsonQuery {
pub struct PlainEventsQuery {
channel: Channel,
range: NanoRange,
disk_io_buffer_size: usize,
@@ -109,9 +15,11 @@ pub struct PlainEventsJsonQuery {
timeout: Duration,
events_max: Option<u64>,
do_log: bool,
do_test_main_error: bool,
do_test_stream_error: bool,
}
impl PlainEventsJsonQuery {
impl PlainEventsQuery {
pub fn new(
channel: Channel,
range: NanoRange,
@@ -127,6 +35,8 @@ impl PlainEventsJsonQuery {
timeout: Duration::from_millis(10000),
events_max,
do_log,
do_test_main_error: false,
do_test_stream_error: false,
}
}
@@ -164,6 +74,16 @@ impl PlainEventsJsonQuery {
.map_or("false", |k| k)
.parse()
.map_err(|e| Error::with_public_msg(format!("can not parse doLog {:?}", e)))?,
do_test_main_error: pairs
.get("doTestMainError")
.map_or("false", |k| k)
.parse()
.map_err(|e| Error::with_public_msg(format!("can not parse doTestMainError {:?}", e)))?,
do_test_stream_error: pairs
.get("doTestStreamError")
.map_or("false", |k| k)
.parse()
.map_err(|e| Error::with_public_msg(format!("can not parse doTestStreamError {:?}", e)))?,
};
Ok(ret)
}
@@ -202,10 +122,26 @@ impl PlainEventsJsonQuery {
self.do_log
}
pub fn do_test_main_error(&self) -> bool {
self.do_test_main_error
}
pub fn do_test_stream_error(&self) -> bool {
self.do_test_stream_error
}
pub fn set_timeout(&mut self, k: Duration) {
self.timeout = k;
}
pub fn set_do_test_main_error(&mut self, k: bool) {
self.do_test_main_error = k;
}
pub fn set_do_test_stream_error(&mut self, k: bool) {
self.do_test_stream_error = k;
}
pub fn append_to_url(&self, url: &mut Url) {
let date_fmt = "%Y-%m-%dT%H:%M:%S.%3fZ";
channel_append_to_url(url, &self.channel);
@@ -227,25 +163,25 @@ impl PlainEventsJsonQuery {
}
}
impl HasBackend for PlainEventsJsonQuery {
impl HasBackend for PlainEventsQuery {
fn backend(&self) -> &str {
&self.channel.backend
}
}
impl HasTimeout for PlainEventsJsonQuery {
impl HasTimeout for PlainEventsQuery {
fn timeout(&self) -> Duration {
self.timeout.clone()
}
}
impl FromUrl for PlainEventsJsonQuery {
impl FromUrl for PlainEventsQuery {
fn from_url(url: &Url) -> Result<Self, Error> {
Self::from_url(url)
}
}
impl AppendToUrl for PlainEventsJsonQuery {
impl AppendToUrl for PlainEventsQuery {
fn append_to_url(&self, url: &mut Url) {
self.append_to_url(url)
}

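The two new flags are read from the URL with the same default-then-parse chain used for reportError and doLog. Below is a self-contained sketch of that pattern, with a plain HashMap standing in for get_url_query_pairs and a String error in place of the crate's Error type.

use std::collections::HashMap;

// Parse an optional boolean query parameter with a default of "false",
// mirroring the pattern used for doTestMainError and doTestStreamError above.
fn parse_bool_flag(pairs: &HashMap<String, String>, key: &str) -> Result<bool, String> {
    pairs
        .get(key)
        .map(String::as_str)
        .unwrap_or("false")
        .parse()
        .map_err(|e| format!("can not parse {} {:?}", key, e))
}

fn flags_from_pairs(pairs: &HashMap<String, String>) -> Result<(bool, bool), String> {
    let do_test_main_error = parse_bool_flag(pairs, "doTestMainError")?;
    let do_test_stream_error = parse_bool_flag(pairs, "doTestStreamError")?;
    Ok((do_test_main_error, do_test_stream_error))
}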
View File

@@ -158,11 +158,14 @@ where
let frame_crc_ind =
u32::from_le_bytes(*arrayref::array_ref![buf, INMEM_FRAME_HEAD + len as usize, 4]);
let payload_crc_ind = u32::from_le_bytes(*arrayref::array_ref![buf, 16, 4]);
//info!("len {}", len);
//info!("payload_crc_ind {}", payload_crc_ind);
//info!("frame_crc_ind {}", frame_crc_ind);
let payload_crc_match = payload_crc_ind == payload_crc;
let frame_crc_match = frame_crc_ind == frame_crc;
if !payload_crc_match || !frame_crc_match {
if !frame_crc_match || !payload_crc_match {
let ss = String::from_utf8_lossy(&buf[..buf.len().min(256)]);
warn!("CRC mismatch A\n{ss:?}");
warn!("CRC mismatch A frame_crc_match {frame_crc_match} payload_crc_match {payload_crc_match}\n{ss:?}");
return (
Some(Some(Err(Error::with_msg(format!(
"InMemoryFrameAsyncReadStream tryparse crc mismatch A {} {}",