Find active channels and deliver values

This commit is contained in:
Dominik Werder
2022-06-03 16:57:59 +02:00
parent 775650c2d8
commit 3cd1b7a640
49 changed files with 1002 additions and 270 deletions

View File

@@ -20,7 +20,6 @@ pub fn make_test_node(id: u32) -> Node {
}),
archiver_appliance: None,
channel_archiver: None,
access_scylla: false,
}
}
@@ -40,6 +39,7 @@ async fn agg_x_dim_0_inner() {
channel: Channel {
backend: "sf-databuffer".into(),
name: "S10BC01-DBAM070:EOM1_T1".into(),
series: None,
},
keyspace: 2,
time_bin_size: Nanos { ns: DAY },
@@ -94,6 +94,7 @@ async fn agg_x_dim_1_inner() {
channel: Channel {
backend: "ks".into(),
name: "wave1".into(),
series: None,
},
keyspace: 3,
time_bin_size: Nanos { ns: DAY },

View File

@@ -54,7 +54,7 @@ impl ChannelExecFunction for BinnedBinaryChannelExec {
fn exec<NTY, END, EVS, ENP>(
self,
_byte_order: END,
_scalar_type: ScalarType,
scalar_type: ScalarType,
shape: Shape,
event_value_shape: EVS,
_events_node_proc: ENP,
@@ -93,6 +93,7 @@ impl ChannelExecFunction for BinnedBinaryChannelExec {
PreBinnedPatchIterator::from_range(pre_range),
self.query.channel().clone(),
range.clone(),
scalar_type,
shape,
self.query.agg_kind().clone(),
self.query.cache_usage().clone(),
@@ -147,12 +148,16 @@ impl ChannelExecFunction for BinnedBinaryChannelExec {
pub async fn binned_bytes_for_http(
query: &BinnedQuery,
scalar_type: ScalarType,
shape: Shape,
node_config: &NodeConfigCached,
) -> Result<Pin<Box<dyn Stream<Item = Result<Bytes, Error>> + Send>>, Error> {
let ret = channel_exec(
BinnedBinaryChannelExec::new(query.clone(), node_config.clone()),
query.channel(),
query.range(),
scalar_type,
shape,
query.agg_kind().clone(),
node_config,
)
@@ -306,7 +311,7 @@ impl ChannelExecFunction for BinnedJsonChannelExec {
fn exec<NTY, END, EVS, ENP>(
self,
_byte_order: END,
_scalar_type: ScalarType,
scalar_type: ScalarType,
shape: Shape,
event_value_shape: EVS,
_events_node_proc: ENP,
@@ -346,6 +351,7 @@ impl ChannelExecFunction for BinnedJsonChannelExec {
PreBinnedPatchIterator::from_range(pre_range),
self.query.channel().clone(),
range.clone(),
scalar_type,
shape,
self.query.agg_kind().clone(),
self.query.cache_usage().clone(),
@@ -400,12 +406,16 @@ impl ChannelExecFunction for BinnedJsonChannelExec {
pub async fn binned_json(
query: &BinnedQuery,
scalar_type: ScalarType,
shape: Shape,
node_config: &NodeConfigCached,
) -> Result<Pin<Box<dyn Stream<Item = Result<Bytes, Error>> + Send>>, Error> {
let ret = channel_exec(
BinnedJsonChannelExec::new(query.clone(), query.timeout(), node_config.clone()),
query.channel(),
query.range(),
scalar_type,
shape,
query.agg_kind().clone(),
node_config,
)

View File

@@ -12,7 +12,7 @@ use netpod::log::*;
use netpod::query::CacheUsage;
use netpod::{
x_bin_count, AggKind, AppendToUrl, BinnedRange, ByteSize, Channel, NodeConfigCached, PerfOpts,
PreBinnedPatchIterator, Shape,
PreBinnedPatchIterator, ScalarType, Shape,
};
use serde::de::DeserializeOwned;
use std::future::ready;
@@ -164,6 +164,7 @@ where
patch_it: PreBinnedPatchIterator,
channel: Channel,
range: BinnedRange,
scalar_type: ScalarType,
shape: Shape,
agg_kind: AggKind,
cache_usage: CacheUsage,
@@ -185,12 +186,15 @@ where
let pmax = patches.len();
let inp = futures_util::stream::iter(patches.into_iter().enumerate())
.map({
let shape = shape.clone();
let agg_kind = agg_kind.clone();
let node_config = node_config.clone();
move |(pix, patch)| {
let query = PreBinnedQuery::new(
patch,
channel.clone(),
scalar_type.clone(),
shape.clone(),
agg_kind.clone(),
cache_usage.clone(),
disk_io_buffer_size,

View File

@@ -17,7 +17,7 @@ use items::{
use netpod::log::*;
use netpod::query::{CacheUsage, RawEventsQuery};
use netpod::{
x_bin_count, AggKind, BinnedRange, NodeConfigCached, PerfOpts, PreBinnedPatchIterator, PreBinnedPatchRange, Shape,
x_bin_count, AggKind, BinnedRange, NodeConfigCached, PerfOpts, PreBinnedPatchIterator, PreBinnedPatchRange,
};
use serde::de::DeserializeOwned;
use serde::Serialize;
@@ -37,7 +37,6 @@ where
ENP: EventsNodeProcessor<Input = <EVS as EventValueFromBytes<NTY, END>>::Batch>,
{
query: PreBinnedQuery,
shape: Shape,
agg_kind: AggKind,
node_config: NodeConfigCached,
open_check_local_file: Option<Pin<Box<dyn Future<Output = Result<File, io::Error>> + Send>>>,
@@ -71,10 +70,9 @@ where
// TODO who exactly needs this DeserializeOwned?
Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>: FrameType + DeserializeOwned,
{
pub fn new(query: PreBinnedQuery, shape: Shape, agg_kind: AggKind, node_config: &NodeConfigCached) -> Self {
pub fn new(query: PreBinnedQuery, agg_kind: AggKind, node_config: &NodeConfigCached) -> Self {
Self {
query,
shape,
agg_kind,
node_config: node_config.clone(),
open_check_local_file: None,
@@ -133,7 +131,7 @@ where
let ret = TBinnerStream::<_, <ENP as EventsNodeProcessor>::Output>::new(
s,
range,
x_bin_count(&self.shape, &self.agg_kind),
x_bin_count(&self.query.shape().clone(), &self.agg_kind),
self.agg_kind.do_time_weighted(),
);
Ok(Box::pin(ret))
@@ -180,6 +178,8 @@ where
let query = PreBinnedQuery::new(
patch,
q2.channel().clone(),
q2.scalar_type().clone(),
q2.shape().clone(),
q2.agg_kind().clone(),
q2.cache_usage().clone(),
disk_io_buffer_size,

View File

@@ -13,13 +13,13 @@ use items::numops::{BoolNum, NumOps, StringNum};
use items::{
Appendable, Clearable, EventsNodeProcessor, Framable, FrameType, PushableIndex, Sitemty, TimeBinnableType,
};
use netpod::{AggKind, ByteOrder, ChannelConfigQuery, NodeConfigCached, ScalarType, Shape};
use netpod::{AggKind, ByteOrder, NodeConfigCached, ScalarType, Shape};
use serde::de::DeserializeOwned;
use serde::Serialize;
use std::pin::Pin;
fn make_num_pipeline_nty_end_evs_enp<NTY, END, EVS, ENP>(
shape: Shape,
_shape: Shape,
agg_kind: AggKind,
_event_value_shape: EVS,
_events_node_proc: ENP,
@@ -36,7 +36,7 @@ where
Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>:
Framable + FrameType + DeserializeOwned,
{
let ret = PreBinnedValueStream::<NTY, END, EVS, ENP>::new(query, shape, agg_kind, node_config);
let ret = PreBinnedValueStream::<NTY, END, EVS, ENP>::new(query, agg_kind, node_config);
let ret = StreamExt::map(ret, |item| Box::new(item) as Box<dyn Framable>);
Box::pin(ret)
}
@@ -138,9 +138,6 @@ macro_rules! match_end {
};
}
// TODO is the distinction on byte order necessary here?
// We should rely on the "events" http api to deliver data, and the cache, both
// of those have fixed endianness.
fn make_num_pipeline(
scalar_type: ScalarType,
byte_order: ByteOrder,
@@ -185,17 +182,11 @@ pub async fn pre_binned_bytes_for_http(
));
return Err(err);
}
let q = ChannelConfigQuery {
channel: query.channel().clone(),
range: query.patch().patch_range(),
expand: query.agg_kind().need_expand(),
};
let conf = httpclient::get_channel_config(&q, node_config).await?;
let ret = make_num_pipeline(
conf.scalar_type.clone(),
query.scalar_type().clone(),
// TODO actually, make_num_pipeline should not depend on endianness.
conf.byte_order.unwrap_or(ByteOrder::LE).clone(),
conf.shape.clone(),
ByteOrder::LE,
query.shape().clone(),
query.agg_kind().clone(),
query.clone(),
node_config,

View File

@@ -1,7 +1,10 @@
use err::Error;
use http::request::Parts;
use netpod::query::{agg_kind_from_binning_scheme, binning_scheme_append_to_url, CacheUsage};
use netpod::{channel_from_pairs, AggKind, AppendToUrl, ByteSize, Channel, PreBinnedPatchCoord};
use netpod::{
channel_append_to_url, channel_from_pairs, AggKind, AppendToUrl, ByteSize, Channel, PreBinnedPatchCoord,
ScalarType, Shape,
};
use std::collections::BTreeMap;
use url::Url;
@@ -10,6 +13,8 @@ pub struct PreBinnedQuery {
patch: PreBinnedPatchCoord,
agg_kind: AggKind,
channel: Channel,
scalar_type: ScalarType,
shape: Shape,
cache_usage: CacheUsage,
disk_io_buffer_size: usize,
disk_stats_every: ByteSize,
@@ -20,6 +25,8 @@ impl PreBinnedQuery {
pub fn new(
patch: PreBinnedPatchCoord,
channel: Channel,
scalar_type: ScalarType,
shape: Shape,
agg_kind: AggKind,
cache_usage: CacheUsage,
disk_io_buffer_size: usize,
@@ -28,8 +35,10 @@ impl PreBinnedQuery {
) -> Self {
Self {
patch,
agg_kind,
channel,
scalar_type,
shape,
agg_kind,
cache_usage,
disk_io_buffer_size,
disk_stats_every,
@@ -45,25 +54,35 @@ impl PreBinnedQuery {
let pairs = pairs;
let bin_t_len = pairs
.get("binTlen")
.ok_or(Error::with_msg("missing binTlen"))?
.ok_or_else(|| Error::with_msg("missing binTlen"))?
.parse()?;
let patch_t_len = pairs
.get("patchTlen")
.ok_or(Error::with_msg("missing patchTlen"))?
.ok_or_else(|| Error::with_msg("missing patchTlen"))?
.parse()?;
let patch_ix = pairs
.get("patchIx")
.ok_or(Error::with_msg("missing patchIx"))?
.ok_or_else(|| Error::with_msg("missing patchIx"))?
.parse()?;
let disk_stats_every = pairs
.get("diskStatsEveryKb")
.ok_or(Error::with_msg("missing diskStatsEveryKb"))?;
.ok_or_else(|| Error::with_msg("missing diskStatsEveryKb"))?;
let disk_stats_every = disk_stats_every
.parse()
.map_err(|e| Error::with_msg(format!("can not parse diskStatsEveryKb {:?}", e)))?;
let scalar_type = pairs
.get("scalarType")
.ok_or_else(|| Error::with_msg("missing scalarType"))
.map(|x| ScalarType::from_url_str(&x))??;
let shape = pairs
.get("shape")
.ok_or_else(|| Error::with_msg("missing shape"))
.map(|x| Shape::from_url_str(&x))??;
let ret = Self {
patch: PreBinnedPatchCoord::new(bin_t_len, patch_t_len, patch_ix),
channel: channel_from_pairs(&pairs)?,
scalar_type,
shape,
agg_kind: agg_kind_from_binning_scheme(&pairs).unwrap_or(AggKind::DimXBins1),
cache_usage: CacheUsage::from_pairs(&pairs)?,
disk_io_buffer_size: pairs
@@ -99,6 +118,14 @@ impl PreBinnedQuery {
&self.channel
}
/// Returns a reference to the scalar type carried by this query.
pub fn scalar_type(&self) -> &ScalarType {
&self.scalar_type
}
/// Returns a reference to the shape carried by this query.
pub fn shape(&self) -> &Shape {
&self.shape
}
/// Returns a reference to the aggregation kind carried by this query.
pub fn agg_kind(&self) -> &AggKind {
&self.agg_kind
}
@@ -120,9 +147,10 @@ impl AppendToUrl for PreBinnedQuery {
fn append_to_url(&self, url: &mut Url) {
self.patch.append_to_url(url);
binning_scheme_append_to_url(&self.agg_kind, url);
channel_append_to_url(url, &self.channel);
let mut g = url.query_pairs_mut();
g.append_pair("channelBackend", &self.channel.backend);
g.append_pair("channelName", &self.channel.name);
g.append_pair("scalarType", &format!("{:?}", self.scalar_type));
g.append_pair("shape", &format!("{:?}", self.shape));
g.append_pair("cacheUsage", &format!("{}", self.cache_usage.query_param_value()));
g.append_pair("diskIoBufferSize", &format!("{}", self.disk_io_buffer_size));
g.append_pair("diskStatsEveryKb", &format!("{}", self.disk_stats_every.bytes() / 1024));

View File

@@ -197,29 +197,20 @@ pub async fn channel_exec<F>(
f: F,
channel: &Channel,
range: &NanoRange,
scalar_type: ScalarType,
shape: Shape,
agg_kind: AggKind,
node_config: &NodeConfigCached,
) -> Result<F::Output, Error>
where
F: ChannelExecFunction,
{
let q = ChannelConfigQuery {
channel: channel.clone(),
range: range.clone(),
expand: agg_kind.need_expand(),
};
let conf = httpclient::get_channel_config(&q, node_config).await.map_err(|e| {
e.add_public_msg(format!(
"Can not find channel config for channel: {:?}",
q.channel.name()
))
})?;
let ret = channel_exec_config(
f,
conf.scalar_type.clone(),
// TODO is the byte order ever important here?
conf.byte_order.unwrap_or(ByteOrder::LE).clone(),
conf.shape.clone(),
scalar_type,
// TODO TODO TODO is the byte order ever important here?
ByteOrder::LE,
shape,
agg_kind,
node_config,
)?;

View File

@@ -815,6 +815,7 @@ mod test {
let chn = netpod::Channel {
backend: "testbackend".into(),
name: "scalar-i32-be".into(),
series: None,
};
// TODO read config from disk? Or expose the config from data generator?
let channel_config = ChannelConfig {

View File

@@ -250,6 +250,7 @@ mod test {
let chn = netpod::Channel {
backend: "testbackend".into(),
name: "scalar-i32-be".into(),
series: None,
};
// TODO read config from disk.
let channel_config = ChannelConfig {

View File

@@ -1,8 +1,7 @@
use chrono::{DateTime, TimeZone, Utc};
use err::Error;
use netpod::{
channel_from_pairs, get_url_query_pairs, AppendToUrl, Channel, FromUrl, HasBackend, HasTimeout, NanoRange, ToNanos,
};
use netpod::{channel_append_to_url, channel_from_pairs, get_url_query_pairs};
use netpod::{AppendToUrl, Channel, FromUrl, HasBackend, HasTimeout, NanoRange, ToNanos};
use std::time::Duration;
use url::Url;
@@ -32,11 +31,11 @@ impl PlainEventsBinaryQuery {
let beg_date = pairs.get("begDate").ok_or(Error::with_msg("missing begDate"))?;
let end_date = pairs.get("endDate").ok_or(Error::with_msg("missing endDate"))?;
let ret = Self {
channel: channel_from_pairs(&pairs)?,
range: NanoRange {
beg: beg_date.parse::<DateTime<Utc>>()?.to_nanos(),
end: end_date.parse::<DateTime<Utc>>()?.to_nanos(),
},
channel: channel_from_pairs(&pairs)?,
disk_io_buffer_size: pairs
.get("diskIoBufferSize")
.map_or("4096", |k| k)
@@ -85,9 +84,8 @@ impl PlainEventsBinaryQuery {
impl AppendToUrl for PlainEventsBinaryQuery {
fn append_to_url(&self, url: &mut Url) {
let date_fmt = "%Y-%m-%dT%H:%M:%S.%3fZ";
channel_append_to_url(url, &self.channel);
let mut g = url.query_pairs_mut();
g.append_pair("channelBackend", &self.channel.backend);
g.append_pair("channelName", &self.channel.name);
g.append_pair(
"begDate",
&Utc.timestamp_nanos(self.range.beg as i64).format(date_fmt).to_string(),
@@ -137,11 +135,11 @@ impl PlainEventsJsonQuery {
let beg_date = pairs.get("begDate").ok_or(Error::with_public_msg("missing begDate"))?;
let end_date = pairs.get("endDate").ok_or(Error::with_public_msg("missing endDate"))?;
let ret = Self {
channel: channel_from_pairs(&pairs)?,
range: NanoRange {
beg: beg_date.parse::<DateTime<Utc>>()?.to_nanos(),
end: end_date.parse::<DateTime<Utc>>()?.to_nanos(),
},
channel: channel_from_pairs(&pairs)?,
disk_io_buffer_size: pairs
.get("diskIoBufferSize")
.map_or("4096", |k| k)
@@ -176,14 +174,14 @@ impl PlainEventsJsonQuery {
Self::from_url(&url)
}
/// Returns a reference to the time range (`NanoRange`) of this query.
pub fn range(&self) -> &NanoRange {
&self.range
}
/// Returns a reference to the channel this query addresses.
pub fn channel(&self) -> &Channel {
&self.channel
}
/// Returns a reference to the time range (`NanoRange`) of this query.
pub fn range(&self) -> &NanoRange {
&self.range
}
/// Returns the value of the `report_error` flag stored on this query.
// NOTE(review): how callers interpret this flag is not visible here — confirm at the use sites.
pub fn report_error(&self) -> bool {
self.report_error
}
@@ -210,9 +208,8 @@ impl PlainEventsJsonQuery {
pub fn append_to_url(&self, url: &mut Url) {
let date_fmt = "%Y-%m-%dT%H:%M:%S.%3fZ";
channel_append_to_url(url, &self.channel);
let mut g = url.query_pairs_mut();
g.append_pair("channelBackend", &self.channel.backend);
g.append_pair("channelName", &self.channel.name);
g.append_pair(
"begDate",
&Utc.timestamp_nanos(self.range.beg as i64).format(date_fmt).to_string(),

View File

@@ -103,7 +103,6 @@ where
buf: BytesMut,
wp: usize,
) -> (Option<Option<Result<InMemoryFrame, Error>>>, BytesMut, usize) {
let mut buf = buf;
let nb = wp;
if nb >= INMEM_FRAME_HEAD {
let magic = u32::from_le_bytes(*arrayref::array_ref![buf, 0, 4]);
@@ -162,16 +161,20 @@ where
let payload_crc_match = payload_crc_ind == payload_crc;
let frame_crc_match = frame_crc_ind == frame_crc;
if !payload_crc_match || !frame_crc_match {
let ss = String::from_utf8_lossy(&buf[..buf.len().min(256)]);
warn!("CRC mismatch A\n{ss:?}");
return (
Some(Some(Err(Error::with_msg(format!(
"InMemoryFrameAsyncReadStream tryparse crc mismatch {} {}",
"InMemoryFrameAsyncReadStream tryparse crc mismatch A {} {}",
payload_crc_match, frame_crc_match,
))))),
buf,
wp,
);
}
let mut buf = buf;
let mut buf3 = buf.split_to(nl);
let buf = buf;
buf3.advance(INMEM_FRAME_HEAD);
buf3.truncate(len as usize);
let mut h = crc32fast::Hasher::new();
@@ -179,9 +182,12 @@ where
let payload_crc_2 = h.finalize();
let payload_crc_2_match = payload_crc_2 == payload_crc_ind;
if !payload_crc_2_match {
let sa = String::from_utf8_lossy(&buf[..buf.len().min(256)]);
let sb = String::from_utf8_lossy(&buf3[..buf3.len().min(256)]);
warn!("CRC mismatch B\n{sa:?}\n{sb:?}");
return (
Some(Some(Err(Error::with_msg(format!(
"InMemoryFrameAsyncReadStream tryparse crc mismatch {} {} {}",
"InMemoryFrameAsyncReadStream tryparse crc mismatch B {} {} {}",
payload_crc_match, frame_crc_match, payload_crc_2_match,
))))),
buf,

View File

@@ -31,6 +31,7 @@ pub async fn gen_test_data() -> Result<(), Error> {
channel: Channel {
backend: "testbackend".into(),
name: "scalar-i32-be".into(),
series: None,
},
keyspace: 2,
time_bin_size: Nanos { ns: DAY },
@@ -49,6 +50,7 @@ pub async fn gen_test_data() -> Result<(), Error> {
channel: Channel {
backend: "testbackend".into(),
name: "wave-f64-be-n21".into(),
series: None,
},
keyspace: 3,
time_bin_size: Nanos { ns: DAY },
@@ -67,6 +69,7 @@ pub async fn gen_test_data() -> Result<(), Error> {
channel: Channel {
backend: "testbackend".into(),
name: "wave-u16-le-n77".into(),
series: None,
},
keyspace: 3,
time_bin_size: Nanos { ns: DAY },
@@ -85,6 +88,7 @@ pub async fn gen_test_data() -> Result<(), Error> {
channel: Channel {
backend: "testbackend".into(),
name: "tw-scalar-i32-be".into(),
series: None,
},
keyspace: 2,
time_bin_size: Nanos { ns: DAY },
@@ -103,6 +107,7 @@ pub async fn gen_test_data() -> Result<(), Error> {
channel: Channel {
backend: "testbackend".into(),
name: "const-regular-scalar-i32-be".into(),
series: None,
},
keyspace: 2,
time_bin_size: Nanos { ns: DAY },
@@ -131,7 +136,6 @@ pub async fn gen_test_data() -> Result<(), Error> {
}),
archiver_appliance: None,
channel_archiver: None,
access_scylla: false,
};
ensemble.nodes.push(node);
}

View File

@@ -345,6 +345,7 @@ mod test {
channel: Channel {
backend: "testbackend".into(),
name: "scalar-i32-be".into(),
series: None,
},
keyspace: 2,
time_bin_size: Nanos { ns: DAY },