Proxy events and binned queries, and get it to serve sf-databuffer and hipa-archive

Dominik Werder
2021-06-21 21:45:40 +02:00
parent e7891fee13
commit c8c81470fc
16 changed files with 354 additions and 172 deletions

View File

@@ -14,7 +14,7 @@ use crate::decode::{
};
use crate::frame::makeframe::{Framable, FrameType, SubFrId};
use crate::merge::mergedfromremotes::MergedFromRemotes;
use crate::raw::EventsQuery;
use crate::raw::RawEventsQuery;
use crate::Sitemty;
use bytes::Bytes;
use chrono::{TimeZone, Utc};
@@ -121,7 +121,7 @@ where
range
);
let bin_count = range.count as u32;
let evq = EventsQuery {
let evq = RawEventsQuery {
channel: query.channel().clone(),
range: query.range().clone(),
agg_kind: query.agg_kind().clone(),
@@ -786,7 +786,7 @@ impl ChannelExecFunction for BinnedJsonChannelExec {
"binned_bytes_for_http no covering range for prebinned, merge from remotes instead {:?}",
range
);
let evq = EventsQuery {
let evq = RawEventsQuery {
channel: self.query.channel().clone(),
range: self.query.range().clone(),
agg_kind: self.query.agg_kind().clone(),
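
Both hunks in this file apply the same mechanical rename and share one construction step: when no prebinned range covers the request, the binned exec path derives a raw-events query from the binned query's accessors before merging events from the remote nodes. A minimal sketch of that shared step, assuming `RawEventsQuery` carries only the three fields visible in this diff (the struct, defined later in this commit, may have more) and with an illustrative import path for `BinnedQuery`:

```rust
use crate::raw::RawEventsQuery;

// Illustrative only: the actual module path of BinnedQuery is not shown in this diff.
use crate::binned::query::BinnedQuery;

// Shared fallback step from the two hunks above: build the raw query that
// MergedFromRemotes will fan out to the cluster nodes.
fn raw_events_query_for(query: &BinnedQuery) -> RawEventsQuery {
    RawEventsQuery {
        channel: query.channel().clone(),
        range: query.range().clone(),
        agg_kind: query.agg_kind().clone(),
        // Assumption: no further fields; defaults would be added here if the struct grows.
    }
}
```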

View File

@@ -7,7 +7,7 @@ use crate::cache::{write_pb_cache_min_max_avg_scalar, CacheFileDesc, WrittenPbCa
use crate::decode::{Endianness, EventValueFromBytes, EventValueShape, NumFromBytes};
use crate::frame::makeframe::FrameType;
use crate::merge::mergedfromremotes::MergedFromRemotes;
use crate::raw::EventsQuery;
use crate::raw::RawEventsQuery;
use crate::streamlog::Streamlog;
use crate::Sitemty;
use err::Error;
@@ -110,7 +110,7 @@ where
Pin<Box<dyn Stream<Item = Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>> + Send>>,
Error,
> {
let evq = EventsQuery {
let evq = RawEventsQuery {
channel: self.query.channel().clone(),
range: self.query.patch().patch_range(),
agg_kind: self.query.agg_kind().clone(),

View File

@@ -1,9 +1,11 @@
use crate::query::channel_from_params;
use chrono::{DateTime, TimeZone, Utc};
use err::Error;
use http::request::Parts;
use netpod::log::*;
use netpod::{AggKind, ByteSize, Channel, HostPort, NanoRange, PreBinnedPatchCoord, ToNanos};
use netpod::{
channel_from_pairs, get_url_query_pairs, AggKind, AppendToUrl, ByteSize, Channel, FromUrl, HasBackend, HasTimeout,
HostPort, NanoRange, PreBinnedPatchCoord, ToNanos,
};
use std::collections::BTreeMap;
use std::time::Duration;
use url::Url;
@@ -63,9 +65,9 @@ impl PreBinnedQuery {
.map_err(|e| Error::with_msg(format!("can not parse diskStatsEveryKb {:?}", e)))?;
let ret = Self {
patch: PreBinnedPatchCoord::new(bin_t_len, patch_t_len, patch_ix),
channel: channel_from_params(&pairs)?,
channel: channel_from_pairs(&pairs)?,
agg_kind: agg_kind_from_binning_scheme(&pairs).unwrap_or(AggKind::DimXBins1),
cache_usage: CacheUsage::from_params(&pairs)?,
cache_usage: CacheUsage::from_pairs(&pairs)?,
disk_stats_every: ByteSize::kb(disk_stats_every),
report_error: pairs
.get("reportError")
@@ -137,7 +139,7 @@ impl CacheUsage {
.into()
}
pub fn from_params(params: &BTreeMap<String, String>) -> Result<Self, Error> {
pub fn from_pairs(params: &BTreeMap<String, String>) -> Result<Self, Error> {
let ret = params.get("cacheUsage").map_or(Ok::<_, Error>(CacheUsage::Use), |k| {
if k == "use" {
Ok(CacheUsage::Use)
@@ -200,49 +202,6 @@ impl BinnedQuery {
}
}
pub fn from_request(req: &http::request::Parts) -> Result<Self, Error> {
let params = netpod::query_params(req.uri.query());
let beg_date = params.get("begDate").ok_or(Error::with_msg("missing begDate"))?;
let end_date = params.get("endDate").ok_or(Error::with_msg("missing endDate"))?;
let disk_stats_every = params.get("diskStatsEveryKb").map_or("2000", |k| k);
let disk_stats_every = disk_stats_every
.parse()
.map_err(|e| Error::with_msg(format!("can not parse diskStatsEveryKb {:?}", e)))?;
let ret = Self {
channel: channel_from_params(&params)?,
range: NanoRange {
beg: beg_date.parse::<DateTime<Utc>>()?.to_nanos(),
end: end_date.parse::<DateTime<Utc>>()?.to_nanos(),
},
bin_count: params
.get("binCount")
.ok_or(Error::with_msg("missing binCount"))?
.parse()
.map_err(|e| Error::with_msg(format!("can not parse binCount {:?}", e)))?,
agg_kind: agg_kind_from_binning_scheme(&params).unwrap_or(AggKind::DimXBins1),
cache_usage: CacheUsage::from_params(&params)?,
disk_stats_every: ByteSize::kb(disk_stats_every),
report_error: params
.get("reportError")
.map_or("false", |k| k)
.parse()
.map_err(|e| Error::with_msg(format!("can not parse reportError {:?}", e)))?,
timeout: params
.get("timeout")
.map_or("2000", |k| k)
.parse::<u64>()
.map(|k| Duration::from_millis(k))
.map_err(|e| Error::with_msg(format!("can not parse timeout {:?}", e)))?,
abort_after_bin_count: params
.get("abortAfterBinCount")
.map_or("0", |k| k)
.parse()
.map_err(|e| Error::with_msg(format!("can not parse abortAfterBinCount {:?}", e)))?,
};
info!("BinnedQuery::from_request {:?}", ret);
Ok(ret)
}
pub fn range(&self) -> &NanoRange {
&self.range
}
@@ -291,8 +250,7 @@ impl BinnedQuery {
self.timeout = k;
}
// TODO the BinnedQuery itself should maybe already carry the full HostPort?
// On the other hand, want to keep the flexibility for the fail over possibility..
// TODO remove in favor of AppendToUrl
pub fn url(&self, host: &HostPort) -> String {
let date_fmt = "%Y-%m-%dT%H:%M:%S.%3fZ";
format!(
@@ -313,6 +271,86 @@ impl BinnedQuery {
}
}
impl HasBackend for BinnedQuery {
fn backend(&self) -> &str {
&self.channel.backend
}
}
impl HasTimeout for BinnedQuery {
fn timeout(&self) -> Duration {
self.timeout.clone()
}
}
impl FromUrl for BinnedQuery {
fn from_url(url: &Url) -> Result<Self, Error> {
let pairs = get_url_query_pairs(url);
let beg_date = pairs.get("begDate").ok_or(Error::with_msg("missing begDate"))?;
let end_date = pairs.get("endDate").ok_or(Error::with_msg("missing endDate"))?;
let disk_stats_every = pairs.get("diskStatsEveryKb").map_or("2000", |k| k);
let disk_stats_every = disk_stats_every
.parse()
.map_err(|e| Error::with_msg(format!("can not parse diskStatsEveryKb {:?}", e)))?;
let ret = Self {
channel: channel_from_pairs(&pairs)?,
range: NanoRange {
beg: beg_date.parse::<DateTime<Utc>>()?.to_nanos(),
end: end_date.parse::<DateTime<Utc>>()?.to_nanos(),
},
bin_count: pairs
.get("binCount")
.ok_or(Error::with_msg("missing binCount"))?
.parse()
.map_err(|e| Error::with_msg(format!("can not parse binCount {:?}", e)))?,
agg_kind: agg_kind_from_binning_scheme(&pairs).unwrap_or(AggKind::DimXBins1),
cache_usage: CacheUsage::from_pairs(&pairs)?,
disk_stats_every: ByteSize::kb(disk_stats_every),
report_error: pairs
.get("reportError")
.map_or("false", |k| k)
.parse()
.map_err(|e| Error::with_msg(format!("can not parse reportError {:?}", e)))?,
timeout: pairs
.get("timeout")
.map_or("2000", |k| k)
.parse::<u64>()
.map(|k| Duration::from_millis(k))
.map_err(|e| Error::with_msg(format!("can not parse timeout {:?}", e)))?,
abort_after_bin_count: pairs
.get("abortAfterBinCount")
.map_or("0", |k| k)
.parse()
.map_err(|e| Error::with_msg(format!("can not parse abortAfterBinCount {:?}", e)))?,
};
info!("BinnedQuery::from_url {:?}", ret);
Ok(ret)
}
}
impl AppendToUrl for BinnedQuery {
fn append_to_url(&self, url: &mut Url) {
let date_fmt = "%Y-%m-%dT%H:%M:%S.%3fZ";
let mut g = url.query_pairs_mut();
g.append_pair("cacheUsage", &self.cache_usage.to_string());
g.append_pair("channelBackend", &self.channel.backend);
g.append_pair("channelName", &self.channel.name);
g.append_pair("binCount", &format!("{}", self.bin_count));
g.append_pair(
"begDate",
&Utc.timestamp_nanos(self.range.beg as i64).format(date_fmt).to_string(),
);
g.append_pair(
"endDate",
&Utc.timestamp_nanos(self.range.end as i64).format(date_fmt).to_string(),
);
g.append_pair("binningScheme", &binning_scheme_query_string(&self.agg_kind));
g.append_pair("diskStatsEveryKb", &format!("{}", self.disk_stats_every.bytes() / 1024));
g.append_pair("timeout", &format!("{}", self.timeout.as_millis()));
g.append_pair("abortAfterBinCount", &format!("{}", self.abort_after_bin_count));
}
}
fn binning_scheme_query_string(agg_kind: &AggKind) -> String {
match agg_kind {
AggKind::Plain => "fullValue".into(),
@@ -321,9 +359,9 @@ fn binning_scheme_query_string(agg_kind: &AggKind) -> String {
}
}
fn agg_kind_from_binning_scheme(params: &BTreeMap<String, String>) -> Result<AggKind, Error> {
fn agg_kind_from_binning_scheme(pairs: &BTreeMap<String, String>) -> Result<AggKind, Error> {
let key = "binningScheme";
let s = params
let s = pairs
.get(key)
.map_or(Err(Error::with_msg(format!("can not find {}", key))), |k| Ok(k))?;
let ret = if s == "fullValue" {
@@ -331,7 +369,7 @@ fn agg_kind_from_binning_scheme(params: &BTreeMap<String, String>) -> Result<Agg
} else if s == "toScalarX" {
AggKind::DimXBins1
} else if s == "binnedX" {
let u = params.get("binnedXcount").map_or("1", |k| k).parse()?;
let u = pairs.get("binnedXcount").map_or("1", |k| k).parse()?;
AggKind::DimXBinsN(u)
} else {
return Err(Error::with_msg("can not extract binningScheme"));
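
The new `HasBackend`, `HasTimeout`, `FromUrl`, and `AppendToUrl` impls target traits that live in netpod and are not shown in this diff. Their shapes can be inferred from the impl blocks above; a sketch under that assumption:

```rust
use err::Error;
use std::time::Duration;
use url::Url;

// Assumed netpod trait definitions, reconstructed from the impls above;
// the real definitions live in the netpod crate, outside this diff.
pub trait HasBackend {
    fn backend(&self) -> &str;
}

pub trait HasTimeout {
    fn timeout(&self) -> Duration;
}

pub trait FromUrl: Sized {
    fn from_url(url: &Url) -> Result<Self, Error>;
}

pub trait AppendToUrl {
    // Appends this value's query parameters to an existing URL.
    fn append_to_url(&self, url: &mut Url);
}
```

Together, `FromUrl` and `AppendToUrl` give `BinnedQuery` a URL round trip, which is what lets the proxy forward a query to a backend node without re-encoding it by hand.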

View File

@@ -8,14 +8,13 @@ use crate::decode::{
};
use crate::frame::makeframe::{Framable, FrameType};
use crate::merge::mergedfromremotes::MergedFromRemotes;
use crate::raw::EventsQuery;
use crate::raw::RawEventsQuery;
use crate::Sitemty;
use bytes::Bytes;
use err::Error;
use futures_core::Stream;
use futures_util::future::FutureExt;
use futures_util::StreamExt;
use netpod::log::*;
use netpod::{AggKind, ByteOrder, Channel, NanoRange, NodeConfigCached, PerfOpts, ScalarType, Shape};
use parse::channelconfig::{extract_matching_config_entry, read_local_config, MatchingConfigEntry};
use serde::de::DeserializeOwned;
@@ -249,7 +248,7 @@ impl ChannelExecFunction for PlainEvents {
let _ = byte_order;
let _ = event_value_shape;
let perf_opts = PerfOpts { inmem_bufcap: 4096 };
let evq = EventsQuery {
let evq = RawEventsQuery {
channel: self.channel,
range: self.range,
agg_kind: self.agg_kind,
@@ -301,7 +300,6 @@ where
S: Stream<Item = Sitemty<T>> + Unpin,
T: Collectable + Debug,
{
info!("\n\nConstruct deadline with timeout {:?}\n\n", timeout);
let deadline = tokio::time::Instant::now() + timeout;
// TODO in general a Collector does not need to know about the expected number of bins.
// It would make more sense for some specific Collector kind to know.
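
The removed log line aside, this is the spot where the collector turns the request timeout into an absolute deadline. A compact sketch of the same pattern with tokio, independent of the surrounding generics (helper name hypothetical):

```rust
use std::future::Future;
use std::time::Duration;
use tokio::time::{timeout_at, Instant};

// Convert the relative timeout into one absolute deadline up front, then
// race the collection future against it; None means the deadline elapsed.
async fn collect_with_deadline<F, T>(fut: F, timeout: Duration) -> Option<T>
where
    F: Future<Output = T>,
{
    let deadline = Instant::now() + timeout;
    timeout_at(deadline, fut).await.ok()
}
```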
@@ -382,7 +380,7 @@ impl ChannelExecFunction for PlainEventsJson {
let _ = byte_order;
let _ = event_value_shape;
let perf_opts = PerfOpts { inmem_bufcap: 4096 };
let evq = EventsQuery {
let evq = RawEventsQuery {
channel: self.channel,
range: self.range,
agg_kind: self.agg_kind,

View File

@@ -1,19 +1,23 @@
use crate::query::channel_from_params;
use chrono::{DateTime, TimeZone, Utc};
use err::Error;
use netpod::{Channel, HostPort, NanoRange, ToNanos};
use netpod::log::*;
use netpod::{
channel_from_pairs, get_url_query_pairs, AppendToUrl, Channel, FromUrl, HasBackend, HasTimeout, HostPort,
NanoRange, ToNanos,
};
use std::time::Duration;
use url::Url;
// TODO move this query type out of this `binned` mod
#[derive(Clone, Debug)]
pub struct PlainEventsQuery {
pub struct PlainEventsBinaryQuery {
channel: Channel,
range: NanoRange,
report_error: bool,
timeout: Duration,
}
impl PlainEventsQuery {
impl PlainEventsBinaryQuery {
pub fn new(channel: Channel, range: NanoRange) -> Self {
Self {
channel,
@@ -24,7 +28,10 @@ impl PlainEventsQuery {
}
pub fn from_request(req: &http::request::Parts) -> Result<Self, Error> {
// TODO use Url
let params = netpod::query_params(req.uri.query());
info!("PlainEventsBinaryQuery from_request uri: {:?}", req.uri);
info!("PlainEventsBinaryQuery from_request params: {:?}", params);
let beg_date = params.get("begDate").ok_or(Error::with_msg("missing begDate"))?;
let end_date = params.get("endDate").ok_or(Error::with_msg("missing endDate"))?;
let ret = Self {
@@ -32,7 +39,7 @@ impl PlainEventsQuery {
beg: beg_date.parse::<DateTime<Utc>>()?.to_nanos(),
end: end_date.parse::<DateTime<Utc>>()?.to_nanos(),
},
channel: channel_from_params(&params)?,
channel: channel_from_pairs(&params)?,
report_error: params
.get("reportError")
.map_or("false", |k| k)
@@ -102,22 +109,22 @@ impl PlainEventsJsonQuery {
}
}
pub fn from_request(req: &http::request::Parts) -> Result<Self, Error> {
let params = netpod::query_params(req.uri.query());
let beg_date = params.get("begDate").ok_or(Error::with_msg("missing begDate"))?;
let end_date = params.get("endDate").ok_or(Error::with_msg("missing endDate"))?;
pub fn from_url(url: &Url) -> Result<Self, Error> {
let pairs = get_url_query_pairs(url);
let beg_date = pairs.get("begDate").ok_or(Error::with_msg("missing begDate"))?;
let end_date = pairs.get("endDate").ok_or(Error::with_msg("missing endDate"))?;
let ret = Self {
range: NanoRange {
beg: beg_date.parse::<DateTime<Utc>>()?.to_nanos(),
end: end_date.parse::<DateTime<Utc>>()?.to_nanos(),
},
channel: channel_from_params(&params)?,
report_error: params
channel: channel_from_pairs(&pairs)?,
report_error: pairs
.get("reportError")
.map_or("false", |k| k)
.parse()
.map_err(|e| Error::with_msg(format!("can not parse reportError {:?}", e)))?,
timeout: params
timeout: pairs
.get("timeout")
.map_or("10000", |k| k)
.parse::<u64>()
@@ -127,6 +134,12 @@ impl PlainEventsJsonQuery {
Ok(ret)
}
pub fn from_request_head(head: &http::request::Parts) -> Result<Self, Error> {
let s1 = format!("dummy:{}", head.uri);
let url = Url::parse(&s1)?;
Self::from_url(&url)
}
pub fn range(&self) -> &NanoRange {
&self.range
}
@@ -147,6 +160,7 @@ impl PlainEventsJsonQuery {
self.timeout = k;
}
// TODO remove in favor of Self::append_to_url
pub fn url(&self, host: &HostPort) -> String {
let date_fmt = "%Y-%m-%dT%H:%M:%S.%3fZ";
format!(
@@ -160,4 +174,44 @@ impl PlainEventsJsonQuery {
self.timeout.as_millis(),
)
}
pub fn append_to_url(&self, url: &mut Url) {
let date_fmt = "%Y-%m-%dT%H:%M:%S.%3fZ";
let mut g = url.query_pairs_mut();
g.append_pair("channelBackend", &self.channel.backend);
g.append_pair("channelName", &self.channel.name);
g.append_pair(
"begDate",
&Utc.timestamp_nanos(self.range.beg as i64).format(date_fmt).to_string(),
);
g.append_pair(
"endDate",
&Utc.timestamp_nanos(self.range.end as i64).format(date_fmt).to_string(),
);
g.append_pair("timeout", &format!("{}", self.timeout.as_millis()));
}
}
impl HasBackend for PlainEventsJsonQuery {
fn backend(&self) -> &str {
&self.channel.backend
}
}
impl HasTimeout for PlainEventsJsonQuery {
fn timeout(&self) -> Duration {
self.timeout.clone()
}
}
impl FromUrl for PlainEventsJsonQuery {
fn from_url(url: &Url) -> Result<Self, Error> {
Self::from_url(url)
}
}
impl AppendToUrl for PlainEventsJsonQuery {
fn append_to_url(&self, url: &mut Url) {
self.append_to_url(url)
}
}
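
The new `from_request_head` works around the fact that `http::request::Parts` carries only a path-and-query URI, which the `url` crate cannot parse as a standalone relative reference; prefixing a throwaway scheme makes it absolute. A small demonstration of that trick (helper name hypothetical):

```rust
use url::Url;

// Prefix a dummy scheme so the url crate accepts a "/path?query" URI,
// as from_request_head does above.
fn url_from_path_and_query(path_and_query: &str) -> Result<Url, url::ParseError> {
    Url::parse(&format!("dummy:{}", path_and_query))
}

fn main() -> Result<(), url::ParseError> {
    let url = url_from_path_and_query("/api/1/events?begDate=2021-06-21T00:00:00.000Z")?;
    assert_eq!(url.scheme(), "dummy");
    assert_eq!(url.path(), "/api/1/events");
    assert!(url.query().unwrap_or("").contains("begDate"));
    Ok(())
}
```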

View File

@@ -34,7 +34,6 @@ pub mod gen;
pub mod index;
pub mod merge;
pub mod paths;
pub mod query;
pub mod raw;
pub mod streamlog;

View File

@@ -2,7 +2,7 @@ use crate::agg::streams::Appendable;
use crate::binned::{EventsNodeProcessor, PushableIndex};
use crate::frame::makeframe::FrameType;
use crate::merge::MergedStream;
use crate::raw::{x_processed_stream_from_node, EventsQuery};
use crate::raw::{x_processed_stream_from_node, RawEventsQuery};
use crate::Sitemty;
use err::Error;
use futures_core::Stream;
@@ -34,7 +34,7 @@ where
<ENP as EventsNodeProcessor>::Output: Unpin,
Sitemty<<ENP as EventsNodeProcessor>::Output>: FrameType,
{
pub fn new(evq: EventsQuery, perf_opts: PerfOpts, cluster: Cluster) -> Self {
pub fn new(evq: RawEventsQuery, perf_opts: PerfOpts, cluster: Cluster) -> Self {
info!("MergedFromRemotes evq {:?}", evq);
let mut tcp_establish_futs = vec![];
for node in &cluster.nodes {

View File

@@ -1,17 +0,0 @@
use err::Error;
use netpod::Channel;
use std::collections::BTreeMap;
pub fn channel_from_params(params: &BTreeMap<String, String>) -> Result<Channel, Error> {
let ret = Channel {
backend: params
.get("channelBackend")
.ok_or(Error::with_msg("missing channelBackend"))?
.into(),
name: params
.get("channelName")
.ok_or(Error::with_msg("missing channelName"))?
.into(),
};
Ok(ret)
}
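
This deletes the crate-local `query` module; the other hunks in this commit import `channel_from_pairs` from netpod instead. Presumably the helper moved there with its body essentially unchanged; a sketch under that assumption:

```rust
use err::Error;
use netpod::Channel; // inside netpod itself this would be `use crate::Channel`
use std::collections::BTreeMap;

// Assumed netpod replacement for the deleted helper: same body, renamed to
// match the "pairs" terminology of get_url_query_pairs.
pub fn channel_from_pairs(pairs: &BTreeMap<String, String>) -> Result<Channel, Error> {
    let ret = Channel {
        backend: pairs
            .get("channelBackend")
            .ok_or(Error::with_msg("missing channelBackend"))?
            .into(),
        name: pairs
            .get("channelName")
            .ok_or(Error::with_msg("missing channelName"))?
            .into(),
    };
    Ok(ret)
}
```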

View File

@@ -13,7 +13,6 @@ use crate::raw::eventsfromframes::EventsFromFrames;
use crate::Sitemty;
use err::Error;
use futures_core::Stream;
use netpod::log::*;
use netpod::{AggKind, Channel, NanoRange, Node, PerfOpts};
use serde::{Deserialize, Serialize};
use std::pin::Pin;
@@ -27,7 +26,7 @@ pub mod eventsfromframes;
Query parameters to request (optionally) X-processed, but not T-processed events.
*/
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct EventsQuery {
pub struct RawEventsQuery {
pub channel: Channel,
pub range: NanoRange,
pub agg_kind: AggKind,
@@ -37,7 +36,7 @@ pub struct EventsQuery {
pub struct EventQueryJsonStringFrame(String);
pub async fn x_processed_stream_from_node<ENP>(
query: EventsQuery,
query: RawEventsQuery,
perf_opts: PerfOpts,
node: Node,
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<<ENP as EventsNodeProcessor>::Output>> + Send>>, Error>
@@ -48,7 +47,6 @@ where
{
let net = TcpStream::connect(format!("{}:{}", node.host, node.port_raw)).await?;
let qjs = serde_json::to_string(&query)?;
info!("x_processed_stream_from_node qjs {:?}", qjs);
let (netin, mut netout) = net.into_split();
let buf = make_frame(&EventQueryJsonStringFrame(qjs))?;
netout.write_all(&buf).await?;

View File

@@ -9,7 +9,7 @@ use crate::eventblobs::EventBlobsComplete;
use crate::eventchunker::EventChunkerConf;
use crate::frame::inmem::InMemoryFrameAsyncReadStream;
use crate::frame::makeframe::{decode_frame, make_frame, make_term_frame, Framable};
use crate::raw::{EventQueryJsonStringFrame, EventsQuery};
use crate::raw::{EventQueryJsonStringFrame, RawEventsQuery};
use crate::Sitemty;
use err::Error;
use futures_core::Stream;
@@ -238,7 +238,7 @@ async fn events_conn_handler_inner_try(
Ok(k) => k,
Err(e) => return Err((e, netout).into()),
};
let res: Result<EventsQuery, _> = serde_json::from_str(&qitem.0);
let res: Result<RawEventsQuery, _> = serde_json::from_str(&qitem.0);
let evq = match res {
Ok(k) => k,
Err(e) => {
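
This file and the previous one together show the wire protocol for raw event requests: the client serializes a `RawEventsQuery` to JSON, wraps it in an `EventQueryJsonStringFrame`, and writes it to the node's raw port; the connection handler decodes the frame and deserializes the query. A sketch of the serde round trip at the core of that exchange (frame wrapping omitted; `RawEventsQuery` assumed to hold only the fields visible in this diff):

```rust
use crate::raw::RawEventsQuery;

// Client side (x_processed_stream_from_node): query -> JSON string.
// Server side (events_conn_handler_inner_try): JSON string -> query.
fn round_trip(evq: &RawEventsQuery) -> Result<RawEventsQuery, serde_json::Error> {
    let qjs = serde_json::to_string(evq)?;
    serde_json::from_str(&qjs)
}
```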