Move workspace crates into subfolder

This commit is contained in:
Dominik Werder
2023-07-10 14:45:25 +02:00
parent 8938e55f86
commit 30c7fcb1e5
212 changed files with 246 additions and 41 deletions

View File

@@ -0,0 +1,230 @@
use futures_core::Stream;
use futures_util::StreamExt;
use items::{RangeCompletableItem, Sitemty, StreamItem, TimeBinnableType, TimeBinnableTypeAggregator};
use netpod::log::*;
use netpod::BinnedRange;
use netpod::NanoRange;
use std::collections::VecDeque;
use std::pin::Pin;
use std::task::{Context, Poll};
/// Selects a time-binning strategy for an input type.
///
/// `Output` is the binned item type produced by the associated `Aggregator`.
/// NOTE(review): no implementors or call sites are visible in this chunk —
/// confirm this trait is still used.
pub trait TimeBinningChoice {
    type Output: TimeBinnableType;
    type Aggregator: TimeBinnableTypeAggregator<Input = Self, Output = Self::Output> + Send + Unpin;
    /// Creates an aggregator covering `range`, producing `bin_count` bins.
    fn aggregator(range: NanoRange, bin_count: usize) -> Self::Aggregator;
}
/// Zero-sized marker type to select the time-weighted binning method.
pub struct TimeWeightedBinMethodMarker {}
/// Experimental/prototype variant of `TBinnerStream`.
///
/// All fields are currently unused (kept for future work — see the
/// commented-out aggregator field).
pub struct TBinnerStreamPlay<S, TBT>
where
    S: Stream<Item = Sitemty<TBT>>,
    TBT: TimeBinnableType,
{
    #[allow(unused)]
    inp: Pin<Box<S>>,
    // Item taken from the input but not yet consumed.
    #[allow(unused)]
    left: Option<Poll<Option<Sitemty<TBT>>>>,
    //aggtor: Option<<TBT as TimeBinnableType>::Aggregator>,
    #[allow(unused)]
    a: Option<TBT>,
}
/// Stream adapter which aggregates a stream of time-ordered items into the
/// time bins described by a `BinnedRange`.
pub struct TBinnerStream<S, TBT>
where
    S: Stream<Item = Sitemty<TBT>>,
    TBT: TimeBinnableType,
{
    // Boxed input stream of items to be binned.
    inp: Pin<Box<S>>,
    // Bin specification (ranges per bin index, total bin count).
    spec: BinnedRange,
    // Index of the bin currently being filled.
    curbin: u32,
    // Input item pulled from `inp` but not fully consumed yet
    // (e.g. it starts after — or extends past — the current bin).
    left: Option<Poll<Option<Sitemty<TBT>>>>,
    // Aggregator for the current bin; `Some` during normal operation.
    aggtor: Option<<TBT as TimeBinnableType>::Aggregator>,
    // Finished bins queued for emission.
    tmp_agg_results: VecDeque<<<TBT as TimeBinnableType>::Aggregator as TimeBinnableTypeAggregator>::Output>,
    // State flags driving the poll_next state machine:
    inp_completed: bool,
    all_bins_emitted: bool,
    range_complete_observed: bool,
    range_complete_emitted: bool,
    errored: bool,
    completed: bool,
}
impl<S, TBT> TBinnerStream<S, TBT>
where
    S: Stream<Item = Sitemty<TBT>> + Send + Unpin + 'static,
    TBT: TimeBinnableType,
{
    /// Creates the binner over input `inp`, pre-arming the aggregator for
    /// the first bin (`spec.get_range(0)`).
    pub fn new(inp: S, spec: BinnedRange, x_bin_count: usize, do_time_weight: bool) -> Self {
        let range = spec.get_range(0);
        Self {
            inp: Box::pin(inp),
            spec,
            curbin: 0,
            left: None,
            aggtor: Some(<TBT as TimeBinnableType>::aggregator(
                range,
                x_bin_count,
                do_time_weight,
            )),
            tmp_agg_results: VecDeque::new(),
            inp_completed: false,
            all_bins_emitted: false,
            range_complete_observed: false,
            range_complete_emitted: false,
            errored: false,
            completed: false,
        }
    }
    /// Returns the next input item to process: a previously stashed
    /// left-over item takes precedence, then the underlying stream.
    fn cur(&mut self, cx: &mut Context) -> Poll<Option<Sitemty<TBT>>> {
        if let Some(cur) = self.left.take() {
            cur
        } else if self.inp_completed {
            Poll::Ready(None)
        } else {
            // Poll the input inside a tracing span for instrumentation.
            let inp_poll_span = span!(Level::TRACE, "into_t_inp_poll");
            let t = inp_poll_span.in_scope(|| self.inp.poll_next_unpin(cx));
            // Disabled diagnostic block: logs the batch length of data items.
            if false {
                // TODO collect as stats:
                use Poll::*;
                match &t {
                    Ready(item) => match item {
                        Some(item) => match item {
                            Ok(item) => match item {
                                StreamItem::DataItem(item) => match item {
                                    RangeCompletableItem::Data(item) => {
                                        info!("time binner got batch len {}", item.len());
                                    }
                                    _ => {}
                                },
                                _ => {}
                            },
                            _ => {}
                        },
                        _ => {}
                    },
                    _ => {}
                }
            }
            t
        }
    }
    /// Closes the current bin: takes its aggregation result (resetting the
    /// aggregator onto the next bin's range), queues it for emission, and
    /// flags completion once the last bin has been cycled.
    // TODO handle unwrap error, or use a mem replace type instead of option:
    fn cycle_current_bin(&mut self, expand: bool) {
        self.curbin += 1;
        let ret = self
            .aggtor
            .as_mut()
            .unwrap()
            .result_reset(self.spec.get_range(self.curbin), expand);
        // TODO should we accumulate bins before emit? Maybe not, we want to stay responsive.
        // Only if the frequency would be high, that would require cpu time checks. Worth it? Measure..
        self.tmp_agg_results.push_back(ret);
        if self.curbin >= self.spec.bin_count() as u32 {
            self.all_bins_emitted = true;
        }
    }
    /// Processes one polled input item.
    ///
    /// Returns `Some(poll)` when there is something to hand to the caller,
    /// or `None` when internal state advanced and the poll loop should
    /// iterate again (e.g. a bin was cycled and queued).
    fn handle(
        &mut self,
        cur: Poll<Option<Sitemty<TBT>>>,
    ) -> Option<Poll<Option<Sitemty<<<TBT as TimeBinnableType>::Aggregator as TimeBinnableTypeAggregator>::Output>>>>
    {
        use Poll::*;
        match cur {
            Ready(Some(Ok(item))) => match item {
                // Log and stats items pass through unchanged.
                StreamItem::Log(item) => Some(Ready(Some(Ok(StreamItem::Log(item))))),
                StreamItem::Stats(item) => Some(Ready(Some(Ok(StreamItem::Stats(item))))),
                StreamItem::DataItem(item) => match item {
                    RangeCompletableItem::RangeComplete => {
                        // Remember it; emitted only at the very end.
                        self.range_complete_observed = true;
                        None
                    }
                    RangeCompletableItem::Data(item) => {
                        if self.all_bins_emitted {
                            // Just drop the item because we will not emit anymore data.
                            // TODO gather stats.
                            None
                        } else {
                            let ag = self.aggtor.as_mut().unwrap();
                            if item.ends_before(ag.range().clone()) {
                                // Entirely before the current bin: skip.
                                None
                            } else if item.starts_after(ag.range().clone()) {
                                // Entirely after the current bin: stash the
                                // item and close the bin (expanding), then
                                // retry the same item against the next bin.
                                self.left =
                                    Some(Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(item))))));
                                self.cycle_current_bin(true);
                                // TODO cycle_current_bin enqueues the bin, can I return here instead?
                                None
                            } else {
                                // Overlaps the current bin: ingest, and if it
                                // extends past the bin, stash it for the next.
                                ag.ingest(&item);
                                if item.ends_after(ag.range().clone()) {
                                    self.left =
                                        Some(Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(item))))));
                                    self.cycle_current_bin(true);
                                }
                                // TODO cycle_current_bin enqueues the bin, can I return here instead?
                                None
                            }
                        }
                    }
                },
            },
            Ready(Some(Err(e))) => {
                self.errored = true;
                Some(Ready(Some(Err(e))))
            }
            Ready(None) => {
                // Input exhausted: close the current bin (non-expanding) so
                // its partial content still gets emitted.
                self.inp_completed = true;
                if self.all_bins_emitted {
                    None
                } else {
                    self.cycle_current_bin(false);
                    // TODO cycle_current_bin enqueues the bin, can I return here instead?
                    None
                }
            }
            Pending => Some(Pending),
        }
    }
}
impl<S, TBT> Stream for TBinnerStream<S, TBT>
where
    S: Stream<Item = Sitemty<TBT>> + Send + Unpin + 'static,
    TBT: TimeBinnableType + Send + Unpin + 'static,
    <TBT as TimeBinnableType>::Aggregator: Unpin,
    <<TBT as TimeBinnableType>::Aggregator as TimeBinnableTypeAggregator>::Output: Unpin,
{
    type Item = Sitemty<<<TBT as TimeBinnableType>::Aggregator as TimeBinnableTypeAggregator>::Output>;
    // State machine: drain queued bins first; once input is done and all bins
    // are out, emit RangeComplete (if it was observed) and finish; otherwise
    // pull and process more input. `handle` returning `None` means "state
    // advanced, nothing to emit yet" — hence the labeled loop.
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        'outer: loop {
            break if self.completed {
                // Polling after completion is a contract violation by the caller.
                panic!("poll_next on completed");
            } else if self.errored {
                self.completed = true;
                Ready(None)
            } else if let Some(item) = self.tmp_agg_results.pop_front() {
                // Emit any finished bin before everything else.
                Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(item)))))
            } else if self.range_complete_emitted {
                self.completed = true;
                Ready(None)
            } else if self.inp_completed && self.all_bins_emitted {
                self.range_complete_emitted = true;
                if self.range_complete_observed {
                    Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
                } else {
                    // Nothing to emit; loop once more to reach the
                    // `range_complete_emitted` terminal branch.
                    continue 'outer;
                }
            } else {
                let cur = self.cur(cx);
                match self.handle(cur) {
                    Some(item) => item,
                    None => continue 'outer,
                }
            };
        }
    }
}

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1 @@

158
crates/disk/src/aggtest.rs Normal file
View File

@@ -0,0 +1,158 @@
use crate::eventblobs::EventChunkerMultifile;
use crate::eventchunker::EventChunkerConf;
use crate::AggQuerySingleChannel;
use crate::SfDbChConf;
use err::Error;
use netpod::range::evrange::NanoRange;
use netpod::test_data_base_path_databuffer;
use netpod::timeunits::*;
use netpod::ByteOrder;
use netpod::ByteSize;
use netpod::DiskIoTune;
use netpod::DtNano;
use netpod::Node;
use netpod::ScalarType;
use netpod::SfChFetchInfo;
use netpod::SfDatabuffer;
use netpod::SfDbChannel;
use netpod::Shape;
/// Builds a `Node` for tests: ports are derived from `id`, and both the
/// cache and databuffer data paths live under the test-data base directory.
pub fn make_test_node(id: u32) -> Node {
    let port = 8800 + id as u16;
    let node_dir = || test_data_base_path_databuffer().join(format!("node{:02}", id));
    Node {
        host: "localhost".into(),
        listen: "0.0.0.0".into(),
        port,
        port_raw: port + 100,
        // TODO use a common function to supply the tmp path.
        cache_base_path: node_dir(),
        sf_databuffer: Some(SfDatabuffer {
            data_base_path: node_dir(),
            ksprefix: "ks".into(),
            splits: None,
        }),
        archiver_appliance: None,
        channel_archiver: None,
        prometheus_api_bind: None,
    }
}
/// Test wrapper: drives `agg_x_dim_0_inner` to completion on the runtime.
#[test]
fn agg_x_dim_0() {
    let fut = async {
        agg_x_dim_0_inner().await;
        Ok::<_, Error>(())
    };
    taskrun::run(fut).unwrap();
}
/// Sets up (but does not yet run) the event-chunker pipeline for a scalar
/// (dim-0) channel over one day of data.
/// NOTE(review): the pipeline is constructed and immediately dropped — see
/// the TODO at the bottom.
async fn agg_x_dim_0_inner() {
    let node = make_test_node(0);
    let query = AggQuerySingleChannel {
        channel_config: SfDbChConf {
            channel: SfDbChannel::from_name("sf-databuffer", "S10BC01-DBAM070:EOM1_T1"),
            keyspace: 2,
            time_bin_size: DtNano::from_ns(DAY),
            array: false,
            shape: Shape::Scalar,
            scalar_type: ScalarType::F64,
            byte_order: ByteOrder::Big,
            compression: true,
        },
        timebin: 18723,
        tb_file_count: 1,
        buffer_size: 1024 * 4,
    };
    // Fetch info mirrors the channel config above (same keyspace/shape/type).
    let fetch_info = SfChFetchInfo::new(
        "sf-databuffer",
        "S10BC01-DBAM070:EOM1_T1",
        2,
        DtNano::from_ns(DAY),
        ByteOrder::Big,
        ScalarType::F64,
        Shape::Scalar,
    );
    let _bin_count = 20;
    // Time range: start of the queried time bin, extended by one day.
    let ts1 = query.timebin as u64 * query.channel_config.time_bin_size.ns();
    let ts2 = ts1 + HOUR * 24;
    let range = NanoRange { beg: ts1, end: ts2 };
    let event_chunker_conf = EventChunkerConf::new(ByteSize::from_kb(1024));
    // TODO let upstream already provide DiskIoTune:
    let mut disk_io_tune = DiskIoTune::default_for_testing();
    disk_io_tune.read_buffer_len = query.buffer_size as usize;
    let fut1 = EventChunkerMultifile::new(
        range.clone(),
        fetch_info,
        node.clone(),
        0,
        disk_io_tune,
        event_chunker_conf,
        false,
        true,
        // TODO
        32,
    );
    let _ = fut1;
    // TODO add the binning and expectation and await the result.
}
/// Test wrapper: drives `agg_x_dim_1_inner` to completion on the runtime.
#[test]
fn agg_x_dim_1() {
    let fut = async {
        agg_x_dim_1_inner().await;
        Ok::<_, Error>(())
    };
    taskrun::run(fut).unwrap();
}
async fn agg_x_dim_1_inner() {
// sf-databuffer
// /data/sf-databuffer/daq_swissfel/daq_swissfel_3/byTime/S10BC01-DBAM070\:BAM_CH1_NORM/*
// S10BC01-DBAM070:BAM_CH1_NORM
let node = make_test_node(0);
let query = AggQuerySingleChannel {
channel_config: SfDbChConf {
channel: SfDbChannel::from_name("ks", "wave1"),
keyspace: 3,
time_bin_size: DtNano::from_ns(DAY),
array: true,
shape: Shape::Wave(1024),
scalar_type: ScalarType::F64,
byte_order: ByteOrder::Big,
compression: true,
},
timebin: 0,
tb_file_count: 1,
buffer_size: 17,
};
let fetch_info = SfChFetchInfo::new(
"ks",
"wave1",
2,
DtNano::from_ns(DAY),
ByteOrder::Big,
ScalarType::F64,
Shape::Scalar,
);
let _bin_count = 10;
let ts1 = query.timebin as u64 * query.channel_config.time_bin_size.ns();
let ts2 = ts1 + HOUR * 24;
let range = NanoRange { beg: ts1, end: ts2 };
let event_chunker_conf = EventChunkerConf::new(ByteSize::from_kb(1024));
// TODO let upstream already provide DiskIoTune:
let mut disk_io_tune = DiskIoTune::default_for_testing();
disk_io_tune.read_buffer_len = query.buffer_size as usize;
let fut1 = super::eventblobs::EventChunkerMultifile::new(
range.clone(),
fetch_info,
node.clone(),
0,
disk_io_tune,
event_chunker_conf,
false,
true,
// TODO
32,
);
let _ = fut1;
// TODO add the binning and expectation and await the result.
}

View File

@@ -0,0 +1,264 @@
use crate::agg::binnedt::TBinnerStream;
use crate::binned::query::PreBinnedQuery;
use crate::cache::node_ix_for_patch;
use err::Error;
use futures_core::Stream;
use futures_util::{FutureExt, StreamExt};
use http::{StatusCode, Uri};
use httpclient::HttpBodyAsAsyncRead;
use items::frame::decode_frame;
use items::{FrameDecodable, FrameType, FrameTypeInnerStatic, TimeBinnableType};
use items::{RangeCompletableItem, Sitemty, StreamItem};
use netpod::log::*;
use netpod::query::CacheUsage;
use netpod::x_bin_count;
use netpod::PreBinnedPatchIterator;
use netpod::{AggKind, AppendToUrl, BinnedRange, ByteSize, Channel, NodeConfigCached, PerfOpts, ScalarType, Shape};
use std::future::ready;
use std::marker::PhantomData;
use std::pin::Pin;
use std::str::FromStr;
use std::task::{Context, Poll};
use streams::frames::inmem::InMemoryFrameAsyncReadStream;
use url::Url;
/// Stream which fetches one pre-binned patch over HTTP and yields the
/// decoded frames as `Sitemty<TBT>` items.
pub struct FetchedPreBinned<TBT> {
    // Target URI of the prebinned sub-request.
    uri: Uri,
    // In-flight HTTP request; present between send and response arrival.
    resfut: Option<hyper::client::ResponseFuture>,
    // Frame stream over the response body, once the response arrived.
    res: Option<InMemoryFrameAsyncReadStream<HttpBodyAsAsyncRead>>,
    errored: bool,
    completed: bool,
    _m1: PhantomData<TBT>,
}
impl<TBT> FetchedPreBinned<TBT> {
pub fn new(query: &PreBinnedQuery, host: String, port: u16) -> Result<Self, Error>
where
TBT: FrameTypeInnerStatic + TimeBinnableType,
Sitemty<TBT>: FrameDecodable,
{
// TODO should not assume http:
let mut url = Url::parse(&format!("http://{host}:{port}/api/4/prebinned"))?;
query.append_to_url(&mut url);
let ret = Self {
uri: Uri::from_str(&url.to_string()).map_err(Error::from_string)?,
resfut: None,
res: None,
errored: false,
completed: false,
_m1: PhantomData,
};
Ok(ret)
}
}
impl<TBT> Stream for FetchedPreBinned<TBT>
where
    TBT: FrameTypeInnerStatic + TimeBinnableType,
    Sitemty<TBT>: FrameDecodable,
{
    type Item = Sitemty<TBT>;
    // Three-phase state machine, advanced lazily on poll:
    // 1. no request sent yet -> build the HTTP request, store the response future;
    // 2. response future pending -> await it, wrap the body into a frame stream;
    // 3. frame stream active -> decode each frame into a `Sitemty<TBT>`.
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        'outer: loop {
            break if self.completed {
                panic!("poll_next on completed");
            } else if self.errored {
                self.completed = true;
                return Ready(None);
            } else if let Some(res) = self.res.as_mut() {
                // Phase 3: decode frames from the response body stream.
                match res.poll_next_unpin(cx) {
                    Ready(Some(Ok(item))) => match item {
                        StreamItem::Log(item) => Ready(Some(Ok(StreamItem::Log(item)))),
                        StreamItem::Stats(item) => Ready(Some(Ok(StreamItem::Stats(item)))),
                        StreamItem::DataItem(item) => match decode_frame::<Sitemty<TBT>>(&item) {
                            // Outer Ok: frame decoded; inner value is the
                            // remote's own Ok/Err result.
                            Ok(Ok(item)) => Ready(Some(Ok(item))),
                            Ok(Err(e)) => {
                                self.errored = true;
                                Ready(Some(Err(e)))
                            }
                            Err(e) => {
                                self.errored = true;
                                Ready(Some(Err(e)))
                            }
                        },
                    },
                    Ready(Some(Err(e))) => {
                        self.errored = true;
                        Ready(Some(Err(e)))
                    }
                    Ready(None) => {
                        self.completed = true;
                        Ready(None)
                    }
                    Pending => Pending,
                }
            } else if let Some(resfut) = self.resfut.as_mut() {
                // Phase 2: wait for the HTTP response head.
                match resfut.poll_unpin(cx) {
                    Ready(res) => match res {
                        Ok(res) => {
                            if res.status() == StatusCode::OK {
                                let perf_opts = PerfOpts { inmem_bufcap: 512 };
                                let s1 = HttpBodyAsAsyncRead::new(res);
                                let s2 = InMemoryFrameAsyncReadStream::new(s1, perf_opts.inmem_bufcap);
                                self.res = Some(s2);
                                continue 'outer;
                            } else {
                                let msg =
                                    format!("PreBinnedValueFetchedStream non-OK result from sub request: {res:?}");
                                error!("{msg}");
                                let e = Error::with_msg_no_trace(msg);
                                self.errored = true;
                                Ready(Some(Err(e)))
                            }
                        }
                        Err(e) => {
                            error!("PreBinnedValueStream error in stream {e:?}");
                            self.errored = true;
                            Ready(Some(Err(Error::from_string(e))))
                        }
                    },
                    Pending => Pending,
                }
            } else {
                // Phase 1: send the GET request.
                match hyper::Request::builder()
                    .method(http::Method::GET)
                    .uri(&self.uri)
                    .body(hyper::Body::empty())
                {
                    Ok(req) => {
                        let client = hyper::Client::new();
                        self.resfut = Some(client.request(req));
                        continue 'outer;
                    }
                    Err(e) => {
                        self.errored = true;
                        Ready(Some(Err(Error::from_string(e))))
                    }
                }
            };
        }
    }
}
/// Generate bins from a range of pre-binned patches.
///
/// Takes an iterator over the necessary patches.
pub struct BinnedFromPreBinned<TBT>
where
    TBT: TimeBinnableType,
{
    // TODO get rid of box:
    // The fully assembled fetch -> filter -> time-bin pipeline.
    inp: Pin<Box<dyn Stream<Item = Sitemty<TBT>> + Send>>,
    // NOTE(review): redundant — `inp` already mentions TBT; kept as-is.
    _m1: PhantomData<TBT>,
}
impl<TBT> BinnedFromPreBinned<TBT>
where
TBT: TimeBinnableType<Output = TBT> + Unpin + 'static,
Sitemty<TBT>: FrameType + FrameDecodable,
{
pub fn new(
patch_it: PreBinnedPatchIterator,
channel: Channel,
range: BinnedRange,
scalar_type: ScalarType,
shape: Shape,
agg_kind: AggKind,
cache_usage: CacheUsage,
disk_io_buffer_size: usize,
node_config: &NodeConfigCached,
disk_stats_every: ByteSize,
report_error: bool,
) -> Result<Self, Error> {
let patches: Vec<_> = patch_it.collect();
let mut sp = String::new();
if false {
// Convert this to a StreamLog message:
for (i, p) in patches.iter().enumerate() {
use std::fmt::Write;
write!(sp, " • patch {i:2} {p:?}\n")?;
}
info!("Using these pre-binned patches:\n{sp}");
}
let pmax = patches.len();
let inp = futures_util::stream::iter(patches.into_iter().enumerate())
.map({
let shape = shape.clone();
let agg_kind = agg_kind.clone();
let node_config = node_config.clone();
move |(pix, patch)| {
let query = PreBinnedQuery::new(
patch,
channel.clone(),
scalar_type.clone(),
shape.clone(),
agg_kind.clone(),
cache_usage.clone(),
disk_io_buffer_size,
disk_stats_every.clone(),
report_error,
);
let nodeix = node_ix_for_patch(&query.patch(), &query.channel(), &node_config.node_config.cluster);
let node = &node_config.node_config.cluster.nodes[nodeix as usize];
let ret: Pin<Box<dyn Stream<Item = _> + Send>> =
match FetchedPreBinned::<TBT>::new(&query, node.host.clone(), node.port.clone()) {
Ok(stream) => Box::pin(stream.map(move |q| (pix, q))),
Err(e) => {
error!("error from PreBinnedValueFetchedStream::new {e:?}");
Box::pin(futures_util::stream::iter(vec![(pix, Err(e))]))
}
};
ret
}
})
.flatten()
.filter_map({
let range = range.clone();
move |(pix, k)| {
let fit_range = range.full_range();
let g = match k {
Ok(item) => match item {
StreamItem::Log(item) => Some(Ok(StreamItem::Log(item))),
StreamItem::Stats(item) => Some(Ok(StreamItem::Stats(item))),
StreamItem::DataItem(item) => match item {
RangeCompletableItem::RangeComplete => {
if pix + 1 == pmax {
Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)))
} else {
None
}
}
RangeCompletableItem::Data(item) => {
match crate::binned::FilterFittingInside::filter_fitting_inside(item, fit_range) {
Some(item) => Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(item)))),
None => None,
}
}
},
},
Err(e) => Some(Err(e)),
};
ready(g)
}
});
let inp = TBinnerStream::<_, TBT>::new(inp, range, x_bin_count(&shape, &agg_kind), agg_kind.do_time_weighted());
Ok(Self {
inp: Box::pin(inp),
_m1: PhantomData,
})
}
}
impl<TBT> Stream for BinnedFromPreBinned<TBT>
where
    TBT: TimeBinnableType<Output = TBT> + Unpin + 'static,
    Sitemty<TBT>: FrameType + FrameDecodable,
{
    type Item = Sitemty<TBT>;

    /// Delegates directly to the boxed inner pipeline stream.
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        let inner = self.inp.as_mut();
        inner.poll_next(cx)
    }
}

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,537 @@
use crate::agg::binnedt::TBinnerStream;
use crate::binned::binnedfrompbv::FetchedPreBinned;
use crate::binned::query::PreBinnedQuery;
use crate::binned::WithLen;
use crate::cache::{write_pb_cache_min_max_avg_scalar, CacheFileDesc, WrittenPbCache};
use crate::decode::{Endianness, EventValueFromBytes, EventValueShape, NumFromBytes};
use crate::merge::mergedfromremotes::MergedFromRemotes;
use crate::streamlog::Streamlog;
use err::Error;
use futures_core::Stream;
use futures_util::{FutureExt, StreamExt};
use items::numops::NumOps;
use items::{
Appendable, Clearable, EventsNodeProcessor, EventsTypeAliases, FrameDecodable, FrameType, PushableIndex,
RangeCompletableItem, ReadableFromFile, Sitemty, StreamItem, TimeBinnableType,
};
use netpod::log::*;
use netpod::query::{CacheUsage, RawEventsQuery};
use netpod::x_bin_count;
use netpod::{AggKind, BinnedRange, PreBinnedPatchIterator, PreBinnedPatchRange};
use netpod::{NodeConfigCached, PerfOpts};
use serde::Serialize;
use std::future::Future;
use std::io;
use std::marker::PhantomData;
use std::path::PathBuf;
use std::pin::Pin;
use std::task::{Context, Poll};
use tokio::fs::{File, OpenOptions};
/// Node-local stream of pre-binned values for one patch.
///
/// Resolution order (lazy, driven by `poll_next`):
/// 1. local cache file (when `CacheUsage::Use`),
/// 2. re-binned higher-resolution pre-binned patches,
/// 3. time-binned merge of raw events from the remote nodes.
/// Freshly produced bins may be written back to the cache at the end.
pub struct PreBinnedValueStream<NTY, END, EVS, ENP>
where
    NTY: NumOps + NumFromBytes<NTY, END> + Serialize + 'static,
    END: Endianness + 'static,
    EVS: EventValueShape<NTY, END> + EventValueFromBytes<NTY, END> + 'static,
    ENP: EventsNodeProcessor<Input = <EVS as EventValueFromBytes<NTY, END>>::Batch>,
{
    query: PreBinnedQuery,
    agg_kind: AggKind,
    node_config: NodeConfigCached,
    // Future probing for a local cache file (resolution step 1).
    open_check_local_file: Option<Pin<Box<dyn Future<Output = Result<File, io::Error>> + Send>>>,
    // Fallback input stream (resolution steps 2 and 3).
    stream_from_other_inputs:
        Option<Pin<Box<dyn Stream<Item = Sitemty<<ENP as EventsTypeAliases>::TimeBinOutput>> + Send>>>,
    read_from_cache: bool,
    cache_written: bool,
    data_complete: bool,
    range_complete_observed: bool,
    range_complete_emitted: bool,
    errored: bool,
    all_done: bool,
    completed: bool,
    // Buffered log messages, emitted before data items.
    streamlog: Streamlog,
    // Accumulated bins, written to the cache when data is complete.
    values: Option<<ENP as EventsTypeAliases>::TimeBinOutput>,
    // In-flight cache write / cache read futures.
    write_fut: Option<Pin<Box<dyn Future<Output = Result<WrittenPbCache, Error>> + Send>>>,
    read_cache_fut: Option<Pin<Box<dyn Future<Output = Sitemty<<ENP as EventsTypeAliases>::TimeBinOutput>> + Send>>>,
    _m1: PhantomData<NTY>,
    _m2: PhantomData<END>,
    _m3: PhantomData<EVS>,
    _m4: PhantomData<ENP>,
}
impl<NTY, END, EVS, ENP> PreBinnedValueStream<NTY, END, EVS, ENP>
where
NTY: NumOps + NumFromBytes<NTY, END> + Serialize + 'static,
END: Endianness + 'static,
EVS: EventValueShape<NTY, END> + EventValueFromBytes<NTY, END> + 'static,
ENP: EventsNodeProcessor<Input = <EVS as EventValueFromBytes<NTY, END>>::Batch> + 'static,
<ENP as EventsNodeProcessor>::Output: PushableIndex + Appendable + Clearable,
// TODO is this needed:
Sitemty<<ENP as EventsNodeProcessor>::Output>: FrameType,
// TODO who exactly needs this DeserializeOwned?
Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>: FrameType + FrameDecodable,
{
pub fn new(query: PreBinnedQuery, agg_kind: AggKind, node_config: &NodeConfigCached) -> Self {
Self {
query,
agg_kind,
node_config: node_config.clone(),
open_check_local_file: None,
stream_from_other_inputs: None,
read_from_cache: false,
cache_written: false,
data_complete: false,
range_complete_observed: false,
range_complete_emitted: false,
errored: false,
all_done: false,
completed: false,
streamlog: Streamlog::new(node_config.ix as u32),
// TODO use alias via some trait associated type:
//values: <<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output as Appendable>::empty(),
values: None,
write_fut: None,
read_cache_fut: None,
_m1: PhantomData,
_m2: PhantomData,
_m3: PhantomData,
_m4: PhantomData,
}
}
fn setup_merged_from_remotes(
&mut self,
) -> Result<
Pin<Box<dyn Stream<Item = Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>> + Send>>,
Error,
> {
// TODO let PreBinnedQuery provide the tune and pass to RawEventsQuery:
let evq = RawEventsQuery::new(
self.query.channel().clone(),
self.query.patch().patch_range(),
self.query.agg_kind().clone(),
);
if self.query.patch().patch_t_len() % self.query.patch().bin_t_len() != 0 {
let msg = format!(
"Patch length inconsistency {} {}",
self.query.patch().patch_t_len(),
self.query.patch().bin_t_len()
);
error!("{}", msg);
return Err(Error::with_msg(msg));
}
// TODO do I need to set up more transformations or binning to deliver the requested data?
let count = self.query.patch().patch_t_len() / self.query.patch().bin_t_len();
let range = BinnedRange::covering_range(evq.range.clone(), count as u32)?;
let perf_opts = PerfOpts { inmem_bufcap: 512 };
let s = MergedFromRemotes::<ENP>::new(evq, perf_opts, self.node_config.node_config.cluster.clone());
let ret = TBinnerStream::<_, <ENP as EventsNodeProcessor>::Output>::new(
s,
range,
x_bin_count(&self.query.shape().clone(), &self.agg_kind),
self.agg_kind.do_time_weighted(),
);
Ok(Box::pin(ret))
}
fn setup_from_higher_res_prebinned(
&mut self,
range: PreBinnedPatchRange,
) -> Result<
Pin<Box<dyn Stream<Item = Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>> + Send>>,
Error,
> {
let g = self.query.patch().bin_t_len();
let h = range.grid_spec.bin_t_len();
trace!(
"try_setup_fetch_prebinned_higher_res found g {} h {} ratio {} mod {} {:?}",
g,
h,
g / h,
g % h,
range,
);
if g / h <= 1 {
let msg = format!("try_setup_fetch_prebinned_higher_res g {} h {}", g, h);
return Err(Error::with_msg(msg));
}
if g / h > 1024 * 10 {
let msg = format!("try_setup_fetch_prebinned_higher_res g {} h {}", g, h);
return Err(Error::with_msg(msg));
}
if g % h != 0 {
let msg = format!("try_setup_fetch_prebinned_higher_res g {} h {}", g, h);
return Err(Error::with_msg(msg));
}
let node_config = self.node_config.clone();
let patch_it = PreBinnedPatchIterator::from_range(range);
let s = futures_util::stream::iter(patch_it)
.map({
let q2 = self.query.clone();
let disk_io_buffer_size = self.query.disk_io_buffer_size();
let disk_stats_every = self.query.disk_stats_every().clone();
let report_error = self.query.report_error();
move |patch| {
let query = PreBinnedQuery::new(
patch,
q2.channel().clone(),
q2.scalar_type().clone(),
q2.shape().clone(),
q2.agg_kind().clone(),
q2.cache_usage().clone(),
disk_io_buffer_size,
disk_stats_every.clone(),
report_error,
);
let nodeix = crate::cache::node_ix_for_patch(
&query.patch(),
&query.channel(),
&node_config.node_config.cluster,
);
let node = &node_config.node_config.cluster.nodes[nodeix as usize];
let ret =
FetchedPreBinned::<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>::new(
&query,
node.host.clone(),
node.port.clone(),
)?;
Ok(ret)
}
})
.map(|k| {
let s: Pin<Box<dyn Stream<Item = _> + Send>> = match k {
Ok(k) => Box::pin(k),
Err(e) => Box::pin(futures_util::stream::iter(vec![Err(e)])),
};
s
})
.flatten();
Ok(Box::pin(s))
}
fn try_setup_fetch_prebinned_higher_res(&mut self) -> Result<(), Error> {
info!("try_setup_fetch_prebinned_higher_res");
let range = self.query.patch().patch_range();
match PreBinnedPatchRange::covering_range(range, self.query.patch().bin_count() + 1) {
Ok(Some(range)) => {
self.stream_from_other_inputs = Some(self.setup_from_higher_res_prebinned(range)?);
}
Ok(None) => {
self.stream_from_other_inputs = Some(self.setup_merged_from_remotes()?);
}
Err(e) => return Err(e),
}
Ok(())
}
fn poll_write_fut(
self: &mut Self,
mut fut: Pin<Box<dyn Future<Output = Result<WrittenPbCache, Error>> + Send>>,
cx: &mut Context,
) -> Poll<Option<Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>>> {
trace!("poll_write_fut");
use Poll::*;
match fut.poll_unpin(cx) {
Ready(item) => {
self.cache_written = true;
self.write_fut = None;
match item {
Ok(res) => {
self.streamlog.append(
Level::INFO,
format!(
"cache file written bytes: {} duration {} ms",
res.bytes,
res.duration.as_millis()
),
);
self.all_done = true;
Ready(None)
}
Err(e) => {
self.errored = true;
Ready(Some(Err(e)))
}
}
}
Pending => {
self.write_fut = Some(fut);
Pending
}
}
}
fn poll_read_cache_fut(
self: &mut Self,
mut fut: Pin<
Box<
dyn Future<
Output = Result<
StreamItem<RangeCompletableItem<<ENP as EventsTypeAliases>::TimeBinOutput>>,
Error,
>,
> + Send,
>,
>,
cx: &mut Context,
) -> Poll<Option<Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>>> {
trace!("poll_read_cache_fut");
use Poll::*;
match fut.poll_unpin(cx) {
Ready(item) => {
self.read_cache_fut = None;
match item {
Ok(item) => {
self.data_complete = true;
self.range_complete_observed = true;
Ready(Some(Ok(item)))
}
Err(e) => {
self.errored = true;
Ready(Some(Err(e)))
}
}
}
Pending => {
self.read_cache_fut = Some(fut);
Pending
}
}
}
fn handle_data_complete(
self: &mut Self,
) -> Poll<Option<Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>>> {
trace!("handle_data_complete");
use Poll::*;
if self.cache_written {
// TODO can we ever get here?
if self.range_complete_observed {
self.range_complete_emitted = true;
let item = RangeCompletableItem::RangeComplete;
Ready(Some(Ok(StreamItem::DataItem(item))))
} else {
self.all_done = true;
Ready(None)
}
} else if self.read_from_cache {
// TODO refactor: raising cache_written even though we did not actually write is misleading.
self.cache_written = true;
self.all_done = true;
Ready(None)
} else {
match self.query.cache_usage() {
CacheUsage::Use | CacheUsage::Recreate => {
if let Some(values) = self.values.take() {
let msg = format!(
"write cache file query: {:?} bin count: {}",
self.query.patch(),
values.len(),
);
self.streamlog.append(Level::INFO, msg);
let fut = write_pb_cache_min_max_avg_scalar(
values,
self.query.patch().clone(),
self.query.agg_kind().clone(),
self.query.channel().clone(),
self.node_config.clone(),
);
self.write_fut = Some(Box::pin(fut));
Ready(None)
} else {
warn!("no values to write to cache");
Ready(None)
}
}
_ => {
// TODO refactor: raising cache_written even though we did not actually write is misleading.
self.cache_written = true;
self.all_done = true;
Ready(None)
}
}
}
}
fn poll_stream_from_other_inputs(
self: &mut Self,
mut fut: Pin<
Box<
dyn Stream<
Item = Result<
StreamItem<RangeCompletableItem<<ENP as EventsTypeAliases>::TimeBinOutput>>,
Error,
>,
> + Send,
>,
>,
cx: &mut Context,
) -> Poll<Option<Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>>> {
use Poll::*;
match fut.poll_next_unpin(cx) {
Ready(Some(k)) => match k {
Ok(item) => {
self.stream_from_other_inputs = Some(fut);
match item {
StreamItem::Log(item) => Ready(Some(Ok(StreamItem::Log(item)))),
StreamItem::Stats(item) => Ready(Some(Ok(StreamItem::Stats(item)))),
StreamItem::DataItem(item) => match item {
RangeCompletableItem::RangeComplete => {
self.range_complete_observed = true;
Ready(None)
}
RangeCompletableItem::Data(item) => {
if let Some(values) = &mut self.values {
values.append(&item);
} else {
let mut values = item.empty_like_self();
values.append(&item);
self.values = Some(values);
}
Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(item)))))
}
},
}
}
Err(e) => {
self.errored = true;
Ready(Some(Err(e)))
}
},
Ready(None) => {
self.data_complete = true;
Ready(None)
}
Pending => {
self.stream_from_other_inputs = Some(fut);
Pending
}
}
}
fn poll_open_check_local_file(
self: &mut Self,
mut fut: Pin<Box<dyn Future<Output = Result<File, io::Error>> + Send>>,
cx: &mut Context,
) -> Poll<Option<Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>>> {
use Poll::*;
match fut.poll_unpin(cx) {
Ready(item) => {
match item {
Ok(file) => {
self.read_from_cache = true;
let fut =
<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output as ReadableFromFile>::read_from_file(file)?;
self.read_cache_fut = Some(Box::pin(fut));
// Return Ready(None) to signal that nothing is Pending but we need to get polled again.
//continue 'outer;
Ready(None)
}
Err(e) => match e.kind() {
// TODO other error kinds
io::ErrorKind::NotFound => match self.try_setup_fetch_prebinned_higher_res() {
Ok(_) => {
if self.stream_from_other_inputs.is_none() {
let e =
Err(Error::with_msg(format!("try_setup_fetch_prebinned_higher_res failed")));
self.errored = true;
Ready(Some(e))
} else {
//continue 'outer;
Ready(None)
}
}
Err(e) => {
let e =
Error::with_msg(format!("try_setup_fetch_prebinned_higher_res error: {:?}", e));
self.errored = true;
Ready(Some(Err(e)))
}
},
_ => {
error!("File I/O error: kind {:?} {:?}\n\n..............", e.kind(), e);
self.errored = true;
Ready(Some(Err(e.into())))
}
},
}
}
Pending => {
self.open_check_local_file = Some(fut);
Pending
}
}
}
}
// Helper for the poll loop below: the sub-poll helpers return `Ready(None)`
// to mean "state advanced, nothing to emit yet" — in that case restart the
// loop instead of ending the stream; any other value is yielded as-is.
macro_rules! some_or_continue {
    ($x:expr) => {
        if let Ready(None) = $x {
            continue;
        } else {
            $x
        }
    };
}
impl<NTY, END, EVS, ENP> Stream for PreBinnedValueStream<NTY, END, EVS, ENP>
where
    NTY: NumOps + NumFromBytes<NTY, END> + Serialize + Unpin + 'static,
    END: Endianness + Unpin + 'static,
    EVS: EventValueShape<NTY, END> + EventValueFromBytes<NTY, END> + Unpin + 'static,
    ENP: EventsNodeProcessor<Input = <EVS as EventValueFromBytes<NTY, END>>::Batch> + Unpin + 'static,
    <ENP as EventsNodeProcessor>::Output: PushableIndex + Appendable + Clearable,
    // TODO needed?
    Sitemty<<ENP as EventsNodeProcessor>::Output>: FrameType,
    Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>: FrameType + FrameDecodable,
{
    type Item = Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>;
    // Dispatch loop: each branch `take()`s the active future/stream and hands
    // it to the matching poll helper; the helpers put it back while pending.
    // The final branch is the initial state and kicks off the probe for a
    // local cache file.
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        loop {
            break if self.completed {
                panic!("PreBinnedValueStream poll_next on completed");
            } else if self.errored {
                self.completed = true;
                Ready(None)
            } else if self.all_done {
                self.completed = true;
                Ready(None)
            } else if let Some(item) = self.streamlog.pop() {
                // Flush buffered log messages before data items.
                Ready(Some(Ok(StreamItem::Log(item))))
            } else if let Some(fut) = self.write_fut.take() {
                let x = Self::poll_write_fut(&mut self, fut, cx);
                some_or_continue!(x)
            } else if let Some(fut) = self.read_cache_fut.take() {
                let x = Self::poll_read_cache_fut(&mut self, fut, cx);
                some_or_continue!(x)
            } else if self.range_complete_emitted {
                self.completed = true;
                Ready(None)
            } else if self.data_complete {
                let x = Self::handle_data_complete(&mut self);
                some_or_continue!(x)
            } else if let Some(fut) = self.stream_from_other_inputs.take() {
                let x = Self::poll_stream_from_other_inputs(&mut self, fut, cx);
                some_or_continue!(x)
            } else if let Some(fut) = self.open_check_local_file.take() {
                let x = Self::poll_open_check_local_file(&mut self, fut, cx);
                some_or_continue!(x)
            } else {
                // Initial state: decide which cache file (if any) to probe.
                let cfd = CacheFileDesc::new(
                    self.query.channel().clone(),
                    self.query.patch().clone(),
                    self.query.agg_kind().clone(),
                );
                let path = match self.query.cache_usage() {
                    CacheUsage::Use => cfd.path(&self.node_config),
                    // Other cache modes probe a path which never exists,
                    // forcing the fallback setup in poll_open_check_local_file.
                    _ => PathBuf::from("DOESNOTEXIST"),
                };
                let fut = async { OpenOptions::new().read(true).open(path).await };
                self.open_check_local_file = Some(Box::pin(fut));
                continue;
            };
        }
    }
}

View File

@@ -0,0 +1,260 @@
use crate::binned::pbv::PreBinnedValueStream;
use crate::binned::query::PreBinnedQuery;
use crate::cache::node_ix_for_patch;
use crate::decode::{
BigEndian, Endianness, EventValueFromBytes, EventValueShape, EventValuesDim0Case, EventValuesDim1Case,
LittleEndian, NumFromBytes,
};
use bytes::Bytes;
use dbconn::bincache::pre_binned_value_stream;
use err::Error;
use futures_core::Stream;
use futures_util::StreamExt;
use items::numops::{BoolNum, NumOps, StringNum};
use items::{
Appendable, Clearable, EventsNodeProcessor, Framable, FrameDecodable, FrameType, FrameTypeInnerDyn, PushableIndex,
RangeCompletableItem, Sitemty, StreamItem, TimeBinnableType, TimeBinned,
};
use netpod::log::*;
use netpod::{AggKind, ByteOrder, ChannelTyped, NodeConfigCached, ScalarType, Shape};
use serde::Serialize;
use std::pin::Pin;
/// Build the pre-binned pipeline once all generic parameters (numeric type,
/// endianness, event value shape, events node processor) are resolved.
///
/// If the cluster has a scylla cache configured, delegates to the
/// scylla-backed `pre_binned_value_stream`; otherwise falls back to the
/// file-cache based `PreBinnedValueStream`. In both cases the concrete
/// binned output is type-erased into `Box<dyn TimeBinned>` items.
async fn make_num_pipeline_nty_end_evs_enp<NTY, END, EVS, ENP>(
    scalar_type: ScalarType,
    shape: Shape,
    agg_kind: AggKind,
    _event_value_shape: EVS,
    _events_node_proc: ENP,
    query: PreBinnedQuery,
    node_config: &NodeConfigCached,
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<Box<dyn TimeBinned>>> + Send>>, Error>
where
    NTY: NumOps + NumFromBytes<NTY, END> + Serialize + 'static,
    END: Endianness + 'static,
    EVS: EventValueShape<NTY, END> + EventValueFromBytes<NTY, END> + 'static,
    ENP: EventsNodeProcessor<Input = <EVS as EventValueFromBytes<NTY, END>>::Batch> + 'static,
    <ENP as EventsNodeProcessor>::Output: PushableIndex + Appendable + Clearable + 'static,
    <<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output: FrameTypeInnerDyn + TimeBinned,
    Sitemty<<ENP as EventsNodeProcessor>::Output>: FrameType + Framable + 'static,
    Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>: Framable + FrameType + FrameDecodable,
{
    if let Some(scyconf) = &node_config.node_config.cluster.cache_scylla {
        trace!("~~~~~~~~~~~~~~~ make_num_pipeline_nty_end_evs_enp using scylla as cache");
        let chn = ChannelTyped {
            channel: query.channel().clone(),
            scalar_type,
            shape,
        };
        // NOTE(review): assumes the channel always carries a series id here;
        // otherwise this unwrap panics — confirm callers guarantee it.
        let stream = pre_binned_value_stream(
            chn.channel().series().unwrap(),
            &chn,
            query.patch(),
            agg_kind,
            query.cache_usage(),
            scyconf,
        )
        .await?;
        // Wrap plain values into the stream-item protocol.
        let stream = stream.map(|x| {
            let ret = match x {
                Ok(k) => Ok(StreamItem::DataItem(RangeCompletableItem::Data(k))),
                Err(e) => Err(e),
            };
            ret
        });
        let stream = Box::pin(stream) as Pin<Box<dyn Stream<Item = Sitemty<Box<dyn TimeBinned>>> + Send>>;
        Ok(stream)
    } else {
        let ret = PreBinnedValueStream::<NTY, END, EVS, ENP>::new(query, agg_kind, node_config);
        // Type-erase concrete binned data items; all other stream items pass
        // through unchanged.
        let ret = StreamExt::map(ret, |item| {
            //
            match item {
                Ok(StreamItem::DataItem(RangeCompletableItem::Data(k))) => {
                    let g = Box::new(k) as Box<dyn TimeBinned>;
                    Ok(StreamItem::DataItem(RangeCompletableItem::Data(g)))
                }
                Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)) => {
                    Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))
                }
                Ok(StreamItem::Log(k)) => Ok(StreamItem::Log(k)),
                Ok(StreamItem::Stats(k)) => Ok(StreamItem::Stats(k)),
                Err(e) => Err(e),
            }
        });
        Ok(Box::pin(ret))
    }
}
/// Resolve the event value shape and node-processor types from `shape` and
/// `agg_kind`, then forward to `make_num_pipeline_nty_end_evs_enp`.
///
/// Panics on aggregation kinds that are not meant to be time-binned
/// (`EventBlobs`, `Plain`, `Stats1`); image shapes are not implemented yet.
async fn make_num_pipeline_nty_end<NTY, END>(
    scalar_type: ScalarType,
    shape: Shape,
    agg_kind: AggKind,
    query: PreBinnedQuery,
    node_config: &NodeConfigCached,
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<Box<dyn TimeBinned>>> + Send>>, Error>
where
    NTY: NumOps + NumFromBytes<NTY, END> + Serialize + 'static,
    END: Endianness + 'static,
{
    match shape {
        Shape::Scalar => {
            let evs = EventValuesDim0Case::new();
            match agg_kind {
                AggKind::EventBlobs => panic!(),
                AggKind::TimeWeightedScalar | AggKind::DimXBins1 => {
                    // Aggregate all X values into a single bin.
                    let events_node_proc = <<EventValuesDim0Case<NTY> as EventValueShape<NTY, END>>::NumXAggToSingleBin as EventsNodeProcessor>::create(shape.clone(), agg_kind.clone());
                    make_num_pipeline_nty_end_evs_enp::<NTY, END, _, _>(
                        scalar_type,
                        shape,
                        agg_kind,
                        evs,
                        events_node_proc,
                        query,
                        node_config,
                    )
                    .await
                }
                AggKind::DimXBinsN(_) => {
                    // Keep N bins along the X dimension.
                    let events_node_proc = <<EventValuesDim0Case<NTY> as EventValueShape<NTY, END>>::NumXAggToNBins as EventsNodeProcessor>::create(shape.clone(), agg_kind.clone());
                    make_num_pipeline_nty_end_evs_enp::<NTY, END, _, _>(
                        scalar_type,
                        shape,
                        agg_kind,
                        evs,
                        events_node_proc,
                        query,
                        node_config,
                    )
                    .await
                }
                AggKind::Plain => {
                    panic!();
                }
                AggKind::Stats1 => {
                    // Currently not meant to be binned.
                    panic!();
                }
            }
        }
        Shape::Wave(n) => {
            let evs = EventValuesDim1Case::new(n);
            match agg_kind {
                AggKind::EventBlobs => panic!(),
                AggKind::TimeWeightedScalar | AggKind::DimXBins1 => {
                    let events_node_proc = <<EventValuesDim1Case<NTY> as EventValueShape<NTY, END>>::NumXAggToSingleBin as EventsNodeProcessor>::create(shape.clone(), agg_kind.clone());
                    make_num_pipeline_nty_end_evs_enp::<NTY, END, _, _>(
                        scalar_type,
                        shape,
                        agg_kind,
                        evs,
                        events_node_proc,
                        query,
                        node_config,
                    )
                    .await
                }
                AggKind::DimXBinsN(_) => {
                    let events_node_proc = <<EventValuesDim1Case<NTY> as EventValueShape<NTY, END>>::NumXAggToNBins as EventsNodeProcessor>::create(shape.clone(), agg_kind.clone());
                    make_num_pipeline_nty_end_evs_enp::<NTY, END, _, _>(
                        scalar_type,
                        shape,
                        agg_kind,
                        evs,
                        events_node_proc,
                        query,
                        node_config,
                    )
                    .await
                }
                AggKind::Plain => {
                    panic!();
                }
                AggKind::Stats1 => {
                    // Currently not meant to be binned.
                    panic!();
                }
            }
        }
        Shape::Image(..) => {
            // TODO image binning/aggregation
            err::todoval()
        }
    }
}
// Dispatch on the runtime byte order to instantiate `make_num_pipeline_nty_end`
// with the matching `Endianness` *type* parameter. A macro (not a function)
// because the endianness selects a type, which a function argument cannot.
macro_rules! match_end {
    ($nty:ident, $end:expr, $scalar_type:expr, $shape:expr, $agg_kind:expr, $query:expr, $node_config:expr) => {
        match $end {
            ByteOrder::Little => {
                make_num_pipeline_nty_end::<$nty, LittleEndian>($scalar_type, $shape, $agg_kind, $query, $node_config)
                    .await
            }
            ByteOrder::Big => {
                make_num_pipeline_nty_end::<$nty, BigEndian>($scalar_type, $shape, $agg_kind, $query, $node_config)
                    .await
            }
        }
    };
}
/// Entry point for building the pre-binned pipeline: dispatches on the
/// runtime `ScalarType` (and, via `match_end!`, on the byte order) to the
/// fully monomorphized pipeline constructors.
async fn make_num_pipeline(
    scalar_type: ScalarType,
    byte_order: ByteOrder,
    shape: Shape,
    agg_kind: AggKind,
    query: PreBinnedQuery,
    node_config: &NodeConfigCached,
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<Box<dyn TimeBinned>>> + Send>>, Error> {
    match scalar_type {
        ScalarType::U8 => match_end!(u8, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::U16 => match_end!(u16, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::U32 => match_end!(u32, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::U64 => match_end!(u64, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::I8 => match_end!(i8, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::I16 => match_end!(i16, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::I32 => match_end!(i32, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::I64 => match_end!(i64, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::F32 => match_end!(f32, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::F64 => match_end!(f64, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::BOOL => match_end!(BoolNum, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::STRING => match_end!(StringNum, byte_order, scalar_type, shape, agg_kind, query, node_config),
    }
}
pub async fn pre_binned_bytes_for_http(
node_config: &NodeConfigCached,
query: &PreBinnedQuery,
) -> Result<Pin<Box<dyn Stream<Item = Result<Bytes, Error>> + Send>>, Error> {
if query.channel().backend != node_config.node_config.cluster.backend {
let err = Error::with_msg(format!(
"backend mismatch node: {} requested: {}",
node_config.node_config.cluster.backend,
query.channel().backend
));
return Err(err);
}
let patch_node_ix = node_ix_for_patch(query.patch(), query.channel(), &node_config.node_config.cluster);
if node_config.ix as u32 != patch_node_ix {
let err = Error::with_msg(format!(
"pre_binned_bytes_for_http node mismatch node_config.ix {} patch_node_ix {}",
node_config.ix, patch_node_ix
));
return Err(err);
}
let ret = make_num_pipeline(
query.scalar_type().clone(),
// TODO actually, make_num_pipeline should not depend on endianness.
ByteOrder::Little,
query.shape().clone(),
query.agg_kind().clone(),
query.clone(),
node_config,
)
.await?
.map(|item| match item.make_frame() {
Ok(item) => Ok(item.freeze()),
Err(e) => Err(e),
});
let ret = Box::pin(ret);
Ok(ret)
}

View File

@@ -0,0 +1,27 @@
use err::Error;
use futures_util::Stream;
use futures_util::StreamExt;
use std::pin::Pin;
use std::task::{Context, Poll};
// TODO remove after refactor.
/// Type-erased, pinned wrapper around any `Send` stream of items `I`.
pub struct BoxedStream<I> {
    inp: Pin<Box<dyn Stream<Item = I> + Send>>,
}

impl<I> BoxedStream<I> {
    /// Boxes and pins the given stream.
    ///
    /// Infallible in practice; the `Result` return type is kept for
    /// compatibility with existing callers.
    pub fn new<T>(inp: T) -> Result<Self, Error>
    where
        T: Stream<Item = I> + Send + 'static,
    {
        let inp = Box::pin(inp);
        Ok(Self { inp })
    }
}

impl<I> Stream for BoxedStream<I> {
    type Item = I;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        // Delegate directly to the inner pinned stream.
        self.inp.as_mut().poll_next(cx)
    }
}

244
crates/disk/src/cache.rs Normal file
View File

@@ -0,0 +1,244 @@
use chrono::Utc;
use err::Error;
use netpod::log::*;
use netpod::timeunits::SEC;
use netpod::AggKind;
use netpod::Cluster;
use netpod::NodeConfigCached;
use netpod::PreBinnedPatchCoordEnum;
use netpod::SfDbChannel;
use serde::Deserialize;
use serde::Serialize;
use std::collections::VecDeque;
use std::io;
use std::path::PathBuf;
use std::time::Duration;
use std::time::Instant;
use tiny_keccak::Hasher;
// For file-based caching, this determined the node where the cache file is located.
// No longer needed for scylla-based caching.
/// Pick the cluster node responsible for caching the given channel.
///
/// Hashes backend and channel name with SHA3-256 and maps the first four
/// little-endian bytes onto the node list. The patch coordinate is
/// deliberately no longer part of the hash, so all patches of a channel land
/// on the same node; the parameter is kept (underscored) for interface
/// compatibility.
pub fn node_ix_for_patch(_patch_coord: &PreBinnedPatchCoordEnum, channel: &SfDbChannel, cluster: &Cluster) -> u32 {
    let mut hash = tiny_keccak::Sha3::v256();
    hash.update(channel.backend().as_bytes());
    hash.update(channel.name().as_bytes());
    let mut out = [0; 32];
    hash.finalize(&mut out);
    let a = [out[0], out[1], out[2], out[3]];
    let ix = u32::from_le_bytes(a) % cluster.nodes.len() as u32;
    ix
}
/// Identifies one cached, pre-binned file on disk.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct CacheFileDesc {
    // What identifies a cached file?
    channel: SfDbChannel,
    patch: PreBinnedPatchCoordEnum,
    agg_kind: AggKind,
}

impl CacheFileDesc {
    pub fn new(channel: SfDbChannel, patch: PreBinnedPatchCoordEnum, agg_kind: AggKind) -> Self {
        Self {
            channel,
            patch,
            agg_kind,
        }
    }

    /// Hex SHA3-256 over backend, channel name and aggregation kind.
    ///
    /// NOTE(review): the patch coordinates are currently excluded from the
    /// hash (historic fields commented out), so descriptors differing only
    /// by patch hash identically — confirm this is intended.
    pub fn hash(&self) -> String {
        let mut h = tiny_keccak::Sha3::v256();
        h.update(b"V000");
        h.update(self.channel.backend().as_bytes());
        h.update(self.channel.name().as_bytes());
        h.update(format!("{}", self.agg_kind).as_bytes());
        let mut buf = [0; 32];
        h.finalize(&mut buf);
        hex::encode(&buf)
    }

    /// Hex SHA3-256 over backend and channel name only; used to shard the
    /// cache directory layout.
    pub fn hash_channel(&self) -> String {
        let mut h = tiny_keccak::Sha3::v256();
        h.update(b"V000");
        h.update(self.channel.backend().as_bytes());
        h.update(self.channel.name().as_bytes());
        let mut buf = [0; 32];
        h.finalize(&mut buf);
        hex::encode(&buf)
    }

    /// Filesystem location of this cache entry under the node's cache base
    /// path: `cache/<hc[0..3]>/<hc[3..6]>/<channel>/<agg_kind>`.
    ///
    /// The previously computed full `hash()` (and the patch-derived path
    /// segments) are no longer used; the dead computation was removed.
    pub fn path(&self, node_config: &NodeConfigCached) -> PathBuf {
        let hc = self.hash_channel();
        node_config
            .node
            .cache_base_path
            .join("cache")
            .join(&hc[0..3])
            .join(&hc[3..6])
            .join(self.channel.name())
            .join(format!("{}", self.agg_kind))
    }
}
/// Result of writing a pre-binned cache file: bytes written and elapsed time.
pub struct WrittenPbCache {
    pub bytes: u64,
    pub duration: Duration,
}
// TODO only used for old archiver
/// Serialize `values` as CBOR and write them atomically to the cache file
/// location derived from `patch`, `agg_kind` and `channel`.
///
/// The data is first written to a uniquely named `.tmp.<crc>` sibling file
/// and then renamed into place, so concurrent readers never observe a
/// partially written cache file.
pub async fn write_pb_cache_min_max_avg_scalar<T>(
    values: T,
    patch: PreBinnedPatchCoordEnum,
    agg_kind: AggKind,
    channel: SfDbChannel,
    node_config: NodeConfigCached,
) -> Result<WrittenPbCache, Error>
where
    T: Serialize,
{
    let cfd = CacheFileDesc {
        channel: channel.clone(),
        patch: patch.clone(),
        agg_kind: agg_kind.clone(),
    };
    let path = cfd.path(&node_config);
    let enc = serde_cbor::to_vec(&values)?;
    let ts1 = Instant::now();
    tokio::fs::create_dir_all(path.parent().unwrap()).await.map_err(|e| {
        error!("can not create cache directory {:?}", path.parent());
        e
    })?;
    // Derive a pseudo-random suffix from the current timestamp so concurrent
    // writers do not collide on the temp file name.
    let now = Utc::now();
    let mut h = crc32fast::Hasher::new();
    h.update(&now.timestamp_nanos().to_le_bytes());
    let r = h.finalize();
    let tmp_path =
        path.parent()
            .unwrap()
            .join(format!("{}.tmp.{:08x}", path.file_name().unwrap().to_str().unwrap(), r));
    // Blocking file I/O is moved off the async runtime.
    let res = tokio::task::spawn_blocking({
        let tmp_path = tmp_path.clone();
        move || {
            use fs2::FileExt;
            use io::Write;
            info!("try to write tmp at {:?}", tmp_path);
            let mut f = std::fs::OpenOptions::new()
                .create_new(true)
                .write(true)
                .open(&tmp_path)?;
            // File locking currently disabled; kept for reference.
            if false {
                f.lock_exclusive()?;
            }
            f.write_all(&enc)?;
            if false {
                f.unlock()?;
            }
            f.flush()?;
            Ok::<_, Error>(enc.len())
        }
    })
    .await
    .map_err(Error::from_string)??;
    // Atomic publish: rename the fully written temp file into place.
    tokio::fs::rename(&tmp_path, &path).await?;
    let ts2 = Instant::now();
    let ret = WrittenPbCache {
        bytes: res as u64,
        duration: ts2.duration_since(ts1),
    };
    Ok(ret)
}
/// Log lines produced while clearing the cache; serialized for the client.
#[derive(Serialize)]
pub struct ClearCacheAllResult {
    pub log: Vec<String>,
}
/// Recursively delete everything under the node's `cache` directory.
///
/// Walks the tree depth-first, removing files as they are encountered and
/// collecting directories; directories are removed afterwards. Because each
/// discovered directory is `push_front`ed, deeper directories end up first
/// in `dirs` and are removed before their parents. With `dry == true` only
/// the log is produced, nothing is deleted. Per-entry removal errors are
/// logged and do not abort the run.
pub async fn clear_cache_all(node_config: &NodeConfigCached, dry: bool) -> Result<ClearCacheAllResult, Error> {
    let mut log = vec![];
    log.push(format!("begin at {:?}", chrono::Utc::now()));
    if dry {
        log.push(format!("dry run"));
    }
    let mut dirs = VecDeque::new();
    let mut stack = VecDeque::new();
    stack.push_front(node_config.node.cache_base_path.join("cache"));
    loop {
        match stack.pop_front() {
            Some(path) => {
                info!("clear_cache_all try read dir {:?}", path);
                let mut rd = tokio::fs::read_dir(path).await?;
                while let Some(entry) = rd.next_entry().await? {
                    let path = entry.path();
                    match path.to_str() {
                        Some(_pathstr) => {
                            let meta = path.symlink_metadata()?;
                            //log.push(format!("len {:7} pathstr {}", meta.len(), pathstr,));
                            let filename_str = path.file_name().unwrap().to_str().unwrap();
                            // Safety guard against walking out of the tree.
                            // NOTE(review): `ends_with` also rejects regular
                            // names that merely end in a dot — confirm `==`
                            // comparison is not what was intended.
                            if filename_str.ends_with("..") || filename_str.ends_with(".") {
                                log.push(format!("ERROR encountered . or .."));
                            } else {
                                if meta.is_dir() {
                                    stack.push_front(path.clone());
                                    dirs.push_front((meta.len(), path));
                                } else if meta.is_file() {
                                    log.push(format!("remove file len {:7} {}", meta.len(), path.to_string_lossy()));
                                    if !dry {
                                        match tokio::fs::remove_file(&path).await {
                                            Ok(_) => {}
                                            Err(e) => {
                                                log.push(format!(
                                                    "can not remove file {} {:?}",
                                                    path.to_string_lossy(),
                                                    e
                                                ));
                                            }
                                        }
                                    }
                                } else {
                                    // Symlinks and other special entries are skipped.
                                    log.push(format!("not file, note dir"));
                                }
                            }
                        }
                        None => {
                            log.push(format!("Invalid utf-8 path encountered"));
                        }
                    }
                }
            }
            None => break,
        }
    }
    log.push(format!(
        "start to remove {} dirs at {:?}",
        dirs.len(),
        chrono::Utc::now()
    ));
    // Deepest directories come first, so children are removed before parents.
    for (len, path) in dirs {
        log.push(format!("remove dir len {} {}", len, path.to_string_lossy()));
        if !dry {
            match tokio::fs::remove_dir(&path).await {
                Ok(_) => {}
                Err(e) => {
                    log.push(format!("can not remove dir {} {:?}", path.to_string_lossy(), e));
                }
            }
        }
    }
    log.push(format!("done at {:?}", chrono::Utc::now()));
    let ret = ClearCacheAllResult { log };
    Ok(ret)
}

View File

@@ -0,0 +1,93 @@
use crate::SfDbChConf;
use err::thiserror;
#[allow(unused)]
use netpod::log::*;
use netpod::range::evrange::NanoRange;
use netpod::NodeConfigCached;
use netpod::SfDbChannel;
use parse::channelconfig::extract_matching_config_entry;
use parse::channelconfig::read_local_config;
use parse::channelconfig::ChannelConfigs;
use parse::channelconfig::ConfigEntry;
use parse::channelconfig::ConfigParseError;
use std::fmt;
#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
ParseError(ConfigParseError),
NotFound,
Error,
}
// impl fmt::Display for ConfigError {
// fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
// write!(fmt, "ConfigError::{self:?}")
// }
// }
impl From<ConfigParseError> for ConfigError {
    /// A missing config file maps to `NotFound`; every other parse problem
    /// is wrapped as `ParseError`.
    fn from(value: ConfigParseError) -> Self {
        match value {
            ConfigParseError::FileNotFound => ConfigError::NotFound,
            other => ConfigError::ParseError(other),
        }
    }
}
/// Find the config entry best matching `range`.
///
/// Returns `Ok(None)` when the channel has no configuration file at all;
/// any other read or parse problem is an error.
///
/// Improvements: removed a redundant `channel.clone()` (last use of the
/// owned argument) and replaced manual match-and-rewrap constructs with `?`
/// and `Option::map`.
pub async fn config_entry_best_match(
    range: &NanoRange,
    channel: SfDbChannel,
    node_config: &NodeConfigCached,
) -> Result<Option<ConfigEntry>, ConfigError> {
    let channel_config = match read_local_config(channel, node_config.clone()).await {
        Ok(x) => x,
        // A missing file simply means "no entry here".
        Err(ConfigParseError::FileNotFound) => return Ok(None),
        Err(e) => return Err(e.into()),
    };
    let entry_res = extract_matching_config_entry(range, &channel_config)?;
    Ok(entry_res.best().map(|x| x.clone()))
}
/// Read all configuration entries for the channel from the local config file.
///
/// Improvement: `channel` is owned and not used afterwards, so it is moved
/// instead of cloned (redundant clone removed).
pub async fn channel_configs(
    channel: SfDbChannel,
    node_config: &NodeConfigCached,
) -> Result<ChannelConfigs, ConfigParseError> {
    read_local_config(channel, node_config.clone()).await
}
pub async fn channel_config_best_match(
range: NanoRange,
channel: SfDbChannel,
node_config: &NodeConfigCached,
) -> Result<Option<SfDbChConf>, ConfigError> {
let best = config_entry_best_match(&range, channel.clone(), node_config).await?;
match best {
None => Ok(None),
Some(entry) => {
let shape = match entry.to_shape() {
Ok(k) => k,
// TODO pass error to caller
Err(_e) => return Err(ConfigError::Error)?,
};
let channel_config = SfDbChConf {
channel: channel.clone(),
keyspace: entry.ks as u8,
time_bin_size: entry.bs.clone(),
shape,
scalar_type: entry.scalar_type.clone(),
byte_order: entry.byte_order.clone(),
array: entry.is_array,
compression: entry.is_compressed,
};
Ok(Some(channel_config))
}
}
}

861
crates/disk/src/dataopen.rs Normal file
View File

@@ -0,0 +1,861 @@
use super::paths;
use crate::SfDbChConf;
use bytes::BytesMut;
use err::ErrStr;
use err::Error;
use futures_util::StreamExt;
use netpod::log::*;
use netpod::range::evrange::NanoRange;
use netpod::Node;
use netpod::SfChFetchInfo;
use netpod::TsNano;
use std::fmt;
use std::path::PathBuf;
use std::time::Instant;
use tokio::fs::File;
use tokio::fs::OpenOptions;
use tokio::io::AsyncReadExt;
use tokio::io::AsyncSeekExt;
use tokio::io::ErrorKind;
use tokio::io::SeekFrom;
const BACKEND: &str = "testbackend-00";
/// Result of positioning a data file at the start of a range: the (possibly
/// positioned) file handle plus whether a suitable event was found.
pub struct Positioned {
    pub file: OpenedFile,
    pub found: bool,
}
/// Public wrapper around the private `position_file`, exposed for tests.
pub async fn position_file_for_test(
    path: &PathBuf,
    range: &NanoRange,
    expand_left: bool,
    expand_right: bool,
) -> Result<Positioned, Error> {
    position_file(path, range, expand_left, expand_right).await
}
async fn position_file(
path: &PathBuf,
range: &NanoRange,
expand_left: bool,
expand_right: bool,
) -> Result<Positioned, Error> {
trace!("position_file called expand_left {expand_left} expand_right {expand_right} {range:?} {path:?}");
assert_eq!(expand_left && expand_right, false);
match OpenOptions::new().read(true).open(&path).await {
Ok(file) => {
let index_path = PathBuf::from(format!("{}_Index", path.to_str().unwrap()));
match OpenOptions::new().read(true).open(&index_path).await {
Ok(mut index_file) => {
let meta = index_file.metadata().await?;
if meta.len() > 1024 * 1024 * 120 {
let msg = format!("too large index file {} bytes for {:?}", meta.len(), index_path);
error!("{}", msg);
return Err(Error::with_msg(msg));
} else if meta.len() > 1024 * 1024 * 80 {
let msg = format!("very large index file {} bytes for {:?}", meta.len(), index_path);
warn!("{}", msg);
} else if meta.len() > 1024 * 1024 * 20 {
let msg = format!("large index file {} bytes for {:?}", meta.len(), index_path);
info!("{}", msg);
}
if meta.len() < 2 {
return Err(Error::with_msg(format!(
"bad meta len {} for {:?}",
meta.len(),
index_path
)));
}
if meta.len() % 16 != 2 {
return Err(Error::with_msg(format!(
"bad meta len {} for {:?}",
meta.len(),
index_path
)));
}
let mut buf = BytesMut::with_capacity(meta.len() as usize);
buf.resize(buf.capacity(), 0);
index_file.read_exact(&mut buf).await?;
let gg = if expand_left {
super::index::find_largest_smaller_than(range.clone(), expand_right, &buf[2..])
} else {
super::index::find_ge(range.clone(), expand_right, &buf[2..])
};
let gg = match gg {
Ok(x) => x,
Err(e) => {
error!("can not position file for range {range:?} expand_right {expand_right:?} buflen {buflen}", buflen = buf.len());
return Err(e);
}
};
match gg {
Some(o) => {
let mut file = file;
file.seek(SeekFrom::Start(o.1)).await?;
//info!("position_file case A {:?}", path);
let g = OpenedFile {
file: Some(file),
path: path.clone(),
positioned: true,
index: true,
nreads: 0,
pos: o.1,
};
return Ok(Positioned { file: g, found: true });
}
None => {
//info!("position_file case B {:?}", path);
let g = OpenedFile {
file: Some(file),
path: path.clone(),
positioned: false,
index: true,
nreads: 0,
pos: 0,
};
return Ok(Positioned { file: g, found: false });
}
}
}
Err(e) => match e.kind() {
ErrorKind::NotFound => {
let ts1 = Instant::now();
let res = if expand_left {
super::index::position_static_len_datafile_at_largest_smaller_than(
file,
range.clone(),
expand_right,
)
.await?
} else {
super::index::position_static_len_datafile(file, range.clone(), expand_right).await?
};
let ts2 = Instant::now();
if false {
// TODO collect for stats:
let dur = ts2.duration_since(ts1);
info!("position_static_len_datafile took ms {}", dur.as_millis());
}
let file = res.0;
if res.1 {
//info!("position_file case C {:?}", path);
let g = OpenedFile {
file: Some(file),
path: path.clone(),
positioned: true,
index: false,
nreads: res.2,
pos: res.3,
};
return Ok(Positioned { file: g, found: true });
} else {
//info!("position_file case D {:?}", path);
let g = OpenedFile {
file: Some(file),
path: path.clone(),
positioned: false,
index: false,
nreads: res.2,
pos: 0,
};
return Ok(Positioned { file: g, found: false });
}
}
_ => Err(e)?,
},
}
}
Err(e) => {
warn!("can not open {:?} error {:?}", path, e);
let g = OpenedFile {
file: None,
path: path.clone(),
positioned: false,
index: true,
nreads: 0,
pos: 0,
};
return Ok(Positioned { file: g, found: false });
}
}
}
/// A data file opened (or attempted) for reading events.
pub struct OpenedFile {
    pub path: PathBuf,
    // None when the file could not be opened.
    pub file: Option<File>,
    // True when the handle was seeked to a relevant event.
    pub positioned: bool,
    // True when an `_Index` companion file was used (or assumed).
    pub index: bool,
    // Number of reads performed while positioning without an index.
    pub nreads: u32,
    // Byte offset the file was positioned at.
    pub pos: u64,
}
/// All files opened for a single timebin directory.
#[derive(Debug)]
pub struct OpenedFileSet {
    pub timebin: u64,
    pub files: Vec<OpenedFile>,
}
impl fmt::Debug for OpenedFile {
    /// Manual Debug so the raw file handle is shown as-is.
    ///
    /// Fix: the `pos` field was omitted from the output while every other
    /// field was listed; include it so diagnostics cover the whole struct.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("OpenedFile")
            .field("path", &self.path)
            .field("file", &self.file)
            .field("positioned", &self.positioned)
            .field("index", &self.index)
            .field("nreads", &self.nreads)
            .field("pos", &self.pos)
            .finish()
    }
}
/// Asynchronously open all data files relevant for `range`, one
/// `OpenedFileSet` per timebin, delivered over a bounded channel.
///
/// The work runs on a spawned task; errors are forwarded over the channel
/// with an added public message. A closed receiver is not an error.
pub fn open_files(
    range: &NanoRange,
    fetch_info: &SfChFetchInfo,
    node: Node,
) -> async_channel::Receiver<Result<OpenedFileSet, Error>> {
    let (chtx, chrx) = async_channel::bounded(2);
    let range = range.clone();
    let fetch_info = fetch_info.clone();
    tokio::spawn(async move {
        match open_files_inner(&chtx, &range, &fetch_info, node).await {
            Ok(_) => {}
            Err(e) => {
                let e = e.add_public_msg(format!(
                    "Can not open file for channel: {fetch_info:?} range: {range:?}"
                ));
                match chtx.send(Err(e.into())).await {
                    Ok(_) => {}
                    Err(e) => {
                        // This case is fine.
                        debug!("open_files channel send error {:?}", e);
                    }
                }
            }
        }
    });
    chrx
}
/// Iterate all timebins of the channel, skip those entirely outside `range`,
/// and send one `OpenedFileSet` (with only the files where an event was
/// found) per overlapping timebin.
async fn open_files_inner(
    chtx: &async_channel::Sender<Result<OpenedFileSet, Error>>,
    range: &NanoRange,
    fetch_info: &SfChFetchInfo,
    node: Node,
) -> Result<(), Error> {
    let fetch_info = fetch_info.clone();
    let timebins = get_timebins(&fetch_info, node.clone()).await?;
    if timebins.len() == 0 {
        return Ok(());
    }
    for &tb in &timebins {
        let ts_bin = TsNano(tb * fetch_info.bs().ns());
        // Timebin starts at or after the range end: no overlap.
        if ts_bin.ns() >= range.end {
            continue;
        }
        // Timebin ends at or before the range begin: no overlap.
        if ts_bin.ns() + fetch_info.bs().ns() <= range.beg {
            continue;
        }
        let mut a = Vec::new();
        for path in paths::datapaths_for_timebin(tb, &fetch_info, &node).await? {
            let w = position_file(&path, range, false, false).await?;
            if w.found {
                a.push(w.file);
            }
        }
        let h = OpenedFileSet { timebin: tb, files: a };
        debug!(
            "----- open_files_inner giving OpenedFileSet with {} files",
            h.files.len()
        );
        chtx.send(Ok(h)).await.errstr()?;
    }
    Ok(())
}
/// Provide the stream of positioned data files which are relevant for the
/// given parameters, expanded to one event before and one after the
/// requested range when such events exist.
///
/// The work runs on a spawned task; errors are forwarded over the bounded
/// channel. A closed receiver is expected and not treated as an error.
pub fn open_expanded_files(
    range: &NanoRange,
    fetch_info: &SfChFetchInfo,
    node: Node,
) -> async_channel::Receiver<Result<OpenedFileSet, Error>> {
    let (chtx, chrx) = async_channel::bounded(2);
    let range = range.clone();
    let fetch_info = fetch_info.clone();
    tokio::spawn(async move {
        match open_expanded_files_inner(&chtx, &range, &fetch_info, node).await {
            Ok(_) => {}
            Err(e) => match chtx.send(Err(e.into())).await {
                Ok(_) => {}
                Err(e) => {
                    // To be expected
                    debug!("open_files channel send error {:?}", e);
                }
            },
        }
    });
    chrx
}
/// List the sorted timebin numbers available for the channel on this node.
///
/// A directory counts as a timebin when its name consists of exactly 19
/// digits. A missing or unreadable base directory yields an empty list.
async fn get_timebins(fetch_info: &SfChFetchInfo, node: Node) -> Result<Vec<u64>, Error> {
    let p0 = paths::channel_timebins_dir_path(&fetch_info, &node)?;
    let rd = match tokio::fs::read_dir(&p0).await {
        Ok(rd) => rd,
        Err(e) => {
            // No timebin directory at all: report an empty list.
            debug!("get_timebins no timebins for {:?} {:?} p0 {:?}", fetch_info, e, p0);
            return Ok(Vec::new());
        }
    };
    let mut timebins = Vec::new();
    let mut entries = tokio_stream::wrappers::ReadDirStream::new(rd);
    while let Some(entry) = entries.next().await {
        let entry = entry?;
        let name = entry
            .file_name()
            .into_string()
            .map_err(|e| Error::with_msg(format!("Bad OS path {:?}", e)))?;
        if name.len() != 19 {
            warn!("get_timebins weird directory {:?} p0 {:?}", entry.path(), p0);
        }
        let digit_count = name.chars().filter(|c| c.is_digit(10)).count();
        if digit_count == 19 {
            timebins.push(name.parse::<u64>()?);
        }
    }
    timebins.sort_unstable();
    Ok(timebins)
}
/// Walk timebins backwards from the one covering `range.beg` until a file
/// containing an event before the range is found, then walk forward and emit
/// the remaining positioned file sets. When no such "pre" event exists,
/// falls back to the non-expanding `open_files_inner`.
async fn open_expanded_files_inner(
    chtx: &async_channel::Sender<Result<OpenedFileSet, Error>>,
    range: &NanoRange,
    fetch_info: &SfChFetchInfo,
    node: Node,
) -> Result<(), Error> {
    let fetch_info = fetch_info.clone();
    let timebins = get_timebins(&fetch_info, node.clone()).await?;
    if timebins.len() == 0 {
        return Ok(());
    }
    // Find the last timebin that starts at or before the range begin.
    let mut p1 = None;
    for (i1, tb) in timebins.iter().enumerate().rev() {
        let ts_bin = TsNano(tb * fetch_info.bs().ns());
        if ts_bin.ns() <= range.beg {
            p1 = Some(i1);
            break;
        }
    }
    let mut p1 = if let Some(i1) = p1 { i1 } else { 0 };
    if p1 >= timebins.len() {
        return Err(Error::with_msg(format!(
            "logic error p1 {} range {:?} fetch_info {:?}",
            p1, range, fetch_info
        )));
    }
    // Walk backwards until an event before the range is found.
    let mut found_pre = false;
    loop {
        let tb = timebins[p1];
        let mut a = Vec::new();
        for path in paths::datapaths_for_timebin(tb, &fetch_info, &node).await? {
            let w = position_file(&path, range, true, false).await?;
            if w.found {
                debug!("----- open_expanded_files_inner w.found for {:?}", path);
                a.push(w.file);
                found_pre = true;
            }
        }
        let h = OpenedFileSet { timebin: tb, files: a };
        debug!(
            "----- open_expanded_files_inner giving OpenedFileSet with {} files",
            h.files.len()
        );
        chtx.send(Ok(h)).await.errstr()?;
        if found_pre {
            p1 += 1;
            break;
        } else if p1 == 0 {
            break;
        } else {
            p1 -= 1;
        }
    }
    if found_pre {
        // Append all following positioned files.
        while p1 < timebins.len() {
            let tb = timebins[p1];
            let mut a = Vec::new();
            for path in paths::datapaths_for_timebin(tb, &fetch_info, &node).await? {
                let w = position_file(&path, range, false, true).await?;
                if w.found {
                    a.push(w.file);
                }
            }
            let h = OpenedFileSet { timebin: tb, files: a };
            chtx.send(Ok(h)).await.errstr()?;
            p1 += 1;
        }
    } else {
        // TODO emit statsfor this or log somewhere?
        debug!("Could not find some event before the requested range, fall back to standard file list.");
        // Try to locate files according to non-expand-algorithm.
        open_files_inner(chtx, range, &fetch_info, node).await?;
    }
    Ok(())
}
#[cfg(test)]
mod test {
use super::*;
use err::Error;
use netpod::range::evrange::NanoRange;
use netpod::test_data_base_path_databuffer;
use netpod::timeunits::*;
use std::path::PathBuf;
use tokio::fs::OpenOptions;
// Path to the scalar i32 big-endian test data file (keyspace 2, no index).
fn scalar_file_path() -> PathBuf {
    test_data_base_path_databuffer()
        .join("node00/ks_2/byTime/scalar-i32-be")
        .join("0000000000000000001/0000000000/0000000000086400000_00000_Data")
}
// Path to the waveform f64 big-endian test data file (keyspace 3, indexed).
fn wave_file_path() -> PathBuf {
    test_data_base_path_databuffer()
        .join("node00/ks_3/byTime/wave-f64-be-n21")
        .join("0000000000000000001/0000000000/0000000000086400000_00000_Data")
}
#[test]
fn position_basic_file_at_begin() -> Result<(), Error> {
    // Range starting exactly at the file's day boundary: found without an
    // index, positioned right after the 23-byte header.
    let fut = async {
        let path = scalar_file_path();
        let range = NanoRange {
            beg: DAY,
            end: DAY + MS * 20000,
        };
        let res = position_file(&path, &range, false, false).await?;
        assert_eq!(res.found, true);
        assert_eq!(res.file.index, false);
        assert_eq!(res.file.positioned, true);
        assert_eq!(res.file.pos, 23);
        Ok::<_, Error>(())
    };
    taskrun::run(fut)?;
    Ok(())
}
#[test]
fn position_basic_file_for_empty_range() -> Result<(), Error> {
    // An empty (beg == end) range can never contain an event.
    let fut = async {
        let path = scalar_file_path();
        let range = NanoRange {
            beg: DAY + MS * 80000,
            end: DAY + MS * 80000,
        };
        let res = position_file(&path, &range, false, false).await?;
        assert_eq!(res.found, false);
        assert_eq!(res.file.index, false);
        assert_eq!(res.file.positioned, false);
        Ok::<_, Error>(())
    };
    taskrun::run(fut)?;
    Ok(())
}
#[test]
fn position_basic_file_at_begin_for_range() -> Result<(), Error> {
    // A wider range starting at the day boundary positions the same way.
    let fut = async {
        let path = scalar_file_path();
        let range = NanoRange {
            beg: DAY,
            end: DAY + MS * 300000,
        };
        let res = position_file(&path, &range, false, false).await?;
        assert_eq!(res.found, true);
        assert_eq!(res.file.index, false);
        assert_eq!(res.file.positioned, true);
        assert_eq!(res.file.pos, 23);
        Ok::<_, Error>(())
    };
    taskrun::run(fut)?;
    Ok(())
}
#[test]
fn position_basic_file_at_inner() -> Result<(), Error> {
    // Range beginning within the file positions at the first matching event.
    let fut = async {
        let path = scalar_file_path();
        let range = NanoRange {
            beg: DAY + MS * 4000,
            end: DAY + MS * 7000,
        };
        let res = position_file(&path, &range, false, false).await?;
        assert_eq!(res.found, true);
        assert_eq!(res.file.index, false);
        assert_eq!(res.file.positioned, true);
        assert_eq!(res.file.pos, 179);
        Ok::<_, Error>(())
    };
    taskrun::run(fut)?;
    Ok(())
}
// TODO add same test for WAVE
#[test]
fn position_basic_file_at_inner_for_too_small_range() -> Result<(), Error> {
    // A 1 ms range between two events contains nothing.
    let fut = async {
        let path = scalar_file_path();
        let range = NanoRange {
            beg: DAY + MS * 1501,
            end: DAY + MS * 1502,
        };
        let res = position_file(&path, &range, false, false).await?;
        assert_eq!(res.found, false);
        assert_eq!(res.file.index, false);
        assert_eq!(res.file.positioned, false);
        Ok::<_, Error>(())
    };
    taskrun::run(fut)?;
    Ok(())
}
// TODO add same test for WAVE
#[test]
fn position_basic_file_starts_after_range() -> Result<(), Error> {
    // The whole requested range lies before the file's data.
    let fut = async {
        let path = scalar_file_path();
        let range = NanoRange {
            beg: HOUR * 22,
            end: HOUR * 23,
        };
        let res = position_file(&path, &range, false, false).await?;
        assert_eq!(res.found, false);
        assert_eq!(res.file.index, false);
        assert_eq!(res.file.positioned, false);
        Ok::<_, Error>(())
    };
    taskrun::run(fut)?;
    Ok(())
}
#[test]
fn position_basic_file_ends_before_range() -> Result<(), Error> {
    // The whole requested range lies after the file's data.
    let fut = async {
        let path = scalar_file_path();
        let range = NanoRange {
            beg: DAY * 2,
            end: DAY * 2 + HOUR,
        };
        let res = position_file(&path, &range, false, false).await?;
        assert_eq!(res.found, false);
        assert_eq!(res.file.index, false);
        assert_eq!(res.file.positioned, false);
        Ok::<_, Error>(())
    };
    taskrun::run(fut)?;
    Ok(())
}
#[test]
fn position_basic_index() -> Result<(), Error> {
    // Waveform file with an `_Index` companion: the index is used and the
    // data file is seeked to the indexed offset.
    let fut = async {
        let path = wave_file_path();
        let range = NanoRange {
            beg: DAY + MS * 4000,
            end: DAY + MS * 90000,
        };
        let res = position_file(&path, &range, false, false).await?;
        assert_eq!(res.found, true);
        assert_eq!(res.file.index, true);
        assert_eq!(res.file.positioned, true);
        assert_eq!(res.file.pos, 184);
        Ok::<_, Error>(())
    };
    taskrun::run(fut)?;
    Ok(())
}
#[test]
fn position_basic_index_too_small_range() -> Result<(), Error> {
    // Indexed file, but the range falls between events: nothing is found.
    let fut = async {
        let path = wave_file_path();
        let range = NanoRange {
            beg: DAY + MS * 3100,
            end: DAY + MS * 3200,
        };
        let res = position_file(&path, &range, false, false).await?;
        assert_eq!(res.found, false);
        assert_eq!(res.file.index, true);
        assert_eq!(res.file.positioned, false);
        Ok::<_, Error>(())
    };
    taskrun::run(fut)?;
    Ok(())
}
#[test]
fn position_basic_index_starts_after_range() -> Result<(), Error> {
    // Indexed file whose data starts only after the requested range.
    let fut = async {
        let path = wave_file_path();
        let range = NanoRange {
            beg: HOUR * 10,
            end: HOUR * 12,
        };
        let res = position_file(&path, &range, false, false).await?;
        assert_eq!(res.found, false);
        assert_eq!(res.file.index, true);
        assert_eq!(res.file.positioned, false);
        Ok::<_, Error>(())
    };
    taskrun::run(fut)?;
    Ok(())
}
#[test]
fn position_basic_index_ends_before_range() -> Result<(), Error> {
    // Indexed file whose data ends before the requested range.
    let fut = async {
        let path = wave_file_path();
        let range = NanoRange {
            beg: DAY * 2,
            end: DAY * 2 + MS * 40000,
        };
        let res = position_file(&path, &range, false, false).await?;
        assert_eq!(res.found, false);
        assert_eq!(res.file.index, true);
        assert_eq!(res.file.positioned, false);
        assert_eq!(res.file.pos, 0);
        Ok::<_, Error>(())
    };
    taskrun::run(fut)?;
    Ok(())
}
//
// -------------- Expanded -----------------------------------
//
#[test]
fn position_expand_file_at_begin_no_fallback() -> Result<(), Error> {
    // Directly exercise the expand-left datafile search: expect the event
    // just before the range begin, at byte offset 75.
    let fut = async {
        let path = scalar_file_path();
        let range = NanoRange {
            beg: DAY + MS * 3000,
            end: DAY + MS * 40000,
        };
        let file = OpenOptions::new().read(true).open(path).await?;
        let res =
            super::super::index::position_static_len_datafile_at_largest_smaller_than(file, range.clone(), true)
                .await?;
        assert_eq!(res.1, true);
        assert_eq!(res.3, 75);
        Ok::<_, Error>(())
    };
    taskrun::run(fut)?;
    Ok::<_, Error>(())
}
#[test]
fn position_expand_left_file_at_evts_file_begin() -> Result<(), Error> {
let fut = async {
let path = scalar_file_path();
let range = NanoRange {
beg: DAY,
end: DAY + MS * 40000,
};
let res = position_file(&path, &range, true, false).await?;
assert_eq!(res.found, false);
assert_eq!(res.file.index, false);
assert_eq!(res.file.positioned, false);
Ok::<_, Error>(())
};
taskrun::run(fut)?;
Ok(())
}
#[test]
fn position_expand_right_file_at_evts_file_begin() -> Result<(), Error> {
let fut = async {
let path = scalar_file_path();
let range = NanoRange {
beg: DAY,
end: DAY + MS * 40000,
};
let res = position_file(&path, &range, false, true).await?;
assert_eq!(res.found, true);
assert_eq!(res.file.index, false);
assert_eq!(res.file.positioned, true);
assert_eq!(res.file.pos, 23);
Ok::<_, Error>(())
};
taskrun::run(fut)?;
Ok(())
}
#[test]
fn position_expand_left_file_at_evts_file_within() -> Result<(), Error> {
let fut = async {
let path = scalar_file_path();
let range = NanoRange {
beg: DAY + MS * 3000,
end: DAY + MS * 40000,
};
let res = position_file(&path, &range, true, false).await?;
assert_eq!(res.found, true);
assert_eq!(res.file.index, false);
assert_eq!(res.file.positioned, true);
assert_eq!(res.file.pos, 75);
Ok::<_, Error>(())
};
taskrun::run(fut)?;
Ok(())
}
// ------- TODO do the same with Wave (index)
#[test]
fn position_expand_left_file_ends_before_range() -> Result<(), Error> {
let fut = async {
let path = scalar_file_path();
let range = NanoRange {
beg: DAY * 2,
end: DAY * 2 + MS * 40000,
};
let res = position_file(&path, &range, true, false).await?;
assert_eq!(res.found, true);
assert_eq!(res.file.index, false);
assert_eq!(res.file.positioned, true);
assert_eq!(res.file.pos, 2995171);
Ok::<_, Error>(())
};
taskrun::run(fut)?;
Ok(())
}
// ------- TODO do the same with Wave (index)
#[test]
fn position_expand_left_file_begins_exactly_after_range() -> Result<(), Error> {
let fut = async {
let path = scalar_file_path();
let range = NanoRange {
beg: HOUR * 23,
end: DAY,
};
let res = position_file(&path, &range, true, false).await?;
assert_eq!(res.found, false);
assert_eq!(res.file.index, false);
assert_eq!(res.file.positioned, false);
Ok::<_, Error>(())
};
taskrun::run(fut)?;
Ok(())
}
// ------- TODO do the same with Wave (index)
#[test]
fn position_expand_right_file_begins_exactly_after_range() -> Result<(), Error> {
let fut = async {
let path = scalar_file_path();
let range = NanoRange {
beg: HOUR * 23,
end: DAY,
};
let res = position_file(&path, &range, false, true).await?;
assert_eq!(res.found, true);
assert_eq!(res.file.index, false);
assert_eq!(res.file.positioned, true);
assert_eq!(res.file.pos, 23);
Ok::<_, Error>(())
};
taskrun::run(fut)?;
Ok(())
}
// TODO add same test for indexed
#[test]
fn position_expand_left_basic_file_at_inner_for_too_small_range() -> Result<(), Error> {
let fut = async {
let path = scalar_file_path();
let range = NanoRange {
beg: DAY + MS * 1501,
end: DAY + MS * 1502,
};
let res = position_file(&path, &range, true, false).await?;
assert_eq!(res.found, true);
assert_eq!(res.file.index, false);
assert_eq!(res.file.positioned, true);
assert_eq!(res.file.pos, 75);
Ok::<_, Error>(())
};
taskrun::run(fut)?;
Ok(())
}
// TODO add same test for indexed
#[test]
fn position_expand_right_basic_file_at_inner_for_too_small_range() -> Result<(), Error> {
let fut = async {
let path = scalar_file_path();
let range = NanoRange {
beg: DAY + MS * 1501,
end: DAY + MS * 1502,
};
let res = position_file(&path, &range, false, true).await?;
assert_eq!(res.found, true);
assert_eq!(res.file.index, false);
assert_eq!(res.file.positioned, true);
assert_eq!(res.file.pos, 127);
Ok::<_, Error>(())
};
taskrun::run(fut)?;
Ok(())
}
// Listing of the data files touched by an expanded range. The fetch-info
// construction is still a `todo!()` (config should come from disk or from the
// data generator), which makes this test panic unconditionally; it is marked
// ignored until that is resolved so the suite stays green.
#[test]
#[ignore = "fetch_info construction is todo!(); test cannot run yet"]
fn expanded_file_list() {
    let range = NanoRange {
        beg: DAY + HOUR * 5,
        end: DAY + HOUR * 8,
    };
    // Kept (underscored) for when the fetch info below gets filled in again.
    let _chn = netpod::SfDbChannel::from_name(BACKEND, "scalar-i32-be");
    // TODO read config from disk? Or expose the config from data generator?
    let fetch_info = todo!();
    // let fetch_info = SfChFetchInfo {
    //     channel: chn,
    //     keyspace: 2,
    //     time_bin_size: TsNano(DAY),
    //     scalar_type: netpod::ScalarType::I32,
    //     byte_order: netpod::ByteOrder::Big,
    //     shape: netpod::Shape::Scalar,
    //     array: false,
    //     compression: false,
    // };
    let cluster = netpod::test_cluster();
    let task = async move {
        let mut paths = Vec::new();
        let mut files = open_expanded_files(&range, &fetch_info, cluster.nodes[0].clone());
        while let Some(file) = files.next().await {
            match file {
                Ok(k) => {
                    debug!("opened file: {:?}", k);
                    paths.push(k.files);
                }
                Err(e) => {
                    error!("error while trying to open {:?}", e);
                    break;
                }
            }
        }
        // Exactly two time-bin files intersect the expanded range.
        if paths.len() != 2 {
            return Err(Error::with_msg_no_trace(format!(
                "expected 2 files got {n}",
                n = paths.len()
            )));
        }
        Ok::<_, Error>(())
    };
    taskrun::run(task).unwrap();
}
}

448
crates/disk/src/decode.rs Normal file
View File

@@ -0,0 +1,448 @@
use crate::eventblobs::EventChunkerMultifile;
use err::Error;
use futures_util::Stream;
use futures_util::StreamExt;
use items_0::scalar_ops::ScalarOps;
use items_0::streamitem::RangeCompletableItem;
use items_0::streamitem::Sitemty;
use items_0::streamitem::StreamItem;
use items_0::Events;
use items_0::WithLen;
use items_2::eventfull::EventFull;
use items_2::eventsdim0::EventsDim0;
use items_2::eventsdim1::EventsDim1;
use netpod::log::*;
use netpod::AggKind;
use netpod::ScalarType;
use netpod::Shape;
use std::marker::PhantomData;
use std::mem;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
/// Compile-time byte-order marker used as a type parameter.
pub trait Endianness: Send + Unpin {
    /// True for big-endian markers.
    fn is_big() -> bool;
}

/// Marker type: little-endian byte order.
pub struct LittleEndian {}

/// Marker type: big-endian byte order.
pub struct BigEndian {}

impl Endianness for LittleEndian {
    fn is_big() -> bool {
        false
    }
}

impl Endianness for BigEndian {
    fn is_big() -> bool {
        true
    }
}

/// Runtime byte-order selector. Needed because endianness can differ per
/// event on disk (see note in `impl_scalar_value_from_bytes!`).
pub enum Endian {
    Little,
    Big,
}

/// Decoding of raw event payload bytes into values of scalar type `STY`.
pub trait ScalarValueFromBytes<STY> {
    /// Decode a single scalar value from the front of `buf`.
    fn convert(buf: &[u8], endian: Endian) -> Result<STY, Error>;

    /// Decode a dim-1 (waveform) payload of `n` elements from `buf`.
    fn convert_dim1(buf: &[u8], endian: Endian, n: usize) -> Result<Vec<STY>, Error>;
}
/// Implements `ScalarValueFromBytes<$nty>` for the numeric type `$nty` with
/// element byte-length `$nl`.
macro_rules! impl_scalar_value_from_bytes {
    ($nty:ident, $nl:expr) => {
        impl ScalarValueFromBytes<$nty> for $nty {
            // Error in data on disk:
            // Can not rely on byte order as stated in the channel config.
            // Endianness in sf-databuffer can be specified for each event.
            fn convert(buf: &[u8], endian: Endian) -> Result<$nty, Error> {
                use Endian::*;
                let ret = match endian {
                    Little => $nty::from_le_bytes(buf[..$nl].try_into()?),
                    Big => $nty::from_be_bytes(buf[..$nl].try_into()?),
                };
                Ok(ret)
            }

            fn convert_dim1(buf: &[u8], endian: Endian, n: usize) -> Result<Vec<$nty>, Error> {
                // Fix: chunk by the element byte-size `$nl`. The previous
                // `chunks_exact(n.min($nl))` produced under-sized chunks
                // whenever the element count `n` was smaller than the element
                // byte length (e.g. a 4-element f64 wave), which made the
                // `b2[..$nl]` slice below panic. `take(n)` additionally caps
                // the element count in case the buffer is oversized.
                let ret = buf
                    .chunks_exact($nl)
                    .take(n)
                    .map(|b2| {
                        use Endian::*;
                        match endian {
                            Little => $nty::from_le_bytes(b2[..$nl].try_into().unwrap()),
                            Big => $nty::from_be_bytes(b2[..$nl].try_into().unwrap()),
                        }
                    })
                    .collect();
                Ok(ret)
            }
        }
    };
}

impl_scalar_value_from_bytes!(u8, 1);
impl_scalar_value_from_bytes!(u16, 2);
impl_scalar_value_from_bytes!(u32, 4);
impl_scalar_value_from_bytes!(u64, 8);
impl_scalar_value_from_bytes!(i8, 1);
impl_scalar_value_from_bytes!(i16, 2);
impl_scalar_value_from_bytes!(i32, 4);
impl_scalar_value_from_bytes!(i64, 8);
impl_scalar_value_from_bytes!(f32, 4);
impl_scalar_value_from_bytes!(f64, 8);
impl ScalarValueFromBytes<String> for String {
    /// Decodes a string payload, lossily replacing invalid UTF-8 and capping
    /// the decoded prefix at 255 bytes.
    fn convert(buf: &[u8], _endian: Endian) -> Result<String, Error> {
        let take = buf.len().min(255);
        Ok(String::from_utf8_lossy(&buf[..take]).into())
    }

    /// Waveforms of strings are not really supported: the whole payload is
    /// decoded as one string (capped at 255 bytes) and returned as the sole
    /// element, regardless of `_n`.
    fn convert_dim1(buf: &[u8], _endian: Endian, _n: usize) -> Result<Vec<String>, Error> {
        let take = buf.len().min(255);
        Ok(vec![String::from_utf8_lossy(&buf[..take]).into()])
    }
}
impl ScalarValueFromBytes<bool> for bool {
    /// A scalar bool is true iff the first payload byte is non-zero; an
    /// empty payload decodes to false.
    fn convert(buf: &[u8], _endian: Endian) -> Result<bool, Error> {
        Ok(buf.first().map_or(false, |&b| b != 0))
    }

    /// Decodes up to `n` bools, one per byte (non-zero means true).
    fn convert_dim1(buf: &[u8], _endian: Endian, n: usize) -> Result<Vec<bool>, Error> {
        Ok(buf.iter().take(n).map(|&x| x != 0).collect())
    }
}
/// Object-safe decoder: parse one event payload from `buf` and append it,
/// together with `ts` and `pulse`, to the type-erased `events` container.
pub trait ValueFromBytes: Send {
    fn convert(&self, ts: u64, pulse: u64, buf: &[u8], endian: Endian, events: &mut dyn Events) -> Result<(), Error>;
}

/// Scalar (dim-0) flavor of the payload decoder.
pub trait ValueDim0FromBytes {
    fn convert(&self, ts: u64, pulse: u64, buf: &[u8], endian: Endian, events: &mut dyn Events) -> Result<(), Error>;
}

/// Waveform (dim-1) flavor of the payload decoder.
pub trait ValueDim1FromBytes {
    fn convert(&self, ts: u64, pulse: u64, buf: &[u8], endian: Endian, events: &mut dyn Events) -> Result<(), Error>;
}

/// Decoder for scalar events with element type `STY`; carries no state, only
/// the type parameter.
pub struct ValueDim0FromBytesImpl<STY>
where
    STY: ScalarOps,
{
    _m1: PhantomData<STY>,
}

impl<STY> ValueDim0FromBytesImpl<STY>
where
    STY: ScalarOps + ScalarValueFromBytes<STY>,
{
    /// Boxed trait-object constructor used by `make_scalar_conv`.
    fn boxed() -> Box<dyn ValueFromBytes> {
        Box::new(Self {
            _m1: Default::default(),
        })
    }
}
impl<STY> ValueDim0FromBytes for ValueDim0FromBytesImpl<STY>
where
STY: ScalarOps + ScalarValueFromBytes<STY>,
{
fn convert(&self, ts: u64, pulse: u64, buf: &[u8], endian: Endian, events: &mut dyn Events) -> Result<(), Error> {
if let Some(evs) = events.as_any_mut().downcast_mut::<EventsDim0<STY>>() {
let v = <STY as ScalarValueFromBytes<STY>>::convert(buf, endian)?;
evs.values.push_back(v);
evs.tss.push_back(ts);
evs.pulses.push_back(pulse);
Ok(())
} else {
Err(Error::with_msg_no_trace("unexpected container"))
}
}
}
/// Forwards the object-safe entry point to the dim-0 implementation.
impl<STY> ValueFromBytes for ValueDim0FromBytesImpl<STY>
where
    STY: ScalarOps + ScalarValueFromBytes<STY>,
{
    fn convert(&self, ts: u64, pulse: u64, buf: &[u8], endian: Endian, events: &mut dyn Events) -> Result<(), Error> {
        ValueDim0FromBytes::convert(self, ts, pulse, buf, endian, events)
    }
}
/// Decoder for waveform (dim-1) events with element type `STY`. Keeps the
/// shape so the element count is known at decode time.
pub struct ValueDim1FromBytesImpl<STY>
where
    STY: ScalarOps,
{
    // Expected to be Shape::Wave(n); checked at decode time.
    shape: Shape,
    _m1: PhantomData<STY>,
}

impl<STY> ValueDim1FromBytesImpl<STY>
where
    STY: ScalarOps + ScalarValueFromBytes<STY>,
{
    /// Boxed trait-object constructor used by `make_scalar_conv`.
    fn boxed(shape: Shape) -> Box<dyn ValueFromBytes> {
        Box::new(Self {
            shape,
            _m1: Default::default(),
        })
    }
}

/// Forwards the object-safe entry point to the dim-1 implementation.
impl<STY> ValueFromBytes for ValueDim1FromBytesImpl<STY>
where
    STY: ScalarOps + ScalarValueFromBytes<STY>,
{
    fn convert(&self, ts: u64, pulse: u64, buf: &[u8], endian: Endian, events: &mut dyn Events) -> Result<(), Error> {
        ValueDim1FromBytes::convert(self, ts, pulse, buf, endian, events)
    }
}
impl<STY> ValueDim1FromBytes for ValueDim1FromBytesImpl<STY>
where
STY: ScalarOps + ScalarValueFromBytes<STY>,
{
fn convert(&self, ts: u64, pulse: u64, buf: &[u8], endian: Endian, events: &mut dyn Events) -> Result<(), Error> {
if let Some(evs) = events.as_any_mut().downcast_mut::<EventsDim1<STY>>() {
let n = if let Shape::Wave(n) = self.shape {
n
} else {
return Err(Error::with_msg_no_trace("ValueDim1FromBytesImpl bad shape"));
};
let v = <STY as ScalarValueFromBytes<STY>>::convert_dim1(buf, endian, n as _)?;
evs.values.push_back(v);
evs.tss.push_back(ts);
evs.pulses.push_back(pulse);
Ok(())
} else {
Err(Error::with_msg_no_trace("unexpected container"))
}
}
}
/// Selects the payload decoder matching the channel's scalar type and shape.
/// All value-like aggregation kinds share the same decoder; event blobs and
/// images are not implemented yet.
fn make_scalar_conv(
    scalar_type: &ScalarType,
    shape: &Shape,
    agg_kind: &AggKind,
) -> Result<Box<dyn ValueFromBytes>, Error> {
    let ret = match agg_kind {
        AggKind::EventBlobs => todo!("make_scalar_conv EventBlobs"),
        AggKind::Plain
        | AggKind::DimXBinsN(_)
        | AggKind::DimXBins1
        | AggKind::TimeWeightedScalar
        | AggKind::PulseIdDiff => match shape {
            Shape::Scalar => match scalar_type {
                ScalarType::U8 => ValueDim0FromBytesImpl::<u8>::boxed(),
                ScalarType::U16 => ValueDim0FromBytesImpl::<u16>::boxed(),
                ScalarType::U32 => ValueDim0FromBytesImpl::<u32>::boxed(),
                ScalarType::U64 => ValueDim0FromBytesImpl::<u64>::boxed(),
                ScalarType::I8 => ValueDim0FromBytesImpl::<i8>::boxed(),
                ScalarType::I16 => ValueDim0FromBytesImpl::<i16>::boxed(),
                ScalarType::I32 => ValueDim0FromBytesImpl::<i32>::boxed(),
                ScalarType::I64 => ValueDim0FromBytesImpl::<i64>::boxed(),
                ScalarType::F32 => ValueDim0FromBytesImpl::<f32>::boxed(),
                ScalarType::F64 => ValueDim0FromBytesImpl::<f64>::boxed(),
                ScalarType::BOOL => ValueDim0FromBytesImpl::<bool>::boxed(),
                ScalarType::STRING => ValueDim0FromBytesImpl::<String>::boxed(),
            },
            Shape::Wave(_) => {
                // The dim-1 decoder needs the element count, so pass the shape.
                let shape = shape.clone();
                match scalar_type {
                    ScalarType::U8 => ValueDim1FromBytesImpl::<u8>::boxed(shape),
                    ScalarType::U16 => ValueDim1FromBytesImpl::<u16>::boxed(shape),
                    ScalarType::U32 => ValueDim1FromBytesImpl::<u32>::boxed(shape),
                    ScalarType::U64 => ValueDim1FromBytesImpl::<u64>::boxed(shape),
                    ScalarType::I8 => ValueDim1FromBytesImpl::<i8>::boxed(shape),
                    ScalarType::I16 => ValueDim1FromBytesImpl::<i16>::boxed(shape),
                    ScalarType::I32 => ValueDim1FromBytesImpl::<i32>::boxed(shape),
                    ScalarType::I64 => ValueDim1FromBytesImpl::<i64>::boxed(shape),
                    ScalarType::F32 => ValueDim1FromBytesImpl::<f32>::boxed(shape),
                    ScalarType::F64 => ValueDim1FromBytesImpl::<f64>::boxed(shape),
                    ScalarType::BOOL => ValueDim1FromBytesImpl::<bool>::boxed(shape),
                    ScalarType::STRING => ValueDim1FromBytesImpl::<String>::boxed(shape),
                }
            }
            Shape::Image(_, _) => todo!("make_scalar_conv Image"),
        },
    };
    Ok(ret)
}
/// Adapter that consumes raw `EventFull` items from disk and decodes them
/// into type-erased `Events` batches, emitting a batch whenever the decoded
/// buffer reaches `emit_threshold` events.
pub struct EventsDynStream {
    scalar_type: ScalarType,
    shape: Shape,
    agg_kind: AggKind,
    // Upstream source of raw (undecoded) events.
    events_full: EventChunkerMultifile,
    // Accumulator for decoded events; swapped out when emitted.
    events_out: Box<dyn Events>,
    // Payload decoder chosen by make_scalar_conv for this channel.
    scalar_conv: Box<dyn ValueFromBytes>,
    emit_threshold: usize,
    done: bool,
    complete: bool,
}

impl EventsDynStream {
    pub fn type_name() -> &'static str {
        std::any::type_name::<Self>()
    }

    /// Builds the stream: picks an empty output container and the payload
    /// decoder, and sizes the emit threshold by shape (scalars are cheap,
    /// images expensive).
    pub fn new(
        scalar_type: ScalarType,
        shape: Shape,
        agg_kind: AggKind,
        events_full: EventChunkerMultifile,
    ) -> Result<Self, Error> {
        let st = &scalar_type;
        let sh = &shape;
        let ag = &agg_kind;
        warn!("TODO EventsDynStream::new feed through transform");
        // TODO do we need/want the empty item from here?
        let events_out = items_2::empty::empty_events_dyn_ev(st, sh)?;
        let scalar_conv = make_scalar_conv(st, sh, ag)?;
        let emit_threshold = match &shape {
            Shape::Scalar => 2048,
            Shape::Wave(_) => 64,
            Shape::Image(_, _) => 1,
        };
        let ret = Self {
            scalar_type,
            shape,
            agg_kind,
            events_full,
            events_out,
            scalar_conv,
            emit_threshold,
            done: false,
            complete: false,
        };
        Ok(ret)
    }

    /// Swaps the accumulated output batch for a fresh empty container and
    /// returns the old (filled) one.
    fn replace_events_out(&mut self) -> Result<Box<dyn Events>, Error> {
        let st = &self.scalar_type;
        let sh = &self.shape;
        error!("TODO replace_events_out feed through transform");
        // TODO do we need/want the empty item from here?
        let empty = items_2::empty::empty_events_dyn_ev(st, sh)?;
        let evs = mem::replace(&mut self.events_out, empty);
        Ok(evs)
    }

    /// Decodes every event in `item` (blob + endianness flag + ts + pulse,
    /// iterated in lockstep) into `events_out`. Errors if an event has no blob.
    fn handle_event_full(&mut self, item: EventFull) -> Result<(), Error> {
        if item.len() >= self.emit_threshold {
            info!("handle_event_full item len {}", item.len());
        }
        for (((buf, &be), &ts), &pulse) in item
            .blobs
            .iter()
            .zip(item.be.iter())
            .zip(item.tss.iter())
            .zip(item.pulses.iter())
        {
            let endian = if be { Endian::Big } else { Endian::Little };
            let buf = if let Some(x) = buf {
                x
            } else {
                return Err(Error::with_msg_no_trace("no buf in event"));
            };
            self.scalar_conv
                .convert(ts, pulse, buf, endian, self.events_out.as_mut())?;
        }
        Ok(())
    }

    /// Maps one upstream stream item to an optional downstream item:
    /// RangeComplete and Log/Stats pass through; data is decoded and only
    /// emitted once the accumulator reaches the threshold (None otherwise).
    fn handle_stream_item(
        &mut self,
        item: StreamItem<RangeCompletableItem<EventFull>>,
    ) -> Result<Option<Sitemty<Box<dyn Events>>>, Error> {
        let ret = match item {
            StreamItem::DataItem(item) => match item {
                RangeCompletableItem::RangeComplete => {
                    Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)))
                }
                RangeCompletableItem::Data(item) => match self.handle_event_full(item) {
                    Ok(()) => {
                        // TODO collect stats.
                        if self.events_out.len() >= self.emit_threshold {
                            let evs = self.replace_events_out()?;
                            Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(evs))))
                        } else {
                            None
                        }
                    }
                    Err(e) => Some(Err(e)),
                },
            },
            StreamItem::Log(item) => Some(Ok(StreamItem::Log(item))),
            StreamItem::Stats(item) => Some(Ok(StreamItem::Stats(item))),
        };
        Ok(ret)
    }
}
impl Stream for EventsDynStream {
    type Item = Sitemty<Box<dyn Events>>;

    // Standard done/complete polling protocol used throughout this codebase:
    // `done` emits the final None once, `complete` panics on further polls.
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        loop {
            break if self.complete {
                panic!("{} poll_next on complete", Self::type_name())
            } else if self.done {
                self.complete = true;
                Ready(None)
            } else {
                match self.events_full.poll_next_unpin(cx) {
                    Ready(Some(Ok(item))) => match self.handle_stream_item(item) {
                        Ok(Some(item)) => Ready(Some(item)),
                        // Accumulated below threshold: keep polling upstream.
                        Ok(None) => continue,
                        Err(e) => {
                            self.done = true;
                            Ready(Some(Err(e)))
                        }
                    },
                    Ready(Some(Err(e))) => {
                        self.done = true;
                        Ready(Some(Err(e)))
                    }
                    Ready(None) => {
                        // Upstream finished: flush the remaining accumulator.
                        // Produce a last one even if it is empty.
                        match self.replace_events_out() {
                            Ok(item) => {
                                self.done = true;
                                Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(item)))))
                            }
                            Err(e) => {
                                self.done = true;
                                Ready(Some(Err(e)))
                            }
                        }
                    }
                    Pending => Pending,
                }
            };
        }
    }
}

807
crates/disk/src/disk.rs Normal file
View File

@@ -0,0 +1,807 @@
#[cfg(test)]
pub mod aggtest;
pub mod binnedstream;
pub mod cache;
pub mod channelconfig;
pub mod dataopen;
pub mod decode;
pub mod eventblobs;
pub mod eventchunker;
pub mod frame;
pub mod gen;
pub mod index;
pub mod merge;
pub mod paths;
pub mod raw;
pub mod read3;
pub mod read4;
pub mod streamlog;
pub use parse;
use bytes::Bytes;
use bytes::BytesMut;
use err::Error;
use futures_util::future::FusedFuture;
use futures_util::FutureExt;
use futures_util::Stream;
use futures_util::StreamExt;
use futures_util::TryFutureExt;
use netpod::log::*;
use netpod::ByteOrder;
use netpod::DiskIoTune;
use netpod::DtNano;
use netpod::Node;
use netpod::ReadSys;
use netpod::ScalarType;
use netpod::SfDbChannel;
use netpod::Shape;
use serde::Deserialize;
use serde::Serialize;
use std::collections::VecDeque;
use std::future::Future;
use std::io::SeekFrom;
use std::mem;
use std::os::unix::prelude::AsRawFd;
use std::path::PathBuf;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
use std::time::Duration;
use std::time::Instant;
use streams::dtflags::ARRAY;
use streams::dtflags::BIG_ENDIAN;
use streams::dtflags::COMPRESSION;
use streams::dtflags::SHAPE;
use streams::filechunkread::FileChunkRead;
use tokio::fs::File;
use tokio::fs::OpenOptions;
use tokio::io::AsyncRead;
use tokio::io::AsyncReadExt;
use tokio::io::AsyncSeekExt;
use tokio::io::ReadBuf;
use tokio::sync::mpsc;
// TODO move to databuffer-specific crate
// TODO duplicate of SfChFetchInfo?
/// On-disk channel configuration for sf-databuffer style storage: element
/// type, shape, byte order and the time-binning of the data files.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SfDbChConf {
    pub channel: SfDbChannel,
    pub keyspace: u8,
    pub time_bin_size: DtNano,
    pub scalar_type: ScalarType,
    pub compression: bool,
    pub shape: Shape,
    pub array: bool,
    pub byte_order: ByteOrder,
}

/// Parameters for reading a single channel's data of one time bin.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AggQuerySingleChannel {
    pub channel_config: SfDbChConf,
    pub timebin: u32,
    pub tb_file_count: u32,
    pub buffer_size: u32,
}
// TODO transform this into a self-test or remove.
/// Opens the data file for the queried time bin and wraps it in a raw
/// byte-chunk `BodyStream` backed by `FileReader`.
pub async fn read_test_1(query: &AggQuerySingleChannel, node: Node) -> Result<netpod::BodyStream, Error> {
    let path = paths::datapath(query.timebin as u64, &query.channel_config, 0, &node);
    debug!("try path: {:?}", path);
    let fin = OpenOptions::new().read(true).open(path).await?;
    let meta = fin.metadata().await;
    debug!("file meta {:?}", meta);
    let stream = netpod::BodyStream {
        inner: Box::new(FileReader {
            file: fin,
            nreads: 0,
            buffer_size: query.buffer_size,
        }),
    };
    Ok(stream)
}

/// Naive file-to-bytes stream; see the `err::todo()` in its Stream impl —
/// this type is effectively retired.
struct FileReader {
    file: tokio::fs::File,
    nreads: u32,
    buffer_size: u32,
}
impl Stream for FileReader {
    type Item = Result<Bytes, Error>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        // Aborts immediately: this implementation is kept only for reference.
        err::todo();
        // TODO remove if no longer used?
        let blen = self.buffer_size as usize;
        // Allocate a fresh zeroed buffer for every poll.
        let mut buf2 = BytesMut::with_capacity(blen);
        buf2.resize(buf2.capacity(), 0);
        if buf2.as_mut().len() != blen {
            panic!("logic");
        }
        let mut buf = tokio::io::ReadBuf::new(buf2.as_mut());
        if buf.filled().len() != 0 {
            panic!("logic");
        }
        match Pin::new(&mut self.file).poll_read(cx, &mut buf) {
            Poll::Ready(Ok(_)) => {
                let rlen = buf.filled().len();
                if rlen == 0 {
                    // EOF.
                    Poll::Ready(None)
                } else {
                    if rlen != blen {
                        info!("short read {} of {}", buf.filled().len(), blen);
                    }
                    self.nreads += 1;
                    Poll::Ready(Some(Ok(buf2.freeze())))
                }
            }
            Poll::Ready(Err(e)) => Poll::Ready(Some(Err(Error::from(e)))),
            Poll::Pending => Poll::Pending,
        }
    }
}
/// Future that opens a file read-only; wraps the boxed open-future so it can
/// also implement `FusedFuture` via the `term` flag.
pub struct Fopen1 {
    #[allow(dead_code)]
    opts: OpenOptions,
    fut: Pin<Box<dyn Future<Output = Result<File, std::io::Error>>>>,
    term: bool,
}
impl Fopen1 {
    /// Creates a `Fopen1` that opens `path` read-only when polled.
    pub fn new(path: PathBuf) -> Self {
        // Dead-code cleanup: the unused `_fut2` demo future is removed and
        // the open call is awaited directly instead of via a temporary.
        let fut = Box::pin(async {
            let mut o1 = OpenOptions::new();
            let o2 = o1.read(true);
            o2.open(path).await
        }) as Pin<Box<dyn Future<Output = Result<File, std::io::Error>>>>;
        Self {
            opts: OpenOptions::new(),
            fut,
            term: false,
        }
    }
}
impl Future for Fopen1 {
    type Output = Result<File, Error>;

    // Delegates to the inner open-future, converting the io::Error and
    // latching `term` once a result (ok or err) has been produced.
    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
        let g = self.fut.as_mut();
        match g.poll(cx) {
            Poll::Ready(Ok(k)) => {
                self.term = true;
                Poll::Ready(Ok(k))
            }
            Poll::Ready(Err(k)) => {
                self.term = true;
                Poll::Ready(Err(k.into()))
            }
            Poll::Pending => Poll::Pending,
        }
    }
}

impl FusedFuture for Fopen1 {
    fn is_terminated(&self) -> bool {
        self.term
    }
}

// NOTE(review): the inner future is a `dyn Future` trait object that is NOT
// declared `Send`; asserting `Send` for the whole struct looks unsound if a
// `Fopen1` is actually moved across threads while pending — confirm and
// prefer making the boxed future `+ Send` instead.
unsafe impl Send for Fopen1 {}
/// File reader based on tokio's `AsyncRead`, yielding `FileChunkRead` items
/// with per-chunk read durations.
pub struct FileContentStream {
    file: File,
    disk_io_tune: DiskIoTune,
    // True while a partially-filled buffer from a previous Pending poll is
    // kept in `buf`.
    read_going: bool,
    buf: BytesMut,
    // Start time of the current read, for the chunk duration.
    ts1: Instant,
    nlog: usize,
    done: bool,
    complete: bool,
}

impl FileContentStream {
    pub fn type_name() -> &'static str {
        std::any::type_name::<Self>()
    }

    pub fn new(file: File, disk_io_tune: DiskIoTune) -> Self {
        Self {
            file,
            disk_io_tune,
            read_going: false,
            buf: BytesMut::new(),
            ts1: Instant::now(),
            nlog: 0,
            done: false,
            complete: false,
        }
    }
}
impl Stream for FileContentStream {
    type Item = Result<FileChunkRead, Error>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        loop {
            break if self.complete {
                panic!("{} poll_next on complete", Self::type_name())
            } else if self.done {
                self.complete = true;
                Ready(None)
            } else {
                // Either start a fresh read with a new buffer or resume with
                // the buffer kept from a previous Pending poll.
                let mut buf = if !self.read_going {
                    self.ts1 = Instant::now();
                    let mut buf = BytesMut::new();
                    buf.resize(self.disk_io_tune.read_buffer_len, 0);
                    buf
                } else {
                    mem::replace(&mut self.buf, BytesMut::new())
                };
                let mutsl = buf.as_mut();
                let mut rb = ReadBuf::new(mutsl);
                let f1 = &mut self.file;
                let f2 = Pin::new(f1);
                let pollres = AsyncRead::poll_read(f2, cx, &mut rb);
                match pollres {
                    Ready(Ok(_)) => {
                        let nread = rb.filled().len();
                        buf.truncate(nread);
                        self.read_going = false;
                        let ts2 = Instant::now();
                        if nread == 0 {
                            // EOF: emit a final empty chunk, then end.
                            let ret = FileChunkRead::with_buf_dur(buf, ts2.duration_since(self.ts1));
                            self.done = true;
                            Ready(Some(Ok(ret)))
                        } else {
                            let ret = FileChunkRead::with_buf_dur(buf, ts2.duration_since(self.ts1));
                            // Disabled debug logging of the first few chunks.
                            if false && self.nlog < 6 {
                                self.nlog += 1;
                                info!("{:?} ret {:?}", self.disk_io_tune, ret);
                            }
                            Ready(Some(Ok(ret)))
                        }
                    }
                    Ready(Err(e)) => {
                        self.done = true;
                        Ready(Some(Err(e.into())))
                    }
                    Pending => Pending,
                }
            };
        }
    }
}
/// Spawns a background task that reads `file` sequentially (with an 8 s
/// per-read timeout) and sends `FileChunkRead` chunks into `tx`; errors are
/// forwarded on the channel and stop the loop.
fn start_read5(
    path: PathBuf,
    file: File,
    tx: async_channel::Sender<Result<FileChunkRead, Error>>,
    disk_io_tune: DiskIoTune,
) -> Result<(), Error> {
    let fut = async move {
        let mut file = file;
        // Remember the starting offset for diagnostics.
        let pos_beg = match file.stream_position().await {
            Ok(x) => x,
            Err(e) => {
                error!("stream_position {e} {path:?}");
                if let Err(_) = tx
                    .send(Err(Error::with_msg_no_trace(format!("seek error {path:?}"))))
                    .await
                {
                    error!("broken channel");
                }
                return;
            }
        };
        let mut pos = pos_beg;
        info!("read5 begin {disk_io_tune:?}");
        loop {
            let mut buf = BytesMut::new();
            buf.resize(disk_io_tune.read_buffer_len, 0);
            match tokio::time::timeout(Duration::from_millis(8000), file.read(&mut buf)).await {
                Ok(Ok(n)) => {
                    if n == 0 {
                        //info!("read5 EOF pos_beg {pos_beg} pos {pos} path {path:?}");
                        break;
                    }
                    pos += n as u64;
                    buf.truncate(n);
                    let item = FileChunkRead::with_buf(buf);
                    match tx.send(Ok(item)).await {
                        Ok(()) => {}
                        Err(_) => {
                            // Receiver dropped: stop reading silently.
                            //error!("broken channel");
                            break;
                        }
                    }
                }
                Ok(Err(e)) => match tx.send(Err(e.into())).await {
                    Ok(()) => {
                        break;
                    }
                    Err(_) => {
                        //error!("broken channel");
                        break;
                    }
                },
                Err(_) => {
                    // Timeout elapsed: report and stop.
                    let msg = format!("I/O timeout pos_beg {pos_beg} pos {pos} path {path:?}");
                    error!("{msg}");
                    let e = Error::with_msg_no_trace(msg);
                    match tx.send(Err(e)).await {
                        Ok(()) => {}
                        Err(_e) => {
                            //error!("broken channel");
                            break;
                        }
                    }
                    break;
                }
            }
        }
        let n = pos - pos_beg;
        info!("read5 done {n}");
    };
    tokio::task::spawn(fut);
    Ok(())
}
/// Stream facade over the background reader task started by `start_read5`;
/// simply forwards items from the bounded channel.
pub struct FileContentStream5 {
    rx: async_channel::Receiver<Result<FileChunkRead, Error>>,
}

impl FileContentStream5 {
    pub fn new(path: PathBuf, file: File, disk_io_tune: DiskIoTune) -> Result<Self, Error> {
        // Bounded to 32 chunks of read-ahead.
        let (tx, rx) = async_channel::bounded(32);
        start_read5(path, file, tx, disk_io_tune)?;
        let ret = Self { rx };
        Ok(ret)
    }
}

impl Stream for FileContentStream5 {
    type Item = Result<FileChunkRead, Error>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        self.rx.poll_next_unpin(cx)
    }
}
/// State of `FileContentStream2`: idle, or holding the read buffer together
/// with the in-flight read future that borrows it.
enum FCS2 {
    Idle,
    Reading(
        (
            Box<BytesMut>,
            Pin<Box<dyn Future<Output = Result<usize, Error>> + Send>>,
        ),
    ),
}

/// File reader that keeps a `read_buf` future in-flight between polls.
pub struct FileContentStream2 {
    fcs: FCS2,
    file: Pin<Box<File>>,
    disk_io_tune: DiskIoTune,
    done: bool,
    complete: bool,
}

impl FileContentStream2 {
    pub fn type_name() -> &'static str {
        std::any::type_name::<Self>()
    }

    pub fn new(file: File, disk_io_tune: DiskIoTune) -> Self {
        let file = Box::pin(file);
        Self {
            fcs: FCS2::Idle,
            file,
            disk_io_tune,
            done: false,
            complete: false,
        }
    }

    // Starts a new read: the future stored in FCS2::Reading aliases both the
    // boxed buffer and the boxed file via raw pointers.
    // NOTE(review): this hand-rolled self-referential construction relies on
    // the buffer/file boxes never moving or being dropped while the future
    // is live; it bypasses the borrow checker entirely — confirm soundness.
    fn make_reading(&mut self) {
        let mut buf = Box::new(BytesMut::with_capacity(self.disk_io_tune.read_buffer_len));
        let bufref = unsafe { &mut *((&mut buf as &mut BytesMut) as *mut BytesMut) };
        let fileref = unsafe { &mut *((&mut self.file) as *mut Pin<Box<File>>) };
        let fut = AsyncReadExt::read_buf(fileref, bufref).map_err(|e| e.into());
        self.fcs = FCS2::Reading((buf, Box::pin(fut)));
    }
}
impl Stream for FileContentStream2 {
    type Item = Result<FileChunkRead, Error>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        loop {
            break if self.complete {
                panic!("{} poll_next on complete", Self::type_name())
            } else if self.done {
                self.complete = true;
                Ready(None)
            } else {
                match self.fcs {
                    FCS2::Idle => {
                        // Kick off the first/next read and poll it right away.
                        self.make_reading();
                        continue;
                    }
                    FCS2::Reading((ref mut buf, ref mut fut)) => match fut.poll_unpin(cx) {
                        Ready(Ok(n)) => {
                            // Take the filled buffer; n == 0 signals EOF but
                            // the (empty) chunk is still emitted once.
                            let buf2 = std::mem::replace(buf as &mut BytesMut, BytesMut::new());
                            let item = FileChunkRead::with_buf(buf2);
                            if n == 0 {
                                self.done = true;
                            } else {
                                self.make_reading();
                            }
                            Ready(Some(Ok(item)))
                        }
                        Ready(Err(e)) => {
                            self.done = true;
                            Ready(Some(Err(e.into())))
                        }
                        Pending => Pending,
                    },
                }
            };
        }
    }
}
/// States of `FileContentStream3`: determine the start offset, then read
/// either one request at a time or through the queued-read path.
enum FCS3 {
    GetPosition,
    ReadingSimple,
    Reading,
}

/// One queued read: still in flight, or already resolved and waiting to be
/// handed out in order.
enum ReadStep {
    Fut(Pin<Box<dyn Future<Output = Result<read3::ReadResult, Error>> + Send>>),
    Res(Result<read3::ReadResult, Error>),
}

/// File reader built on the `read3` positioned-read service, keeping up to
/// `read_queue_len` reads in flight.
pub struct FileContentStream3 {
    fcs: FCS3,
    file: Pin<Box<File>>,
    // Offset at which the next read will be issued.
    file_pos: u64,
    eof: bool,
    disk_io_tune: DiskIoTune,
    get_position_fut: Pin<Box<dyn Future<Output = Result<u64, Error>> + Send>>,
    read_fut: Pin<Box<dyn Future<Output = Result<read3::ReadResult, Error>> + Send>>,
    // FIFO of in-flight / resolved reads; results are emitted in order.
    reads: VecDeque<ReadStep>,
    done: bool,
    complete: bool,
}

impl FileContentStream3 {
    pub fn new(file: File, disk_io_tune: DiskIoTune) -> Self {
        let mut file = Box::pin(file);
        // NOTE(review): the seek future borrows the file but is stored with a
        // transmuted 'static lifetime; soundness depends on the boxed file
        // outliving (and not moving under) `get_position_fut` — confirm.
        let ffr = unsafe {
            let ffr = Pin::get_unchecked_mut(file.as_mut());
            std::mem::transmute::<&mut File, &'static mut File>(ffr)
        };
        let ff = ffr
            .seek(SeekFrom::Current(0))
            .map_err(|_| Error::with_msg_no_trace(format!("Seek error")));
        Self {
            fcs: FCS3::GetPosition,
            file,
            file_pos: 0,
            eof: false,
            disk_io_tune,
            get_position_fut: Box::pin(ff),
            // Placeholder; replaced before first use.
            read_fut: Box::pin(futures_util::future::ready(Err(Error::with_msg_no_trace(format!(
                "dummy"
            ))))),
            reads: VecDeque::new(),
            done: false,
            complete: false,
        }
    }
}
impl Stream for FileContentStream3 {
    type Item = Result<FileChunkRead, Error>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        loop {
            break if self.complete {
                panic!("poll_next on complete")
            } else if self.done {
                self.complete = true;
                Ready(None)
            } else {
                match self.fcs {
                    // First establish the current file offset, then switch to
                    // the reading state (the single-read path is disabled).
                    FCS3::GetPosition => match self.get_position_fut.poll_unpin(cx) {
                        Ready(Ok(k)) => {
                            info!("current file pos: {k}");
                            self.file_pos = k;
                            if false {
                                let fd = self.file.as_raw_fd();
                                let count = self.disk_io_tune.read_buffer_len as u64;
                                self.read_fut = Box::pin(read3::Read3::get().read(fd, self.file_pos, count));
                                self.file_pos += count;
                                self.fcs = FCS3::ReadingSimple;
                            } else {
                                self.fcs = FCS3::Reading;
                            }
                            continue;
                        }
                        Ready(Err(e)) => {
                            self.done = true;
                            Ready(Some(Err(e)))
                        }
                        Pending => Pending,
                    },
                    // One read in flight at a time; currently unused (the
                    // `if false` above never selects this state).
                    FCS3::ReadingSimple => match self.read_fut.poll_unpin(cx) {
                        Ready(Ok(res)) => {
                            if res.eof {
                                let item = FileChunkRead::with_buf(res.buf);
                                self.done = true;
                                Ready(Some(Ok(item)))
                            } else {
                                let item = FileChunkRead::with_buf(res.buf);
                                let fd = self.file.as_raw_fd();
                                let count = self.disk_io_tune.read_buffer_len as u64;
                                self.read_fut = Box::pin(read3::Read3::get().read(fd, self.file_pos, count));
                                self.file_pos += count;
                                Ready(Some(Ok(item)))
                            }
                        }
                        Ready(Err(e)) => {
                            self.done = true;
                            Ready(Some(Err(e)))
                        }
                        Pending => Pending,
                    },
                    FCS3::Reading => {
                        // Keep the queue topped up with positioned reads
                        // until EOF was observed.
                        while !self.eof && self.reads.len() < self.disk_io_tune.read_queue_len {
                            let fd = self.file.as_raw_fd();
                            let pos = self.file_pos;
                            let count = self.disk_io_tune.read_buffer_len as u64;
                            trace!("create ReadTask fd {fd} pos {pos} count {count}");
                            let r3 = read3::Read3::get();
                            let fut = r3.read(fd, pos, count);
                            self.reads.push_back(ReadStep::Fut(Box::pin(fut)));
                            self.file_pos += count;
                        }
                        // Poll every in-flight read so all make progress,
                        // converting finished futures into stored results.
                        for e in &mut self.reads {
                            match e {
                                ReadStep::Fut(k) => match k.poll_unpin(cx) {
                                    Ready(k) => {
                                        trace!("received a result");
                                        *e = ReadStep::Res(k);
                                    }
                                    Pending => {}
                                },
                                ReadStep::Res(_) => {}
                            }
                        }
                        // Emit strictly in order: only hand out the front of
                        // the queue once its result is available.
                        if let Some(ReadStep::Res(_)) = self.reads.front() {
                            if let Some(ReadStep::Res(res)) = self.reads.pop_front() {
                                trace!("pop front result");
                                match res {
                                    Ok(rr) => {
                                        if rr.eof {
                                            if self.eof {
                                                trace!("see EOF in ReadResult AGAIN");
                                            } else {
                                                debug!("see EOF in ReadResult SET OUR FLAG");
                                                self.eof = true;
                                            }
                                        }
                                        let res = FileChunkRead::with_buf(rr.buf);
                                        Ready(Some(Ok(res)))
                                    }
                                    Err(e) => {
                                        error!("received ReadResult error: {e}");
                                        self.done = true;
                                        let e = Error::with_msg(format!("I/O error: {e}"));
                                        Ready(Some(Err(e)))
                                    }
                                }
                            } else {
                                // Unreachable: front() just matched Res.
                                self.done = true;
                                let e = Error::with_msg(format!("logic error"));
                                error!("{e}");
                                Ready(Some(Err(e)))
                            }
                        } else if let None = self.reads.front() {
                            // Queue drained after EOF: finish the stream.
                            debug!("empty read fut queue, end");
                            self.done = true;
                            continue;
                        } else {
                            trace!("read fut queue Pending");
                            Pending
                        }
                    }
                }
            };
        }
    }
}
/// States of `FileContentStream4`: register with the read4 service, wait for
/// its receiver, then forward chunks.
enum FCS4 {
    Init,
    Setup,
    Reading,
}

/// File reader built on the `read4` service, which delivers results through
/// an mpsc receiver.
pub struct FileContentStream4 {
    fcs: FCS4,
    file: Pin<Box<File>>,
    disk_io_tune: DiskIoTune,
    setup_fut:
        Option<Pin<Box<dyn Future<Output = Result<mpsc::Receiver<Result<read4::ReadResult, Error>>, Error>> + Send>>>,
    inp: Option<mpsc::Receiver<Result<read4::ReadResult, Error>>>,
    // Future for the next `recv()`; aliases `inp` (see NOTE in poll_next).
    recv_fut: Pin<Box<dyn Future<Output = Option<Result<read4::ReadResult, Error>>> + Send>>,
    done: bool,
    complete: bool,
}

impl FileContentStream4 {
    pub fn new(file: File, disk_io_tune: DiskIoTune) -> Self {
        let file = Box::pin(file);
        Self {
            fcs: FCS4::Init,
            file,
            disk_io_tune,
            setup_fut: None,
            inp: None,
            // Placeholder; replaced before first use.
            recv_fut: Box::pin(futures_util::future::ready(Some(Err(Error::with_msg_no_trace(
                format!("dummy"),
            ))))),
            done: false,
            complete: false,
        }
    }
}
impl Stream for FileContentStream4 {
    type Item = Result<FileChunkRead, Error>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        loop {
            break if self.complete {
                panic!("poll_next on complete")
            } else if self.done {
                self.complete = true;
                Ready(None)
            } else {
                match self.fcs {
                    // Register the fd with the read4 service.
                    FCS4::Init => {
                        let read4 = read4::Read4::get();
                        let fd = self.file.as_raw_fd();
                        let buflen = self.disk_io_tune.read_buffer_len as u64;
                        let fut = read4.read(fd, buflen, self.disk_io_tune.read_queue_len);
                        self.setup_fut = Some(Box::pin(fut) as _);
                        self.fcs = FCS4::Setup;
                        continue;
                    }
                    // Wait for the service to hand back the result channel.
                    FCS4::Setup => match self.setup_fut.as_mut().unwrap().poll_unpin(cx) {
                        Ready(k) => match k {
                            Ok(k) => {
                                self.setup_fut = None;
                                self.fcs = FCS4::Reading;
                                self.inp = Some(k);
                                // TODO
                                // NOTE(review): `recv_fut` borrows `inp` but
                                // is stored with a transmuted 'static
                                // lifetime; sound only if `inp` is neither
                                // moved nor dropped while the future is live
                                // — confirm.
                                let rm = self.inp.as_mut().unwrap();
                                let rm = unsafe {
                                    std::mem::transmute::<
                                        &mut mpsc::Receiver<Result<read4::ReadResult, Error>>,
                                        &'static mut mpsc::Receiver<Result<read4::ReadResult, Error>>,
                                    >(rm)
                                };
                                self.recv_fut = Box::pin(rm.recv()) as _;
                                continue;
                            }
                            Err(e) => {
                                self.done = true;
                                let e = Error::with_msg_no_trace(format!("init failed {e:?}"));
                                Ready(Some(Err(e)))
                            }
                        },
                        Pending => Pending,
                    },
                    // Forward chunks from the receiver; channel close ends
                    // the stream.
                    FCS4::Reading => match self.recv_fut.poll_unpin(cx) {
                        Ready(k) => match k {
                            Some(k) => match k {
                                Ok(k) => {
                                    // TODO
                                    // NOTE(review): same transmuted-lifetime
                                    // pattern as in Setup above — confirm.
                                    let rm = self.inp.as_mut().unwrap();
                                    let rm = unsafe {
                                        std::mem::transmute::<
                                            &mut mpsc::Receiver<Result<read4::ReadResult, Error>>,
                                            &'static mut mpsc::Receiver<Result<read4::ReadResult, Error>>,
                                        >(rm)
                                    };
                                    self.recv_fut = Box::pin(rm.recv()) as _;
                                    let item = FileChunkRead::with_buf(k.buf);
                                    Ready(Some(Ok(item)))
                                }
                                Err(e) => {
                                    self.done = true;
                                    let e = Error::with_msg_no_trace(format!("init failed {e:?}"));
                                    Ready(Some(Err(e)))
                                }
                            },
                            None => {
                                self.done = true;
                                continue;
                            }
                        },
                        Pending => Pending,
                    },
                }
            };
        }
    }
}
/// Build a boxed stream of file chunks for `file`, selecting the reader
/// backend according to `disk_io_tune.read_sys`.
///
/// `path` is only needed by the `Read5` backend; the others read from the
/// already-open `file` handle.
pub fn file_content_stream(
    path: PathBuf,
    file: File,
    disk_io_tune: DiskIoTune,
) -> Pin<Box<dyn Stream<Item = Result<FileChunkRead, Error>> + Send>> {
    debug!("file_content_stream disk_io_tune {disk_io_tune:?}");
    match &disk_io_tune.read_sys {
        ReadSys::TokioAsyncRead => Box::pin(FileContentStream::new(file, disk_io_tune)),
        ReadSys::Read2 => Box::pin(FileContentStream2::new(file, disk_io_tune)),
        ReadSys::Read3 => Box::pin(FileContentStream3::new(file, disk_io_tune)),
        ReadSys::Read4 => Box::pin(FileContentStream4::new(file, disk_io_tune)),
        ReadSys::Read5 => Box::pin(FileContentStream5::new(path, file, disk_io_tune).unwrap()),
    }
}
/// Extension trait: derive the databuffer event "type flags" byte from a
/// channel configuration.
trait ChannelConfigExt {
    // Returns a bitmask combining COMPRESSION / SHAPE / BIG_ENDIAN / ARRAY.
    fn dtflags(&self) -> u8;
}
impl ChannelConfigExt for SfDbChConf {
    /// Encode this config's properties as the event type-flags byte used in
    /// the on-disk data format.
    fn dtflags(&self) -> u8 {
        let mut flags = 0u8;
        if self.compression {
            flags |= COMPRESSION;
        }
        // Anything non-scalar (Wave or Image) carries the SHAPE flag.
        if !matches!(self.shape, Shape::Scalar) {
            flags |= SHAPE;
        }
        if self.byte_order.is_be() {
            flags |= BIG_ENDIAN;
        }
        if self.array {
            flags |= ARRAY;
        }
        flags
    }
}

View File

@@ -0,0 +1,420 @@
use crate::dataopen::open_expanded_files;
use crate::dataopen::open_files;
use crate::dataopen::OpenedFileSet;
use crate::eventchunker::EventChunker;
use crate::eventchunker::EventChunkerConf;
use err::Error;
use futures_util::Stream;
use futures_util::StreamExt;
use items_0::streamitem::LogItem;
use items_0::streamitem::RangeCompletableItem;
use items_0::streamitem::Sitemty;
use items_0::streamitem::StreamItem;
use items_0::WithLen;
use items_2::eventfull::EventFull;
use items_2::merger::Merger;
use netpod::log::*;
use netpod::range::evrange::NanoRange;
use netpod::timeunits::SEC;
use netpod::DiskIoTune;
use netpod::Node;
use netpod::SfChFetchInfo;
use std::collections::VecDeque;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
use streams::rangefilter2::RangeFilter2;
/// Alias-style trait for any stream of `Sitemty<EventFull>` items.
pub trait InputTraits: Stream<Item = Sitemty<EventFull>> {}
// Blanket impl so every matching stream satisfies the alias automatically.
impl<T> InputTraits for T where T: Stream<Item = Sitemty<EventFull>> {}
/// Streams `EventFull` items for one channel across multiple data files,
/// opening file sets from `file_chan` and chunking/merging them as needed.
pub struct EventChunkerMultifile {
    fetch_info: SfChFetchInfo,
    // Supplies the sets of opened files, one set per time bin.
    file_chan: async_channel::Receiver<Result<OpenedFileSet, Error>>,
    // Currently active event stream (chunker, possibly merged and range-filtered).
    evs: Option<Pin<Box<dyn InputTraits + Send>>>,
    disk_io_tune: DiskIoTune,
    event_chunker_conf: EventChunkerConf,
    range: NanoRange,
    // Total number of data files consumed so far (for the final log item).
    files_count: u32,
    node_ix: usize,
    // Whether the query uses "expanded" semantics (one event beyond each edge).
    expand: bool,
    do_decompress: bool,
    // Highest event timestamp seen; used to detect repeated/unordered events.
    max_ts: u64,
    // Max item length handed to the Merger for multi-file time bins.
    out_max_len: usize,
    emit_count: usize,
    // Debug hook: when Some(n), log the first n emitted batches.
    do_emit_err_after: Option<usize>,
    // True once an event at/after range.end proves the range was fully covered.
    range_final: bool,
    // Log items to be emitted before any other output.
    log_queue: VecDeque<LogItem>,
    done: bool,
    done_emit_range_final: bool,
    complete: bool,
}
impl EventChunkerMultifile {
    /// Type name for diagnostics and panic messages.
    pub fn type_name() -> &'static str {
        std::any::type_name::<Self>()
    }

    /// Build the multifile chunker for `range` on the given `node`.
    ///
    /// The file-set channel is opened immediately, in expanded or plain mode
    /// depending on `expand`; actual reading starts on the first poll.
    pub fn new(
        range: NanoRange,
        fetch_info: SfChFetchInfo,
        node: Node,
        node_ix: usize,
        disk_io_tune: DiskIoTune,
        event_chunker_conf: EventChunkerConf,
        expand: bool,
        do_decompress: bool,
        out_max_len: usize,
    ) -> Self {
        info!("EventChunkerMultifile expand {expand} do_decompress {do_decompress}");
        let file_chan = match expand {
            true => open_expanded_files(&range, &fetch_info, node),
            false => open_files(&range, &fetch_info, node),
        };
        Self {
            fetch_info,
            file_chan,
            evs: None,
            disk_io_tune,
            event_chunker_conf,
            range,
            files_count: 0,
            node_ix,
            expand,
            do_decompress,
            max_ts: 0,
            out_max_len,
            emit_count: 0,
            do_emit_err_after: None,
            range_final: false,
            log_queue: VecDeque::new(),
            done: false,
            done_emit_range_final: false,
            complete: false,
        }
    }
}
impl Stream for EventChunkerMultifile {
    type Item = Result<StreamItem<RangeCompletableItem<EventFull>>, Error>;
    // Emission order: queued log items first, then data from the active event
    // stream; when that stream ends, the next file set is pulled from
    // `file_chan`; when the channel ends, a final stats log item and (if seen)
    // a RangeComplete marker are emitted.
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        let span1 = span!(Level::INFO, "EvChMul", node_ix = self.node_ix);
        let _spg = span1.enter();
        info!("EventChunkerMultifile poll_next");
        use Poll::*;
        'outer: loop {
            break if let Some(item) = self.log_queue.pop_front() {
                Ready(Some(Ok(StreamItem::Log(item))))
            } else if self.complete {
                panic!("{} poll_next on complete", Self::type_name());
            } else if self.done_emit_range_final {
                self.complete = true;
                Ready(None)
            } else if self.done {
                self.done_emit_range_final = true;
                if self.range_final {
                    Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
                } else {
                    continue;
                }
            } else {
                match &mut self.evs {
                    Some(evs) => match evs.poll_next_unpin(cx) {
                        Ready(Some(Ok(k))) => {
                            let k = if let StreamItem::DataItem(RangeCompletableItem::Data(h)) = k {
                                let mut h: EventFull = h;
                                if h.len() > 0 {
                                    // Track min/max ts of the batch to detect
                                    // repeated or unordered events across files.
                                    let min = h.tss.iter().fold(u64::MAX, |a, &x| a.min(x));
                                    let max = h.tss.iter().fold(u64::MIN, |a, &x| a.max(x));
                                    if min <= self.max_ts {
                                        let msg = format!("EventChunkerMultifile repeated or unordered ts {}", min);
                                        error!("{}", msg);
                                        let item = LogItem {
                                            node_ix: self.node_ix as _,
                                            level: Level::INFO,
                                            msg,
                                        };
                                        self.log_queue.push_back(item);
                                    }
                                    self.max_ts = max;
                                    if let Some(after) = self.do_emit_err_after {
                                        if self.emit_count < after {
                                            debug!(
                                                "EventChunkerMultifile emit {}/{} events {}",
                                                self.emit_count,
                                                after,
                                                h.len()
                                            );
                                            self.emit_count += 1;
                                        }
                                    }
                                    if max >= self.range.end {
                                        // Events beyond range.end prove completeness;
                                        // truncate the batch and stop opening files by
                                        // swapping in an already-closed channel.
                                        self.range_final = true;
                                        h.truncate_ts(self.range.end);
                                        self.evs = None;
                                        let (tx, rx) = async_channel::bounded(1);
                                        drop(tx);
                                        self.file_chan = rx;
                                    }
                                }
                                StreamItem::DataItem(RangeCompletableItem::Data(h))
                            } else {
                                k
                            };
                            Ready(Some(Ok(k)))
                        }
                        Ready(Some(Err(e))) => {
                            error!("{e}");
                            self.done = true;
                            Ready(Some(Err(e)))
                        }
                        Ready(None) => {
                            // Current file set exhausted; fetch the next one.
                            self.evs = None;
                            continue 'outer;
                        }
                        Pending => Pending,
                    },
                    None => match self.file_chan.poll_next_unpin(cx) {
                        Ready(Some(k)) => match k {
                            Ok(ofs) => {
                                self.files_count += ofs.files.len() as u32;
                                if ofs.files.len() == 1 {
                                    // Single file: chunk and range-filter it directly.
                                    let mut ofs = ofs;
                                    let file = ofs.files.pop().unwrap();
                                    let path = file.path;
                                    let msg = format!("handle OFS {:?}", ofs);
                                    debug!("{}", msg);
                                    let item = LogItem::quick(Level::INFO, msg);
                                    match file.file {
                                        Some(file) => {
                                            let inp = Box::pin(crate::file_content_stream(
                                                path.clone(),
                                                file,
                                                self.disk_io_tune.clone(),
                                            ));
                                            let chunker = EventChunker::from_event_boundary(
                                                inp,
                                                self.fetch_info.clone(),
                                                self.range.clone(),
                                                self.event_chunker_conf.clone(),
                                                path.clone(),
                                                self.expand,
                                                self.do_decompress,
                                            );
                                            let filtered = RangeFilter2::new(chunker, self.range.clone(), self.expand);
                                            self.evs = Some(Box::pin(filtered));
                                        }
                                        None => {}
                                    }
                                    Ready(Some(Ok(StreamItem::Log(item))))
                                } else if ofs.files.len() == 0 {
                                    let msg = format!("handle OFS {:?} NO FILES", ofs);
                                    debug!("{}", msg);
                                    let item = LogItem::quick(Level::INFO, msg);
                                    Ready(Some(Ok(StreamItem::Log(item))))
                                } else {
                                    // Multiple files for one time bin: chunk each
                                    // and merge them before range filtering.
                                    let msg = format!("handle OFS MERGED timebin {}", ofs.timebin);
                                    info!("{}", msg);
                                    for x in &ofs.files {
                                        info!("  path {:?}", x.path);
                                    }
                                    let item = LogItem::quick(Level::INFO, msg);
                                    let mut chunkers = Vec::new();
                                    for of in ofs.files {
                                        if let Some(file) = of.file {
                                            let inp = crate::file_content_stream(
                                                of.path.clone(),
                                                file,
                                                self.disk_io_tune.clone(),
                                            );
                                            let chunker = EventChunker::from_event_boundary(
                                                inp,
                                                self.fetch_info.clone(),
                                                self.range.clone(),
                                                self.event_chunker_conf.clone(),
                                                of.path.clone(),
                                                self.expand,
                                                self.do_decompress,
                                            );
                                            chunkers.push(Box::pin(chunker) as _);
                                        }
                                    }
                                    let merged = Merger::new(chunkers, self.out_max_len);
                                    let filtered = RangeFilter2::new(merged, self.range.clone(), self.expand);
                                    self.evs = Some(Box::pin(filtered));
                                    Ready(Some(Ok(StreamItem::Log(item))))
                                }
                            }
                            Err(e) => {
                                self.done = true;
                                Ready(Some(Err(e)))
                            }
                        },
                        Ready(None) => {
                            // No more file sets: emit a summary log item, then
                            // finish (and possibly emit RangeComplete) above.
                            self.done = true;
                            let item = LogItem::quick(
                                Level::INFO,
                                format!(
                                    "EventChunkerMultifile used {} datafiles  beg {}  end {}  node_ix {}",
                                    self.files_count,
                                    self.range.beg / SEC,
                                    self.range.end / SEC,
                                    self.node_ix
                                ),
                            );
                            Ready(Some(Ok(StreamItem::Log(item))))
                        }
                        Pending => Pending,
                    },
                }
            };
        }
    }
}
// TODO re-enable tests generate data on the fly.
#[cfg(DISABLED)]
#[cfg(test)]
mod test {
    use crate::eventblobs::EventChunkerMultifile;
    use crate::eventchunker::EventChunkerConf;
    use crate::SfDbChConf;
    use err::Error;
    use futures_util::StreamExt;
    use items_0::streamitem::RangeCompletableItem;
    use items_0::streamitem::StreamItem;
    use items_0::WithLen;
    use netpod::log::*;
    use netpod::range::evrange::NanoRange;
    use netpod::timeunits::DAY;
    use netpod::timeunits::MS;
    use netpod::ByteSize;
    use netpod::DiskIoTune;
    use netpod::TsNano;
    use streams::rangefilter2::RangeFilter2;
    const BACKEND: &str = "testbackend-00";
    // Read the channel in expanded mode over `range` on node `nodeix` and
    // return (event count, event timestamps).
    fn read_expanded_for_range(range: NanoRange, nodeix: usize) -> Result<(usize, Vec<u64>), Error> {
        let chn = netpod::SfDbChannel {
            backend: BACKEND.into(),
            name: "scalar-i32-be".into(),
            series: None,
        };
        // TODO read config from disk.
        let channel_config = SfDbChConf {
            channel: chn,
            keyspace: 2,
            time_bin_size: TsNano(DAY),
            scalar_type: netpod::ScalarType::I32,
            byte_order: netpod::ByteOrder::Big,
            shape: netpod::Shape::Scalar,
            array: false,
            compression: false,
        };
        let cluster = netpod::test_cluster();
        let node = cluster.nodes[nodeix].clone();
        let event_chunker_conf = EventChunkerConf {
            disk_stats_every: ByteSize::kb(1024),
        };
        let disk_io_tune = DiskIoTune::default_for_testing();
        let task = async move {
            let mut event_count = 0;
            let events = EventChunkerMultifile::new(
                range.clone(),
                channel_config,
                node,
                nodeix,
                disk_io_tune,
                event_chunker_conf,
                true,
                true,
                // TODO do asserts depend on this?
                32,
            );
            //let mut events = MergedStream::new(vec![events], range.clone(), true);
            let mut events = RangeFilter2::new(events, range.clone(), true);
            let mut tss = Vec::new();
            // Collect timestamps of all data items; logs/stats are ignored.
            while let Some(item) = events.next().await {
                match item {
                    Ok(item) => match item {
                        StreamItem::DataItem(item) => match item {
                            RangeCompletableItem::Data(item) => {
                                // TODO assert more
                                debug!("item: {:?}", item.tss.iter().map(|x| x / MS).collect::<Vec<_>>());
                                event_count += item.len();
                                for ts in item.tss {
                                    tss.push(ts);
                                }
                            }
                            _ => {}
                        },
                        _ => {}
                    },
                    Err(e) => return Err(e.into()),
                }
            }
            Ok((event_count, tss))
        };
        Ok(taskrun::run(task).unwrap())
    }
    // Expanded read keeps one event before and after the requested range.
    #[test]
    fn read_expanded_0() -> Result<(), Error> {
        let range = NanoRange {
            beg: DAY + MS * 0,
            end: DAY + MS * 100,
        };
        let res = read_expanded_for_range(range, 0)?;
        // TODO assert more
        debug!("got {:?}", res.1);
        if res.0 != 3 {
            Err(Error::with_msg(format!("unexpected number of events: {}", res.0)))?;
        }
        assert_eq!(res.1, vec![DAY - MS * 1500, DAY, DAY + MS * 1500]);
        Ok(())
    }
    #[test]
    fn read_expanded_1() -> Result<(), Error> {
        let range = NanoRange {
            beg: DAY + MS * 0,
            end: DAY + MS * 1501,
        };
        let res = read_expanded_for_range(range, 0)?;
        if res.0 != 4 {
            Err(Error::with_msg(format!("unexpected number of events: {}", res.0)))?;
        }
        assert_eq!(res.1, vec![DAY - MS * 1500, DAY, DAY + MS * 1500, DAY + MS * 3000]);
        Ok(())
    }
    #[test]
    fn read_expanded_2() -> Result<(), Error> {
        let range = NanoRange {
            beg: DAY - MS * 100,
            end: DAY + MS * 1501,
        };
        let res = read_expanded_for_range(range, 0)?;
        assert_eq!(res.1, vec![DAY - MS * 1500, DAY, DAY + MS * 1500, DAY + MS * 3000]);
        Ok(())
    }
    #[test]
    fn read_expanded_3() -> Result<(), Error> {
        use netpod::timeunits::*;
        let range = NanoRange {
            beg: DAY - MS * 1500,
            end: DAY + MS * 1501,
        };
        let res = read_expanded_for_range(range, 0)?;
        assert_eq!(
            res.1,
            vec![DAY - MS * 3000, DAY - MS * 1500, DAY, DAY + MS * 1500, DAY + MS * 3000]
        );
        Ok(())
    }
}

View File

@@ -0,0 +1,673 @@
use bitshuffle::bitshuffle_decompress;
use bytes::Buf;
use bytes::BytesMut;
use err::thiserror;
use err::Error;
use futures_util::Stream;
use futures_util::StreamExt;
use items_0::streamitem::RangeCompletableItem;
use items_0::streamitem::StatsItem;
use items_0::streamitem::StreamItem;
use items_0::Empty;
use items_0::WithLen;
use items_2::eventfull::EventFull;
use netpod::histo::HistoLog2;
use netpod::log::*;
use netpod::range::evrange::NanoRange;
use netpod::timeunits::SEC;
use netpod::ByteSize;
use netpod::EventDataReadStats;
use netpod::ScalarType;
use netpod::SfChFetchInfo;
use netpod::Shape;
use parse::channelconfig::CompressionMethod;
use std::io::Cursor;
use std::path::PathBuf;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
use std::time::Instant;
use streams::dtflags::*;
use streams::filechunkread::FileChunkRead;
use streams::needminbuffer::NeedMinBuffer;
/// Errors produced while parsing the on-disk databuffer event format.
#[derive(Debug, thiserror::Error)]
pub enum DataParseError {
    #[error("DataFrameLengthMismatch")]
    DataFrameLengthMismatch,
    #[error("FileHeaderTooShort")]
    FileHeaderTooShort,
    #[error("BadVersionTag")]
    BadVersionTag,
    #[error("HeaderTooLarge")]
    HeaderTooLarge,
    #[error("Utf8Error")]
    Utf8Error,
    #[error("EventTooShort")]
    EventTooShort,
    #[error("EventTooLong")]
    EventTooLong,
    // Too many events seen with ts before the requested range (safety cap).
    #[error("TooManyBeforeRange")]
    TooManyBeforeRange,
    // Events with an "optional" field set are not supported.
    #[error("EventWithOptional")]
    EventWithOptional,
    #[error("BadTypeIndex")]
    BadTypeIndex,
    #[error("WaveShapeWithoutEventArray")]
    WaveShapeWithoutEventArray,
    #[error("ShapedWithoutDims")]
    ShapedWithoutDims,
    #[error("TooManyDims")]
    TooManyDims,
    #[error("UnknownCompression")]
    UnknownCompression,
    #[error("BadCompresionBlockSize")]
    BadCompresionBlockSize,
}
/// Parses a stream of raw file chunks into `EventFull` event batches.
pub struct EventChunker {
    // Upstream chunk source; buffers until at least `need_min` bytes are available.
    inp: NeedMinBuffer,
    // Whether we still expect the file header or are parsing events.
    state: DataFileState,
    // Minimum bytes required before the next parse attempt.
    need_min: u32,
    fetch_info: SfChFetchInfo,
    // Sanity cap on need_min, depends on shape (scalar/wave/image).
    need_min_max: u32,
    errored: bool,
    completed: bool,
    range: NanoRange,
    stats_conf: EventChunkerConf,
    // An event at/after range.end was seen (range fully covered).
    seen_beyond_range: bool,
    sent_beyond_range: bool,
    data_emit_complete: bool,
    final_stats_sent: bool,
    // Bytes parsed since the last stats item was emitted.
    parsed_bytes: u64,
    // Path of the file being parsed, for diagnostics only.
    dbg_path: PathBuf,
    last_ts: u64,
    expand: bool,
    do_decompress: bool,
    decomp_dt_histo: HistoLog2,
    item_len_emit_histo: HistoLog2,
    seen_before_range_count: usize,
    seen_after_range_count: usize,
    unordered_count: usize,
    repeated_ts_count: usize,
    config_mismatch_discard: usize,
    discard_count: usize,
}
impl Drop for EventChunker {
    /// Log accumulated parse statistics when the chunker is dropped.
    fn drop(&mut self) {
        // TODO collect somewhere
        let mismatches = self.config_mismatch_discard;
        if mismatches != 0 {
            warn!("config_mismatch_discard {}", mismatches);
        }
        debug!(
            "EventChunker  Drop Stats:\ndecomp_dt_histo: {:?}\nitem_len_emit_histo: {:?}",
            self.decomp_dt_histo, self.item_len_emit_histo
        );
    }
}
/// Parser position within a data file.
enum DataFileState {
    // Still expecting the file header frame.
    FileHeader,
    // Header consumed; parsing event frames.
    Event,
}
/// Result of one `parse_buf` pass: the parsed events and how many input
/// bytes were consumed.
struct ParseResult {
    events: EventFull,
    parsed_bytes: u64,
}
/// Tuning for `EventChunker`: how often to emit disk-read statistics items.
#[derive(Clone, Debug)]
pub struct EventChunkerConf {
    // Emit an EventDataReadStats item after this many parsed bytes.
    pub disk_stats_every: ByteSize,
}
impl EventChunkerConf {
pub fn new(disk_stats_every: ByteSize) -> Self {
Self { disk_stats_every }
}
}
/// Check whether an on-disk event header (its array flag and element count)
/// agrees with the shape recorded in the channel configuration.
///
/// Returns `true` when the event is consistent with `fetch_info.shape()`.
fn is_config_match(is_array: &bool, ele_count: &u64, fetch_info: &SfChFetchInfo) -> bool {
    match fetch_info.shape() {
        // A scalar event must not carry the array flag.
        Shape::Scalar => !*is_array,
        // A waveform must carry exactly the configured number of elements.
        Shape::Wave(dim1count) => (*dim1count as u64) == *ele_count,
        // An image must carry rows * cols elements.
        Shape::Image(n1, n2) => (*n1 as u64) * (*n2 as u64) == *ele_count,
    }
}
/// Error from bitshuffle decompression (input too short or decoder failure).
#[derive(Debug, thiserror::Error)]
pub enum DecompError {
    #[error("Error")]
    Error,
}
fn decompress(databuf: &[u8], type_size: u32, ele_count: u64) -> Result<Vec<u8>, DecompError> {
if databuf.len() < 13 {
return Err(DecompError::Error);
}
let ts1 = Instant::now();
let decomp_bytes = type_size as u64 * ele_count;
let mut decomp = vec![0; decomp_bytes as usize];
let ele_size = type_size;
// TODO limit the buf slice range
match bitshuffle_decompress(&databuf[12..], &mut decomp, ele_count as usize, ele_size as usize, 0) {
Ok(c1) => {
if 12 + c1 != databuf.len() {}
let ts2 = Instant::now();
let dt = ts2.duration_since(ts1);
// TODO analyze the histo
//self.decomp_dt_histo.ingest(dt.as_secs() as u32 + dt.subsec_micros());
Ok(decomp)
}
Err(e) => {
return Err(DecompError::Error);
}
}
}
impl EventChunker {
    /// Type name for log messages.
    pub fn self_name() -> &'static str {
        std::any::type_name::<Self>()
    }

    // TODO `expand` flag usage
    /// Build a chunker that parses from the very start of a data file,
    /// i.e. expecting the file header frame first.
    pub fn from_start(
        inp: Pin<Box<dyn Stream<Item = Result<FileChunkRead, Error>> + Send>>,
        fetch_info: SfChFetchInfo,
        range: NanoRange,
        stats_conf: EventChunkerConf,
        dbg_path: PathBuf,
        expand: bool,
        do_decompress: bool,
    ) -> Self {
        info!(
            "{}::{}  do_decompress {}",
            Self::self_name(),
            "from_start",
            do_decompress
        );
        // Sanity cap for need_min, scaled by the expected event size per shape.
        let need_min_max = match fetch_info.shape() {
            Shape::Scalar => 1024 * 8,
            Shape::Wave(_) => 1024 * 32,
            Shape::Image(_, _) => 1024 * 1024 * 40,
        };
        let mut inp = NeedMinBuffer::new(inp);
        // 6 bytes: i16 version tag + i32 header length.
        inp.set_need_min(6);
        Self {
            inp,
            state: DataFileState::FileHeader,
            need_min: 6,
            need_min_max,
            fetch_info,
            errored: false,
            completed: false,
            range,
            stats_conf,
            seen_beyond_range: false,
            sent_beyond_range: false,
            data_emit_complete: false,
            final_stats_sent: false,
            parsed_bytes: 0,
            dbg_path,
            last_ts: 0,
            expand,
            do_decompress,
            decomp_dt_histo: HistoLog2::new(8),
            item_len_emit_histo: HistoLog2::new(0),
            seen_before_range_count: 0,
            seen_after_range_count: 0,
            unordered_count: 0,
            repeated_ts_count: 0,
            config_mismatch_discard: 0,
            discard_count: 0,
        }
    }

    // TODO `expand` flag usage
    /// Build a chunker positioned at an event boundary (file header already
    /// skipped), so parsing starts directly with event frames.
    pub fn from_event_boundary(
        inp: Pin<Box<dyn Stream<Item = Result<FileChunkRead, Error>> + Send>>,
        fetch_info: SfChFetchInfo,
        range: NanoRange,
        stats_conf: EventChunkerConf,
        dbg_path: PathBuf,
        expand: bool,
        do_decompress: bool,
    ) -> Self {
        info!(
            "{}::{}  do_decompress {}",
            Self::self_name(),
            "from_event_boundary",
            do_decompress
        );
        let mut ret = Self::from_start(inp, fetch_info, range, stats_conf, dbg_path, expand, do_decompress);
        ret.state = DataFileState::Event;
        // 4 bytes: the i32 event frame length.
        ret.need_min = 4;
        ret.inp.set_need_min(4);
        ret
    }

    /// Parse as many complete frames as `buf` holds; wraps `parse_buf_inner`
    /// with a tracing span and converts the error type.
    fn parse_buf(&mut self, buf: &mut BytesMut) -> Result<ParseResult, Error> {
        span!(Level::INFO, "EventChunker::parse_buf")
            .in_scope(|| self.parse_buf_inner(buf))
            .map_err(|e| Error::with_msg_no_trace(format!("{e:?}")))
    }

    // Core parser. Consumes complete frames from `buf`, updating `self.state`
    // and `self.need_min` when more input is required for the next frame.
    fn parse_buf_inner(&mut self, buf: &mut BytesMut) -> Result<ParseResult, DataParseError> {
        use byteorder::ReadBytesExt;
        use byteorder::BE;
        info!("parse_buf_inner  buf len {}", buf.len());
        let mut ret = EventFull::empty();
        let mut parsed_bytes = 0;
        loop {
            // Stop when the buffer cannot hold the next frame yet.
            if (buf.len() as u32) < self.need_min {
                break;
            }
            match self.state {
                DataFileState::FileHeader => {
                    // Header frame: i16 version, i32 length, channel name,
                    // then a trailing copy of the length.
                    if buf.len() < 6 {
                        return Err(DataParseError::FileHeaderTooShort);
                    }
                    let mut sl = Cursor::new(buf.as_ref());
                    let fver = sl.read_i16::<BE>().unwrap();
                    if fver != 0 {
                        return Err(DataParseError::BadVersionTag);
                    }
                    let len = sl.read_i32::<BE>().unwrap();
                    if len <= 0 || len >= 512 {
                        return Err(DataParseError::HeaderTooLarge);
                    }
                    let totlen = len as usize + 2;
                    if buf.len() < totlen {
                        // Wait for the full header before consuming anything.
                        self.need_min = totlen as u32;
                        break;
                    } else {
                        sl.advance(len as usize - 8);
                        let len2 = sl.read_i32::<BE>().unwrap();
                        if len != len2 {
                            return Err(DataParseError::DataFrameLengthMismatch);
                        }
                        let _ = String::from_utf8(buf.as_ref()[6..(len as usize + 6 - 8)].to_vec())
                            .map_err(|_| DataParseError::Utf8Error);
                        self.state = DataFileState::Event;
                        self.need_min = 4;
                        buf.advance(totlen);
                        parsed_bytes += totlen as u64;
                    }
                }
                DataFileState::Event => {
                    let p0 = 0;
                    let mut sl = Cursor::new(buf.as_ref());
                    let len = sl.read_i32::<BE>().unwrap();
                    if len < 20 {
                        return Err(DataParseError::EventTooShort);
                    }
                    // Per-shape sanity caps on the event frame length.
                    match self.fetch_info.shape() {
                        Shape::Scalar if len > 512 => return Err(DataParseError::EventTooLong),
                        Shape::Wave(_) if len > 8 * 1024 * 256 => return Err(DataParseError::EventTooLong),
                        Shape::Image(_, _) if len > 1024 * 1024 * 40 => return Err(DataParseError::EventTooLong),
                        _ => {}
                    }
                    let len = len as u32;
                    if (buf.len() as u32) < len {
                        // Whole event not buffered yet.
                        self.need_min = len as u32;
                        break;
                    } else {
                        let mut discard = false;
                        let _ttl = sl.read_i64::<BE>().unwrap();
                        let ts = sl.read_i64::<BE>().unwrap() as u64;
                        let pulse = sl.read_i64::<BE>().unwrap() as u64;
                        // Diagnostics for repeated / unordered timestamps
                        // (log capped at the first ~20 occurrences).
                        if ts == self.last_ts {
                            self.repeated_ts_count += 1;
                            if self.repeated_ts_count < 20 {
                                let msg = format!(
                                    "EventChunker  repeated event ts  ix {}  ts {}.{}  last_ts {}.{}  config {:?}  path {:?}",
                                    self.repeated_ts_count,
                                    ts / SEC,
                                    ts % SEC,
                                    self.last_ts / SEC,
                                    self.last_ts % SEC,
                                    self.fetch_info.shape(),
                                    self.dbg_path
                                );
                                warn!("{}", msg);
                            }
                        }
                        if ts < self.last_ts {
                            discard = true;
                            self.unordered_count += 1;
                            if self.unordered_count < 20 {
                                let msg = format!(
                                    "EventChunker  unordered event  ix {}  ts {}.{}  last_ts {}.{}  config {:?}  path {:?}",
                                    self.unordered_count,
                                    ts / SEC,
                                    ts % SEC,
                                    self.last_ts / SEC,
                                    self.last_ts % SEC,
                                    self.fetch_info.shape(),
                                    self.dbg_path
                                );
                                warn!("{}", msg);
                            }
                        }
                        self.last_ts = ts;
                        if ts >= self.range.end {
                            discard = true;
                            self.seen_after_range_count += 1;
                            // In expand mode, keep going past one event beyond
                            // the range end; otherwise stop immediately.
                            if !self.expand || self.seen_after_range_count >= 2 {
                                self.seen_beyond_range = true;
                                self.data_emit_complete = true;
                                break;
                            }
                        }
                        if ts < self.range.beg {
                            discard = true;
                            self.seen_before_range_count += 1;
                            if self.seen_before_range_count < 20 {
                                let msg = format!(
                                    "seen before range: {}  event ts {}.{}  range beg {}.{}  range end {}.{}  pulse {}  config {:?}  path {:?}",
                                    self.seen_before_range_count,
                                    ts / SEC,
                                    ts % SEC,
                                    self.range.beg / SEC,
                                    self.range.beg % SEC,
                                    self.range.end / SEC,
                                    self.range.end % SEC,
                                    pulse,
                                    self.fetch_info.shape(),
                                    self.dbg_path
                                );
                                warn!("{}", msg);
                            }
                            if self.seen_before_range_count > 100 {
                                let msg = format!(
                                    "too many seen before range: {}  event ts {}.{}  range beg {}.{}  range end {}.{}  pulse {}  config {:?}  path {:?}",
                                    self.seen_before_range_count,
                                    ts / SEC,
                                    ts % SEC,
                                    self.range.beg / SEC,
                                    self.range.beg % SEC,
                                    self.range.end / SEC,
                                    self.range.end % SEC,
                                    pulse,
                                    self.fetch_info.shape(),
                                    self.dbg_path
                                );
                                error!("{}", msg);
                                return Err(DataParseError::TooManyBeforeRange);
                            }
                        }
                        let _ioc_ts = sl.read_i64::<BE>().unwrap();
                        let status = sl.read_i8().unwrap();
                        let severity = sl.read_i8().unwrap();
                        let optional = sl.read_i32::<BE>().unwrap();
                        if status != 0 {
                            // return Err(DataParseError::UnexpectedStatus);
                            // TODO count
                        }
                        if severity != 0 {
                            // return Err(DataParseError::TooManyBeforeRange);
                            // TODO count
                        }
                        if optional != -1 {
                            return Err(DataParseError::EventWithOptional);
                        }
                        // Decode type flags and scalar type index.
                        let type_flags = sl.read_u8().unwrap();
                        let type_index = sl.read_u8().unwrap();
                        if type_index > 13 {
                            return Err(DataParseError::BadTypeIndex);
                        }
                        let scalar_type =
                            ScalarType::from_dtype_index(type_index).map_err(|_| DataParseError::BadTypeIndex)?;
                        let is_compressed = type_flags & COMPRESSION != 0;
                        let is_array = type_flags & ARRAY != 0;
                        let is_big_endian = type_flags & BIG_ENDIAN != 0;
                        let is_shaped = type_flags & SHAPE != 0;
                        if let Shape::Wave(_) = self.fetch_info.shape() {
                            if !is_array {
                                return Err(DataParseError::WaveShapeWithoutEventArray);
                            }
                        }
                        let compression_method = if is_compressed { sl.read_u8().unwrap() } else { 0 };
                        let shape_dim = if is_shaped { sl.read_u8().unwrap() } else { 0 };
                        let mut shape_lens = [0, 0, 0, 0];
                        for i1 in 0..shape_dim {
                            shape_lens[i1 as usize] = sl.read_u32::<BE>().unwrap();
                        }
                        // Map the per-event dims to a Shape; events with 0 or
                        // >2 dims are discarded instead of failing the stream.
                        let shape_this = {
                            if is_shaped {
                                if shape_dim == 1 {
                                    Shape::Wave(shape_lens[0])
                                } else if shape_dim == 2 {
                                    Shape::Image(shape_lens[0], shape_lens[1])
                                } else if shape_dim == 0 {
                                    discard = true;
                                    // return Err(DataParseError::ShapedWithoutDims);
                                    Shape::Scalar
                                } else {
                                    discard = true;
                                    // return Err(DataParseError::TooManyDims);
                                    Shape::Scalar
                                }
                            } else {
                                Shape::Scalar
                            }
                        };
                        let comp_this = if is_compressed {
                            if compression_method == 0 {
                                Some(CompressionMethod::BitshuffleLZ4)
                            } else {
                                return Err(DataParseError::UnknownCompression);
                            }
                        } else {
                            None
                        };
                        let p1 = sl.position();
                        let n1 = p1 - p0;
                        let n2 = len as u64 - n1 - 4;
                        let databuf = buf[p1 as usize..(p1 as usize + n2 as usize)].as_ref();
                        // Disabled sanity checks on the compression header.
                        if false && is_compressed {
                            //debug!("event  ts {}  is_compressed {}", ts, is_compressed);
                            let value_bytes = sl.read_u64::<BE>().unwrap();
                            let block_size = sl.read_u32::<BE>().unwrap();
                            //debug!("event  len {}  ts {}  is_compressed {}  shape_dim {}  len-dim-0 {}  value_bytes {}  block_size {}", len, ts, is_compressed, shape_dim, shape_lens[0], value_bytes, block_size);
                            match self.fetch_info.shape() {
                                Shape::Scalar => {
                                    assert!(value_bytes < 1024 * 1);
                                }
                                Shape::Wave(_) => {
                                    assert!(value_bytes < 1024 * 64);
                                }
                                Shape::Image(_, _) => {
                                    assert!(value_bytes < 1024 * 1024 * 20);
                                }
                            }
                            if block_size > 1024 * 32 {
                                return Err(DataParseError::BadCompresionBlockSize);
                            }
                            let type_size = scalar_type.bytes() as u32;
                            let _ele_count = value_bytes / type_size as u64;
                            let _ele_size = type_size;
                        }
                        if discard {
                            self.discard_count += 1;
                        } else {
                            // Raw (possibly compressed) payload is stored;
                            // decompression happens downstream.
                            ret.add_event(
                                ts,
                                pulse,
                                Some(databuf.to_vec()),
                                None,
                                scalar_type,
                                is_big_endian,
                                shape_this,
                                comp_this,
                            );
                        }
                        buf.advance(len as usize);
                        parsed_bytes += len as u64;
                        self.need_min = 4;
                    }
                }
            }
        }
        Ok(ParseResult {
            events: ret,
            parsed_bytes,
        })
    }
}
impl Stream for EventChunker {
    type Item = Result<StreamItem<RangeCompletableItem<EventFull>>, Error>;
    // Emission order: periodic read-stats items, then data batches; at end of
    // input a final stats item, then (if the range was fully covered) a
    // RangeComplete marker, then Ready(None).
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        'outer: loop {
            break if self.completed {
                panic!("EventChunker poll_next on completed");
            } else if self.errored {
                self.completed = true;
                Ready(None)
            } else if self.parsed_bytes >= self.stats_conf.disk_stats_every.bytes() as u64 {
                // Periodic disk-read stats, throttled by parsed byte count.
                let item = EventDataReadStats {
                    parsed_bytes: self.parsed_bytes,
                };
                self.parsed_bytes = 0;
                let ret = StreamItem::Stats(StatsItem::EventDataReadStats(item));
                Ready(Some(Ok(ret)))
            } else if self.sent_beyond_range {
                self.completed = true;
                Ready(None)
            } else if self.final_stats_sent {
                self.sent_beyond_range = true;
                trace!("sent_beyond_range");
                if self.seen_beyond_range {
                    trace!("sent_beyond_range  RangeComplete");
                    Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
                } else {
                    trace!("sent_beyond_range  non-complete");
                    continue 'outer;
                }
            } else if self.data_emit_complete {
                // Flush the remaining stats before finishing.
                let item = EventDataReadStats {
                    parsed_bytes: self.parsed_bytes,
                };
                self.parsed_bytes = 0;
                let ret = StreamItem::Stats(StatsItem::EventDataReadStats(item));
                self.final_stats_sent = true;
                Ready(Some(Ok(ret)))
            } else {
                match self.inp.poll_next_unpin(cx) {
                    Ready(Some(Ok(mut fcr))) => {
                        if false {
                            // TODO collect for stats:
                            info!(
                                "file read  bytes {}  ms {}",
                                fcr.buf().len(),
                                fcr.duration().as_millis()
                            );
                        }
                        let r = self.parse_buf(fcr.buf_mut());
                        match r {
                            Ok(res) => {
                                self.parsed_bytes += res.parsed_bytes;
                                if fcr.buf().len() > 0 {
                                    // Unconsumed remainder goes back upstream
                                    // to be completed by the next chunk.
                                    // TODO gather stats about this:
                                    self.inp.put_back(fcr);
                                }
                                if self.need_min > self.need_min_max {
                                    let msg = format!(
                                        "spurious EventChunker asks for need_min {}  max {}",
                                        self.need_min, self.need_min_max
                                    );
                                    self.errored = true;
                                    return Ready(Some(Err(Error::with_msg(msg))));
                                }
                                let x = self.need_min;
                                self.inp.set_need_min(x);
                                if false {
                                    info!(
                                        "EventChunker  emits {} events  tss {:?}",
                                        res.events.len(),
                                        res.events.tss
                                    );
                                };
                                self.item_len_emit_histo.ingest(res.events.len() as u32);
                                let ret = StreamItem::DataItem(RangeCompletableItem::Data(res.events));
                                Ready(Some(Ok(ret)))
                            }
                            Err(e) => {
                                self.errored = true;
                                Ready(Some(Err(e.into())))
                            }
                        }
                    }
                    Ready(Some(Err(e))) => {
                        self.errored = true;
                        Ready(Some(Err(e)))
                    }
                    Ready(None) => {
                        self.data_emit_complete = true;
                        continue 'outer;
                    }
                    Pending => Pending,
                }
            };
        }
    }
}
#[cfg(test)]
mod test {
    // NOTE(review): this module is entirely commented out; the sketch below is
    // kept as a template for when on-the-fly test data generation is restored.
    //use err::Error;
    //use netpod::timeunits::*;
    //use netpod::{ByteSize, Nanos};
    //const TEST_BACKEND: &str = "testbackend-00";
    /*
    #[test]
    fn read_expanded_for_range(range: netpod::NanoRange, nodeix: usize) -> Result<(usize, usize), Error> {
        let chn = netpod::Channel {
            backend: TEST_BACKEND.into(),
            name: "scalar-i32-be".into(),
        };
        // TODO read config from disk.
        let channel_config = ChannelConfig {
            channel: chn,
            keyspace: 2,
            time_bin_size: Nanos { ns: DAY },
            scalar_type: netpod::ScalarType::I32,
            byte_order: netpod::ByteOrder::big_endian(),
            shape: netpod::Shape::Scalar,
            array: false,
            compression: false,
        };
        let cluster = taskrun::test_cluster();
        let node = cluster.nodes[nodeix].clone();
        let buffer_size = 512;
        let event_chunker_conf = EventChunkerConf {
            disk_stats_every: ByteSize::kb(1024),
        };
    }
    */
}

1
crates/disk/src/frame.rs Normal file
View File

@@ -0,0 +1 @@
pub mod makeframe;

View File

@@ -0,0 +1 @@

536
crates/disk/src/gen.rs Normal file
View File

@@ -0,0 +1,536 @@
use crate::ChannelConfigExt;
use crate::SfDbChConf;
use bitshuffle::bitshuffle_compress;
use bytes::BufMut;
use bytes::BytesMut;
use err::Error;
use netpod::log::*;
use netpod::timeunits::*;
use netpod::ByteOrder;
use netpod::DtNano;
use netpod::GenVar;
use netpod::Node;
use netpod::ScalarType;
use netpod::SfDatabuffer;
use netpod::SfDbChannel;
use netpod::Shape;
use netpod::TsNano;
use std::path::Path;
use std::path::PathBuf;
use tokio::fs::File;
use tokio::fs::OpenOptions;
use tokio::io::AsyncWriteExt;
const BACKEND: &str = "testbackend-00";
/// Generate the on-disk databuffer test dataset under
/// `$HOME/daqbuffer-testdata/databuffer`: a 3-node ensemble with a fixed set
/// of scalar and waveform channels.
pub async fn gen_test_data() -> Result<(), Error> {
    let backend = String::from(BACKEND);
    let homedir = std::env::var("HOME").unwrap();
    let data_base_path = PathBuf::from(homedir).join("daqbuffer-testdata").join("databuffer");
    let ksprefix = String::from("ks");
    let mut ensemble = Ensemble {
        nodes: Vec::new(),
        channels: Vec::new(),
    };
    {
        // Fixed channel set covering: scalar BE, compressed waveforms (BE/LE),
        // a time-weighted scalar and a constant regular scalar.
        let chn = ChannelGenProps {
            config: SfDbChConf {
                channel: SfDbChannel::from_name(&backend, "scalar-i32-be"),
                keyspace: 2,
                time_bin_size: DtNano::from_ns(DAY),
                scalar_type: ScalarType::I32,
                byte_order: ByteOrder::Big,
                shape: Shape::Scalar,
                array: false,
                compression: false,
            },
            gen_var: netpod::GenVar::Default,
            time_spacing: MS * 500,
        };
        ensemble.channels.push(chn);
        let chn = ChannelGenProps {
            config: SfDbChConf {
                channel: SfDbChannel::from_name(&backend, "wave-f64-be-n21"),
                keyspace: 3,
                time_bin_size: DtNano::from_ns(DAY),
                array: true,
                scalar_type: ScalarType::F64,
                shape: Shape::Wave(21),
                byte_order: ByteOrder::Big,
                compression: true,
            },
            gen_var: netpod::GenVar::Default,
            time_spacing: MS * 4000,
        };
        ensemble.channels.push(chn);
        let chn = ChannelGenProps {
            config: SfDbChConf {
                channel: SfDbChannel::from_name(&backend, "wave-u16-le-n77"),
                keyspace: 3,
                time_bin_size: DtNano::from_ns(DAY),
                scalar_type: ScalarType::U16,
                byte_order: ByteOrder::Little,
                shape: Shape::Wave(77),
                array: true,
                compression: true,
            },
            gen_var: netpod::GenVar::Default,
            time_spacing: MS * 500,
        };
        ensemble.channels.push(chn);
        let chn = ChannelGenProps {
            config: SfDbChConf {
                channel: SfDbChannel::from_name(&backend, "tw-scalar-i32-be"),
                keyspace: 2,
                time_bin_size: DtNano::from_ns(DAY),
                scalar_type: ScalarType::I32,
                byte_order: ByteOrder::Little,
                shape: Shape::Scalar,
                array: false,
                compression: false,
            },
            gen_var: netpod::GenVar::TimeWeight,
            time_spacing: MS * 500,
        };
        ensemble.channels.push(chn);
        let chn = ChannelGenProps {
            config: SfDbChConf {
                channel: SfDbChannel::from_name(&backend, "const-regular-scalar-i32-be"),
                keyspace: 2,
                time_bin_size: DtNano::from_ns(DAY),
                scalar_type: ScalarType::I32,
                byte_order: ByteOrder::Little,
                shape: Shape::Scalar,
                array: false,
                compression: false,
            },
            gen_var: netpod::GenVar::ConstRegular,
            time_spacing: MS * 500,
        };
        ensemble.channels.push(chn);
    }
    // Three local nodes, each with its own data directory and port offsets.
    for i1 in 0..3 {
        let node = Node {
            host: "localhost".into(),
            listen: "0.0.0.0".into(),
            port: 7780 + i1 as u16,
            port_raw: 7780 + i1 as u16 + 100,
            cache_base_path: data_base_path.join(format!("node{:02}", i1)),
            sf_databuffer: Some(SfDatabuffer {
                data_base_path: data_base_path.join(format!("node{:02}", i1)),
                ksprefix: ksprefix.clone(),
                splits: None,
            }),
            archiver_appliance: None,
            channel_archiver: None,
            prometheus_api_bind: None,
        };
        ensemble.nodes.push(node);
    }
    // The split index equals the node's position in the ensemble.
    for (split, node) in ensemble.nodes.iter().enumerate() {
        gen_node(split as u32, node, &ensemble).await?;
    }
    Ok(())
}
/// The complete test setup: the cluster nodes and the channels to generate.
struct Ensemble {
    nodes: Vec<Node>,
    channels: Vec<ChannelGenProps>,
}
/// Per-channel generation parameters.
pub struct ChannelGenProps {
    config: SfDbChConf,
    // Nanoseconds between consecutive generated events.
    time_spacing: u64,
    // Value-generation variant (default, time-weighted, constant-regular, ...).
    gen_var: GenVar,
}
/// Generate data for every channel of the ensemble on one node/split.
async fn gen_node(split: u32, node: &Node, ensemble: &Ensemble) -> Result<(), Error> {
    for chn in ensemble.channels.iter() {
        gen_channel(chn, split, node, ensemble).await?;
    }
    Ok(())
}
/// Generate the config file and three days' worth of time-binned data files
/// for one channel on one node.
async fn gen_channel(chn: &ChannelGenProps, split: u32, node: &Node, ensemble: &Ensemble) -> Result<(), Error> {
    let sfc = node.sf_databuffer.as_ref().unwrap();
    // Layout: <base>/config/<channel> and <base>/<ksprefix>_<keyspace>/byTime/<channel>.
    let config_path = sfc.data_base_path.join("config").join(chn.config.channel.name());
    let channel_path = sfc
        .data_base_path
        .join(format!("{}_{}", sfc.ksprefix, chn.config.keyspace))
        .join("byTime")
        .join(chn.config.channel.name());
    tokio::fs::create_dir_all(&channel_path).await?;
    gen_config(&config_path, &chn.config, node, ensemble)
        .await
        .map_err(|k| Error::with_msg(format!("can not generate config {:?}", k)))?;
    // Generate one time bin after another, carrying the event index,
    // timestamp and pulse id across bins.
    let mut evix = 0;
    let mut ts = TsNano(0);
    let mut pulse = 0;
    while ts.ns() < DAY * 3 {
        let res = gen_timebin(
            evix,
            ts,
            pulse,
            chn.time_spacing,
            &channel_path,
            &chn.config,
            split,
            node,
            ensemble,
            &chn.gen_var,
        )
        .await?;
        evix = res.evix;
        ts = res.ts;
        pulse = res.pulse;
    }
    Ok(())
}
/// Write the channel config file `<config_path>/latest/00000_Config`.
///
/// Layout: i16 version, the length-framed channel name, then one
/// length-framed config entry. Two length fields are written as
/// placeholders first and backpatched once the actual lengths are known.
async fn gen_config(config_path: &Path, config: &SfDbChConf, _node: &Node, _ensemble: &Ensemble) -> Result<(), Error> {
    let path = config_path.join("latest");
    tokio::fs::create_dir_all(&path).await?;
    let path = path.join("00000_Config");
    info!("try to open {:?}", path);
    let mut file = OpenOptions::new()
        .write(true)
        .create(true)
        .truncate(true)
        .open(path)
        .await?;
    let mut buf = BytesMut::with_capacity(1024 * 1);
    let ver = 0;
    buf.put_i16(ver);
    // Channel name framed by (name length + 8) before and after.
    let cnenc = config.channel.name().as_bytes();
    let len1 = cnenc.len() + 8;
    buf.put_i32(len1 as i32);
    buf.put(cnenc);
    buf.put_i32(len1 as i32);
    // Entry fields below are fixed test values.
    let ts = 0;
    let pulse = 0;
    let sc = 0;
    let status = 0;
    let bb = 0;
    let modulo = 0;
    let offset = 0;
    let precision = 0;
    // p1 marks the entry-length placeholder, backpatched at the end.
    let p1 = buf.len();
    buf.put_i32(0x20202020);
    buf.put_i64(ts);
    buf.put_i64(pulse);
    buf.put_u32(config.keyspace as u32);
    buf.put_u64(config.time_bin_size.ns() / MS);
    buf.put_i32(sc);
    buf.put_i32(status);
    buf.put_i8(bb);
    buf.put_i32(modulo);
    buf.put_i32(offset);
    buf.put_i16(precision);
    {
        // this len does not include itself and there seems to be no copy of it afterwards.
        // p3 marks the shape/dtype-section length placeholder (backpatched below).
        let p3 = buf.len();
        buf.put_i32(404040);
        buf.put_u8(config.dtflags());
        buf.put_u8(config.scalar_type.index());
        if config.compression {
            let method = 0;
            buf.put_i8(method);
        }
        match config.shape {
            Shape::Scalar => {}
            Shape::Wave(k) => {
                buf.put_i8(1);
                buf.put_i32(k as i32);
            }
            Shape::Image(_, _) => {
                // TODO test data
                err::todoval()
            }
        }
        let len = buf.len() - p3 - 4;
        // Backpatch: write the real section length over the placeholder at p3.
        buf.as_mut()[p3..].as_mut().put_i32(len as i32);
    }
    // source name
    buf.put_i32(-1);
    // unit
    buf.put_i32(-1);
    // description
    buf.put_i32(-1);
    // optional fields
    buf.put_i32(-1);
    // value converter
    buf.put_i32(-1);
    let p2 = buf.len();
    let len = p2 - p1 + 4;
    // Entry length appears twice: appended at the end and backpatched at p1.
    buf.put_i32(len as i32);
    buf.as_mut()[p1..].as_mut().put_i32(len as i32);
    file.write_all(&buf).await?;
    Ok(())
}
/// Wrapper around a tokio `File` which counts the total bytes written,
/// so callers can record the offset of each event for the index file.
struct CountedFile {
    file: File,
    // Total bytes written through this wrapper so far.
    bytes: u64,
}
impl CountedFile {
pub fn new(file: File) -> Self {
Self { file, bytes: 0 }
}
pub async fn write_all(&mut self, buf: &[u8]) -> Result<u64, Error> {
let l = buf.len();
let mut i = 0;
loop {
match self.file.write(&buf[i..]).await {
Ok(n) => {
i += n;
self.bytes += n as u64;
if i >= l {
break;
}
}
Err(e) => Err(e)?,
}
}
Ok(i as u64)
}
pub fn written_len(&self) -> u64 {
self.bytes
}
}
/// Counters returned by `gen_timebin` so the caller can continue event
/// generation seamlessly in the next timebin.
struct GenTimebinRes {
    evix: u64,
    ts: TsNano,
    pulse: u64,
}
/// Create the data file (and, for waveform channels, the index file) of one
/// timebin and fill it with events until the end of that bin.
///
/// Only events whose index maps to this node's `split` (round-robin over the
/// ensemble nodes) are written. Returns the updated counters for the next bin.
async fn gen_timebin(
    evix: u64,
    ts: TsNano,
    pulse: u64,
    ts_spacing: u64,
    channel_path: &Path,
    config: &SfDbChConf,
    split: u32,
    _node: &Node,
    ensemble: &Ensemble,
    gen_var: &GenVar,
) -> Result<GenTimebinRes, Error> {
    let tb = ts.ns() / config.time_bin_size.ns();
    let path = channel_path.join(format!("{:019}", tb)).join(format!("{:010}", split));
    tokio::fs::create_dir_all(&path).await?;
    let data_path = path.join(format!("{:019}_{:05}_Data", config.time_bin_size.ns() / MS, 0));
    let index_path = path.join(format!("{:019}_{:05}_Data_Index", config.time_bin_size.ns() / MS, 0));
    info!("open data file {:?}", data_path);
    let file = OpenOptions::new()
        .write(true)
        .create(true)
        .truncate(true)
        .open(data_path)
        .await?;
    let mut file = CountedFile::new(file);
    // Waveform channels get an index file next to the data file; it starts
    // with a two-byte header.
    let mut index_file = if let Shape::Wave(_) = config.shape {
        info!("open index file {:?}", index_path);
        let f = OpenOptions::new()
            .write(true)
            .create(true)
            .truncate(true)
            .open(index_path)
            .await?;
        let mut f = CountedFile::new(f);
        f.write_all(b"\x00\x00").await?;
        Some(f)
    } else {
        None
    };
    gen_datafile_header(&mut file, config).await?;
    let mut evix = evix;
    let mut ts = ts;
    let mut pulse = pulse;
    // End of this timebin (exclusive).
    let tsmax = TsNano((tb + 1) * config.time_bin_size.ns());
    while ts.ns() < tsmax.ns() {
        match gen_var {
            // TODO
            // Splits and nodes are not in 1-to-1 correspondence.
            GenVar::Default => {
                if evix % ensemble.nodes.len() as u64 == split as u64 {
                    gen_event(&mut file, index_file.as_mut(), evix, ts.clone(), pulse, config, gen_var).await?;
                }
            }
            GenVar::ConstRegular => {
                if evix % ensemble.nodes.len() as u64 == split as u64 {
                    gen_event(&mut file, index_file.as_mut(), evix, ts.clone(), pulse, config, gen_var).await?;
                }
            }
            GenVar::TimeWeight => {
                // Only two adjacent slots out of every 20 carry an event.
                let m = evix % 20;
                if m == 0 || m == 1 {
                    if evix % ensemble.nodes.len() as u64 == split as u64 {
                        gen_event(&mut file, index_file.as_mut(), evix, ts.clone(), pulse, config, gen_var).await?;
                    }
                }
            }
        }
        evix += 1;
        ts.0 += ts_spacing;
        pulse += 1;
    }
    let ret = GenTimebinRes { evix, ts, pulse };
    Ok(ret)
}
/// Write the data file header: an i16 format version (0) followed by the
/// channel name framed by its length field (name length + 8) on both sides.
async fn gen_datafile_header(file: &mut CountedFile, config: &SfDbChConf) -> Result<(), Error> {
    let name_bytes = config.channel.name().as_bytes();
    let framed_len = (name_bytes.len() + 8) as i32;
    let mut buf = BytesMut::with_capacity(1024);
    // Format version.
    buf.put_i16(0);
    buf.put_i32(framed_len);
    buf.put(name_bytes);
    buf.put_i32(framed_len);
    file.write_all(&buf).await?;
    Ok(())
}
/// Serialize one event into the data file and, when an index file is given,
/// append its (timestamp, file offset) pair to the index.
///
/// The event is length-framed: a placeholder i32 is written first and
/// backpatched with the total length, which is also appended at the end.
/// Only a subset of (compression, shape, scalar type) combinations that the
/// tests need is implemented; the rest hits `todo!`.
async fn gen_event(
    file: &mut CountedFile,
    index_file: Option<&mut CountedFile>,
    evix: u64,
    ts: TsNano,
    pulse: u64,
    config: &SfDbChConf,
    gen_var: &GenVar,
) -> Result<(), Error> {
    // Filler values for fields the tests do not inspect.
    let ttl = 0xcafecafe;
    let ioc_ts = 0xcafecafe;
    let mut buf = BytesMut::with_capacity(1024 * 16);
    // Length placeholder, backpatched at the end of this function.
    buf.put_i32(0xcafecafe as u32 as i32);
    buf.put_u64(ttl);
    buf.put_u64(ts.ns());
    buf.put_u64(pulse);
    buf.put_u64(ioc_ts);
    buf.put_u8(0);
    buf.put_u8(0);
    buf.put_i32(-1);
    use streams::dtflags::*;
    if config.compression {
        match config.shape {
            Shape::Wave(ele_count) => {
                let mut flags = COMPRESSION | ARRAY | SHAPE;
                if config.byte_order.is_be() {
                    flags |= BIG_ENDIAN;
                }
                buf.put_u8(flags);
                buf.put_u8(config.scalar_type.index());
                let comp_method = 0 as u8;
                buf.put_u8(comp_method);
                // Shape: one dimension with ele_count elements.
                buf.put_u8(1);
                buf.put_u32(ele_count as u32);
                match &config.scalar_type {
                    ScalarType::F64 => {
                        let ele_size = 8;
                        // Encode the wave values into a flat byte buffer with the
                        // configured endianness, then bitshuffle-compress it.
                        let mut vals = vec![0; (ele_size * ele_count) as usize];
                        for i1 in 0..ele_count {
                            let v = (evix as f64) * 100.0 + i1 as f64;
                            let a = if config.byte_order.is_be() {
                                v.to_be_bytes()
                            } else {
                                v.to_le_bytes()
                            };
                            use std::io::{Cursor, Seek, SeekFrom, Write};
                            let mut c1 = Cursor::new(&mut vals);
                            c1.seek(SeekFrom::Start(i1 as u64 * ele_size as u64))?;
                            Write::write_all(&mut c1, &a)?;
                        }
                        let mut comp = vec![0u8; (ele_size * ele_count + 64) as usize];
                        let n1 =
                            bitshuffle_compress(&vals, &mut comp, ele_count as usize, ele_size as usize, 0).unwrap();
                        buf.put_u64(vals.len() as u64);
                        let comp_block_size = 0;
                        buf.put_u32(comp_block_size);
                        buf.put(&comp[..n1]);
                    }
                    ScalarType::U16 => {
                        let ele_size = 2;
                        let mut vals = vec![0; (ele_size * ele_count) as usize];
                        for i1 in 0..ele_count {
                            let v = (evix as u16).wrapping_mul(100).wrapping_add(i1 as u16);
                            let a = if config.byte_order.is_be() {
                                v.to_be_bytes()
                            } else {
                                v.to_le_bytes()
                            };
                            use std::io::{Cursor, Seek, SeekFrom, Write};
                            let mut c1 = Cursor::new(&mut vals);
                            c1.seek(SeekFrom::Start(i1 as u64 * ele_size as u64))?;
                            Write::write_all(&mut c1, &a)?;
                        }
                        let mut comp = vec![0u8; (ele_size * ele_count + 64) as usize];
                        let n1 =
                            bitshuffle_compress(&vals, &mut comp, ele_count as usize, ele_size as usize, 0).unwrap();
                        buf.put_u64(vals.len() as u64);
                        let comp_block_size = 0;
                        buf.put_u32(comp_block_size);
                        buf.put(&comp[..n1]);
                    }
                    _ => todo!("Datatype not yet supported: {:?}", config.scalar_type),
                }
            }
            _ => todo!("Shape not yet supported: {:?}", config.shape),
        }
    } else {
        match config.shape {
            Shape::Scalar => {
                let mut flags = 0;
                if config.byte_order.is_be() {
                    flags |= BIG_ENDIAN;
                }
                buf.put_u8(flags);
                buf.put_u8(config.scalar_type.index());
                match &config.scalar_type {
                    ScalarType::I32 => {
                        // Value pattern depends on the generator variant.
                        let v = match gen_var {
                            GenVar::Default => evix as i32,
                            GenVar::ConstRegular => 42 as i32,
                            GenVar::TimeWeight => {
                                let m = evix % 20;
                                if m == 0 {
                                    200
                                } else if m == 1 {
                                    400
                                } else {
                                    0
                                }
                            }
                        };
                        if config.byte_order.is_be() {
                            buf.put_i32(v);
                        } else {
                            buf.put_i32_le(v);
                        };
                    }
                    _ => todo!("Datatype not yet supported: {:?}", config.scalar_type),
                }
            }
            _ => todo!("Shape not yet supported: {:?}", config.shape),
        }
    }
    {
        // Total event length: append it and backpatch the placeholder at offset 0.
        let len = buf.len() as u32 + 4;
        buf.put_u32(len);
        buf.as_mut().put_u32(len);
    }
    // Offset of this event in the data file, recorded in the index.
    let z = file.written_len();
    file.write_all(buf.as_ref()).await?;
    if let Some(f) = index_file {
        let mut buf = BytesMut::with_capacity(16);
        buf.put_u64(ts.ns());
        buf.put_u64(z);
        f.write_all(&buf).await?;
    }
    Ok(())
}

345
crates/disk/src/index.rs Normal file
View File

@@ -0,0 +1,345 @@
use arrayref::array_ref;
use err::Error;
use netpod::log::*;
use netpod::range::evrange::NanoRange;
use netpod::TsNano;
use std::mem::size_of;
use tokio::fs::File;
use tokio::io::AsyncReadExt;
use tokio::io::AsyncSeekExt;
use tokio::io::SeekFrom;
pub fn find_ge(range: NanoRange, expand_right: bool, buf: &[u8]) -> Result<Option<(u64, u64)>, Error> {
type VT = u64;
const NT: usize = size_of::<VT>();
const N: usize = 2 * NT;
let n1 = buf.len();
if n1 % N != 0 {
return Err(Error::with_msg(format!("find_ge bad len {}", n1)));
}
if n1 == 0 {
warn!("Empty index data");
return Ok(None);
}
let n1 = n1 / N;
let a = unsafe {
let ptr = &buf[0] as *const u8 as *const ([u8; NT], [u8; NT]);
std::slice::from_raw_parts(ptr, n1)
};
let mut j = 0;
let mut k = n1 - 1;
let x = VT::from_be_bytes(a[j].0);
let y = VT::from_be_bytes(a[k].0);
if y < range.beg {
return Ok(None);
}
if x >= range.beg {
if x < range.end || expand_right {
return Ok(Some((x, VT::from_be_bytes(a[j].1))));
} else {
return Ok(None);
}
}
if x >= y {
return Err(Error::with_public_msg(format!(
"search in unordered data ts1 {x} ts2 {y}"
)));
}
let mut x = x;
let mut y = y;
loop {
if x >= y {
return Err(Error::with_public_msg(format!(
"search (loop) in unordered data ts1 {x} ts2 {y}"
)));
}
if k - j < 2 {
if y < range.end || expand_right {
let ret = (y, VT::from_be_bytes(a[k].1));
return Ok(Some(ret));
} else {
return Ok(None);
}
}
let m = (k + j) / 2;
let e = VT::from_be_bytes(a[m].0);
if e < range.beg {
j = m;
x = e;
} else {
k = m;
y = e;
}
}
}
pub fn find_largest_smaller_than(
range: NanoRange,
_expand_right: bool,
buf: &[u8],
) -> Result<Option<(u64, u64)>, Error> {
type NUM = u64;
const ELESIZE: usize = size_of::<NUM>();
const N: usize = 2 * ELESIZE;
let n1 = buf.len();
if n1 % N != 0 {
return Err(Error::with_msg(format!("find_ge bad len {}", n1)));
}
if n1 == 0 {
warn!("Empty index data");
return Ok(None);
}
let n1 = n1 / N;
let a = unsafe {
let ptr = &buf[0] as *const u8 as *const ([u8; ELESIZE], [u8; ELESIZE]);
std::slice::from_raw_parts(ptr, n1)
};
let mut j = 0;
let mut k = n1 - 1;
let x = NUM::from_be_bytes(a[j].0);
let y = NUM::from_be_bytes(a[k].0);
if x >= range.beg {
return Ok(None);
}
if y < range.beg {
let ret = (y, NUM::from_be_bytes(a[k].1));
return Ok(Some(ret));
}
if x >= y {
return Err(Error::with_public_msg(format!(
"search in unordered data ts1 {x} ts2 {y}"
)));
}
let mut x = x;
let mut y = y;
loop {
if x >= y {
return Err(Error::with_public_msg(format!(
"search (loop) in unordered data ts1 {x} ts2 {y}"
)));
}
if k - j < 2 {
let ret = (x, NUM::from_be_bytes(a[j].1));
return Ok(Some(ret));
}
let m = (k + j) / 2;
let e = NUM::from_be_bytes(a[m].0);
if e < range.beg {
j = m;
x = e;
} else {
k = m;
y = e;
}
}
}
/// Fill `buf` from the current file position, reading repeatedly until the
/// buffer is full or EOF is hit. Returns the number of bytes actually read.
async fn read(buf: &mut [u8], file: &mut File) -> Result<usize, Error> {
    let mut filled = 0;
    while filled < buf.len() {
        let n = file.read(&mut buf[filled..]).await?;
        if n == 0 {
            // EOF before the buffer was filled.
            break;
        }
        filled += n;
    }
    Ok(filled)
}
pub fn parse_channel_header(buf: &[u8]) -> Result<(u32,), Error> {
if buf.len() < 6 {
return Err(Error::with_msg(format!("parse_channel_header buf len: {}", buf.len())));
}
let ver = i16::from_be_bytes(*array_ref![buf, 0, 2]);
if ver != 0 {
return Err(Error::with_msg(format!("unknown file version: {}", ver)));
}
let len1 = u32::from_be_bytes(*array_ref![buf, 2, 4]);
if len1 < 9 || len1 > 256 {
return Err(Error::with_msg(format!("unexpected data file header len1: {}", len1)));
}
if buf.len() < 2 + len1 as usize {
return Err(Error::with_msg(format!(
"data file header not contained in buffer len1: {} vs {}",
len1,
buf.len()
)));
}
let len2 = u32::from_be_bytes(*array_ref![buf, 2 + len1 as usize - 4, 4]);
if len1 != len2 {
return Err(Error::with_msg(format!("len mismatch len1: {} len2: {}", len1, len2)));
}
Ok((len1 as u32,))
}
pub fn parse_event(buf: &[u8]) -> Result<(u32, TsNano), Error> {
if buf.len() < 4 {
return Err(Error::with_msg(format!("parse_event buf len: {}", buf.len())));
}
let len1 = u32::from_be_bytes(*array_ref![buf, 0, 4]);
if len1 < 9 || len1 > 512 {
return Err(Error::with_msg(format!("unexpected event len1: {}", len1)));
}
if buf.len() < len1 as usize {
return Err(Error::with_msg(format!(
"event not contained in buffer len1: {} vs {}",
len1,
buf.len()
)));
}
let len2 = u32::from_be_bytes(*array_ref![buf, len1 as usize - 4, 4]);
if len1 != len2 {
return Err(Error::with_msg(format!("len mismatch len1: {} len2: {}", len1, len2)));
}
let ts = u64::from_be_bytes(*array_ref![buf, 12, 8]);
Ok((len1 as u32, TsNano(ts)))
}
/// Seek to absolute file offset `pos`, read up to 1 KiB and parse the event
/// found there. Returns (event length, timestamp).
pub async fn read_event_at(pos: u64, file: &mut File) -> Result<(u32, TsNano), Error> {
    file.seek(SeekFrom::Start(pos)).await?;
    let mut buf = vec![0; 1024];
    let _ = read(&mut buf, file).await?;
    parse_event(&buf)
}
/// Binary-search a data file with fixed-length events and position it at the
/// first event with timestamp `>= range.beg`.
///
/// Returns `(file, found, nreads, offset)`: `found` tells whether a usable
/// event position was found (otherwise the file is rewound to 0), `nreads`
/// counts the events read during the search, `offset` is the final position.
pub async fn position_static_len_datafile(
    mut file: File,
    range: NanoRange,
    expand_right: bool,
) -> Result<(File, bool, u32, u64), Error> {
    let flen = file.seek(SeekFrom::End(0)).await?;
    file.seek(SeekFrom::Start(0)).await?;
    let mut buf = vec![0; 1024];
    let _n1 = read(&mut buf, &mut file).await?;
    let hres = parse_channel_header(&buf)?;
    // Offset of the first event: directly after the header.
    let headoff = 2 + hres.0 as u64;
    let ev = parse_event(&buf[headoff as usize..])?;
    // All events are assumed to share this length (checked on each read).
    let evlen = ev.0 as u64;
    // j/k: byte offsets of the first and last event.
    let mut j = headoff;
    let mut k = ((flen - headoff) / evlen - 1) * evlen + headoff;
    let x = ev.1.ns();
    let t = read_event_at(k, &mut file).await?;
    if t.0 != evlen as u32 {
        Err(Error::with_msg(format!(
            "inconsistent event lengths: {} vs {}",
            t.0, evlen
        )))?;
    }
    let y = t.1.ns();
    let mut nreads = 2;
    if x >= range.end {
        if expand_right {
            file.seek(SeekFrom::Start(j)).await?;
            return Ok((file, true, nreads, j));
        } else {
            file.seek(SeekFrom::Start(0)).await?;
            return Ok((file, false, nreads, 0));
        }
    }
    if y < range.beg {
        file.seek(SeekFrom::Start(j)).await?;
        return Ok((file, false, nreads, j));
    }
    if x >= range.beg {
        if x < range.end || expand_right {
            file.seek(SeekFrom::Start(j)).await?;
            return Ok((file, true, nreads, j));
        } else {
            file.seek(SeekFrom::Start(0)).await?;
            return Ok((file, false, nreads, 0));
        }
    }
    let mut x = x;
    let mut y = y;
    // Invariant: ts(j) == x < range.beg <= y == ts(k); j, k stay event-aligned.
    loop {
        assert!(x < y);
        assert_eq!((k - j) % evlen, 0);
        if k - j < 2 * evlen {
            if y < range.end || expand_right {
                file.seek(SeekFrom::Start(k)).await?;
                return Ok((file, true, nreads, k));
            } else {
                file.seek(SeekFrom::Start(0)).await?;
                return Ok((file, false, nreads, 0));
            }
        }
        // Midpoint rounded down to an event boundary.
        let m = j + (k - j) / 2 / evlen * evlen;
        let t = read_event_at(m, &mut file).await?;
        if t.0 != evlen as u32 {
            Err(Error::with_msg(format!(
                "inconsistent event lengths: {} vs {}",
                t.0, evlen
            )))?;
        }
        nreads += 1;
        let e = t.1.ns();
        if e < range.beg {
            x = e;
            j = m;
        } else {
            y = e;
            k = m;
        }
    }
}
/// Binary-search a data file with fixed-length events and position it at the
/// last event with timestamp strictly smaller than `range.beg`.
///
/// Returns `(file, found, nreads, offset)` analogous to
/// `position_static_len_datafile`; `found == false` means even the first
/// event is already `>= range.beg`.
pub async fn position_static_len_datafile_at_largest_smaller_than(
    mut file: File,
    range: NanoRange,
    _expand_right: bool,
) -> Result<(File, bool, u32, u64), Error> {
    let flen = file.seek(SeekFrom::End(0)).await?;
    file.seek(SeekFrom::Start(0)).await?;
    let mut buf = vec![0; 1024];
    let _n1 = read(&mut buf, &mut file).await?;
    let hres = parse_channel_header(&buf)?;
    // Offset of the first event: directly after the header.
    let headoff = 2 + hres.0 as u64;
    let ev = parse_event(&buf[headoff as usize..])?;
    // All events are assumed to share this length (checked on each read).
    let evlen = ev.0 as u64;
    // j/k: byte offsets of the first and last event.
    let mut j = headoff;
    let mut k = ((flen - headoff) / evlen - 1) * evlen + headoff;
    let x = ev.1.ns();
    let t = read_event_at(k, &mut file).await?;
    if t.0 != evlen as u32 {
        Err(Error::with_msg(format!(
            "inconsistent event lengths: {} vs {}",
            t.0, evlen
        )))?;
    }
    let y = t.1.ns();
    let mut nreads = 2;
    if x >= range.beg {
        file.seek(SeekFrom::Start(j)).await?;
        return Ok((file, false, nreads, j));
    }
    if y < range.beg {
        file.seek(SeekFrom::Start(k)).await?;
        return Ok((file, true, nreads, k));
    }
    // Invariant: ts(j) < range.beg <= ts(k); j, k stay event-aligned.
    loop {
        if k - j < 2 * evlen {
            file.seek(SeekFrom::Start(j)).await?;
            return Ok((file, true, nreads, j));
        }
        // Midpoint rounded down to an event boundary.
        let m = j + (k - j) / 2 / evlen * evlen;
        let t = read_event_at(m, &mut file).await?;
        if t.0 != evlen as u32 {
            Err(Error::with_msg(format!(
                "inconsistent event lengths: {} vs {}",
                t.0, evlen
            )))?;
        }
        nreads += 1;
        let x = t.1.ns();
        if x < range.beg {
            j = m;
        } else {
            k = m;
        }
    }
}

1
crates/disk/src/merge.rs Normal file
View File

@@ -0,0 +1 @@
// Merging of the per-node event-blob streams into one ordered stream.
pub mod mergedblobsfromremotes;

View File

@@ -0,0 +1,110 @@
use err::Error;
use futures_util::pin_mut;
use futures_util::Stream;
use futures_util::StreamExt;
use items_0::streamitem::Sitemty;
use items_2::eventfull::EventFull;
use items_2::merger::Merger;
use netpod::log::*;
use netpod::Cluster;
use netpod::SfChFetchInfo;
use query::api4::events::EventsSubQuery;
use std::future::Future;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
use streams::tcprawclient::x_processed_event_blobs_stream_from_node;
// Boxed stream of `Sitemty` items.
type T001<T> = Pin<Box<dyn Stream<Item = Sitemty<T>> + Send>>;
// Boxed future resolving to such a stream (a pending per-node connection).
type T002<T> = Pin<Box<dyn Future<Output = Result<T001<T>, Error>> + Send>>;
/// Stream of `EventFull` blobs merged from all nodes of a cluster.
pub struct MergedBlobsFromRemotes {
    // One pending connection future per cluster node.
    tcp_establish_futs: Vec<T002<EventFull>>,
    // Per-node streams; a slot stays `None` until its future resolves.
    nodein: Vec<Option<T001<EventFull>>>,
    // The merged stream, created once every per-node stream is established.
    merged: Option<T001<EventFull>>,
    completed: bool,
    errored: bool,
}
impl MergedBlobsFromRemotes {
    /// Set up a merged event-blob stream over all nodes of `cluster`.
    ///
    /// One connection future per node is created eagerly; the actual per-node
    /// streams are established lazily while the merged stream is polled.
    pub fn new(subq: EventsSubQuery, cluster: Cluster) -> Self {
        debug!("MergedBlobsFromRemotes subq {:?}", subq);
        let mut tcp_establish_futs = Vec::new();
        for node in &cluster.nodes {
            let f = x_processed_event_blobs_stream_from_node(subq.clone(), node.clone());
            let f: T002<EventFull> = Box::pin(f);
            tcp_establish_futs.push(f);
        }
        let n = tcp_establish_futs.len();
        Self {
            tcp_establish_futs,
            // One empty slot per node; `Range` is already an iterator, the
            // former `.into_iter()` was a no-op conversion.
            nodein: (0..n).map(|_| None).collect(),
            merged: None,
            completed: false,
            errored: false,
        }
    }
}
impl Stream for MergedBlobsFromRemotes {
    type Item = Sitemty<EventFull>;
    // State machine: first drive all connection futures; once every per-node
    // stream is up, build the `Merger` and from then on forward its items.
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        'outer: loop {
            break if self.completed {
                panic!("poll_next on completed");
            } else if self.errored {
                self.completed = true;
                return Ready(None);
            } else if let Some(fut) = &mut self.merged {
                // Merged stream exists: simply forward its output.
                match fut.poll_next_unpin(cx) {
                    Ready(Some(Ok(k))) => Ready(Some(Ok(k))),
                    Ready(Some(Err(e))) => {
                        self.errored = true;
                        Ready(Some(Err(e)))
                    }
                    Ready(None) => {
                        self.completed = true;
                        Ready(None)
                    }
                    Pending => Pending,
                }
            } else {
                // Still connecting: poll every unresolved connection future.
                let mut pend = false;
                // c1 counts slots that were already established before this poll.
                let mut c1 = 0;
                for i1 in 0..self.tcp_establish_futs.len() {
                    if self.nodein[i1].is_none() {
                        let f = &mut self.tcp_establish_futs[i1];
                        pin_mut!(f);
                        match f.poll(cx) {
                            Ready(Ok(k)) => {
                                self.nodein[i1] = Some(k);
                            }
                            Ready(Err(e)) => {
                                self.errored = true;
                                return Ready(Some(Err(e)));
                            }
                            Pending => {
                                pend = true;
                            }
                        }
                    } else {
                        c1 += 1;
                    }
                }
                if pend {
                    Pending
                } else {
                    if c1 == self.tcp_establish_futs.len() {
                        // All streams up: hand them to the merger.
                        let inps = self.nodein.iter_mut().map(|k| k.take().unwrap()).collect();
                        // TODO set out_max_len dynamically
                        let s1 = Merger::new(inps, 128);
                        self.merged = Some(Box::pin(s1));
                    }
                    continue 'outer;
                }
            };
        }
    }
}

133
crates/disk/src/paths.rs Normal file
View File

@@ -0,0 +1,133 @@
use crate::SfDbChConf;
use err::Error;
use futures_util::StreamExt;
use netpod::timeunits::MS;
use netpod::Node;
use netpod::SfChFetchInfo;
use netpod::TsNano;
use std::path::PathBuf;
// TODO remove/replace this
/// Build the path of the data file for `timebin`/`split` of a channel on `node`.
///
/// Panics if the node has no sf-databuffer configuration.
pub fn datapath(timebin: u64, config: &SfDbChConf, split: u32, node: &Node) -> PathBuf {
    // Bind the databuffer config once instead of unwrapping it twice.
    let sfc = node.sf_databuffer.as_ref().unwrap();
    sfc.data_base_path
        .join(format!("{}_{}", sfc.ksprefix, config.keyspace))
        .join("byTime")
        .join(config.channel.name())
        .join(format!("{:019}", timebin))
        .join(format!("{:010}", split))
        .join(format!("{:019}_00000_Data", config.time_bin_size.ns() / MS))
}
/**
Return potential datafile paths for the given timebin.
It says "potential datafile paths" because we don't open the file here yet and of course,
files may vanish until then. Also, the timebin may actually not exist.
*/
pub async fn datapaths_for_timebin(
    timebin: u64,
    fetch_info: &SfChFetchInfo,
    node: &Node,
) -> Result<Vec<PathBuf>, Error> {
    let sfc = node.sf_databuffer.as_ref().unwrap();
    let timebin_path = sfc
        .data_base_path
        .join(format!("{}_{}", sfc.ksprefix, fetch_info.ks()))
        .join("byTime")
        .join(fetch_info.name())
        .join(format!("{:019}", timebin));
    let rd = tokio::fs::read_dir(timebin_path).await?;
    let mut rd = tokio_stream::wrappers::ReadDirStream::new(rd);
    let mut splits = vec![];
    while let Some(e) = rd.next().await {
        let e = e?;
        let dn = e
            .file_name()
            .into_string()
            .map_err(|s| Error::with_msg(format!("Bad OS path {:?} path: {:?}", s, e.path())))?;
        if dn.len() != 10 {
            return Err(Error::with_msg(format!("bad split dirname path: {:?}", e.path())));
        }
        // The name is exactly 10 chars long; accept it as a split directory
        // only if all of them are decimal digits. (Replaces the former
        // digit-counting fold compared against 10.)
        if dn.chars().all(|c| c.is_ascii_digit()) {
            let split: u64 = dn.parse()?;
            // When the node config restricts splits, keep only those.
            match &sfc.splits {
                Some(sps) => {
                    if sps.contains(&split) {
                        splits.push(split);
                    }
                }
                None => {
                    splits.push(split);
                }
            }
        }
    }
    let mut ret = vec![];
    for split in splits {
        let path = sfc
            .data_base_path
            .join(format!("{}_{}", sfc.ksprefix, fetch_info.ks()))
            .join("byTime")
            .join(fetch_info.name())
            .join(format!("{:019}", timebin))
            .join(format!("{:010}", split))
            .join(format!("{:019}_00000_Data", fetch_info.bs().ns() / MS));
        ret.push(path);
    }
    Ok(ret)
}
/// Directory containing all timebin directories of a channel on `node`.
pub fn channel_timebins_dir_path(fetch_info: &SfChFetchInfo, node: &Node) -> Result<PathBuf, Error> {
    let sfc = node.sf_databuffer.as_ref().unwrap();
    let path = sfc
        .data_base_path
        .join(format!("{}_{}", sfc.ksprefix, fetch_info.ks()))
        .join("byTime")
        .join(fetch_info.name());
    Ok(path)
}
/// Directory of the (timebin, split) pair containing timestamp `ts`.
pub fn data_dir_path(ts: TsNano, fetch_info: &SfChFetchInfo, split: u32, node: &Node) -> Result<PathBuf, Error> {
    let timebin = ts.ns() / fetch_info.bs().ns();
    let base = channel_timebins_dir_path(fetch_info, node)?;
    Ok(base.join(format!("{:019}", timebin)).join(format!("{:010}", split)))
}
/// Path of the data file within the (timebin, split) directory for `ts`.
pub fn data_path(ts: TsNano, fetch_info: &SfChFetchInfo, split: u32, node: &Node) -> Result<PathBuf, Error> {
    let dir = data_dir_path(ts, fetch_info, split, node)?;
    Ok(dir.join(format!("{:019}_{:05}_Data", fetch_info.bs().ns() / MS, 0)))
}
/// Path of the index file within the (timebin, split) directory for `ts`.
pub fn index_path(ts: TsNano, fetch_info: &SfChFetchInfo, split: u32, node: &Node) -> Result<PathBuf, Error> {
    let dir = data_dir_path(ts, fetch_info, split, node)?;
    Ok(dir.join(format!("{:019}_{:05}_Data_Index", fetch_info.bs().ns() / MS, 0)))
}
/// Directory of a (timebin, split) pair addressed directly by keyspace,
/// channel name and timebin number.
pub fn data_dir_path_tb(ks: u32, channel_name: &str, tb: u32, split: u32, node: &Node) -> Result<PathBuf, Error> {
    let sfc = node.sf_databuffer.as_ref().unwrap();
    let path = sfc
        .data_base_path
        .join(format!("{}_{}", sfc.ksprefix, ks))
        .join("byTime")
        .join(channel_name)
        .join(format!("{:019}", tb))
        .join(format!("{:010}", split));
    Ok(path)
}
/// Path of the data file addressed directly by keyspace, channel name,
/// timebin number and timebin size (`tbs`).
pub fn data_path_tb(ks: u32, channel_name: &str, tb: u32, tbs: u32, split: u32, node: &Node) -> Result<PathBuf, Error> {
    let dir = data_dir_path_tb(ks, channel_name, tb, split, node)?;
    Ok(dir.join(format!("{:019}_{:05}_Data", tbs, 0)))
}

2
crates/disk/src/raw.rs Normal file
View File

@@ -0,0 +1,2 @@
// Event streaming from the on-disk raw data.
pub mod conn;
// In-memory test-data generators used by the test backend.
pub mod generated;

262
crates/disk/src/raw/conn.rs Normal file
View File

@@ -0,0 +1,262 @@
use crate::eventblobs::EventChunkerMultifile;
use crate::eventchunker::EventChunkerConf;
use crate::raw::generated::EventBlobsGeneratorI32Test00;
use crate::raw::generated::EventBlobsGeneratorI32Test01;
use err::Error;
use futures_util::stream;
use futures_util::Stream;
use futures_util::StreamExt;
use items_0::streamitem::RangeCompletableItem;
use items_0::streamitem::Sitemty;
use items_0::streamitem::StreamItem;
use items_2::channelevents::ChannelEvents;
use items_2::eventfull::EventFull;
use netpod::log::*;
use netpod::range::evrange::NanoRange;
use netpod::AggKind;
use netpod::ByteSize;
use netpod::DiskIoTune;
use netpod::NodeConfigCached;
use netpod::SfChFetchInfo;
use query::api4::events::EventsSubQuery;
use std::pin::Pin;
// Backend name that routes queries to the in-memory test data generators.
const TEST_BACKEND: &str = "testbackend-00";
/// Decode an event-blob stream into `ChannelEvents`, wrapping the decoded
/// items while passing log/stats items and the range-complete marker through.
fn make_num_pipeline_stream_evs(
    fetch_info: SfChFetchInfo,
    agg_kind: AggKind,
    event_blobs: EventChunkerMultifile,
) -> Pin<Box<dyn Stream<Item = Sitemty<ChannelEvents>> + Send>> {
    let scalar_type = fetch_info.scalar_type().clone();
    let shape = fetch_info.shape().clone();
    let event_stream = match crate::decode::EventsDynStream::new(scalar_type, shape, agg_kind, event_blobs) {
        Ok(k) => k,
        Err(e) => {
            // Construction failed: emit the error as a single-item stream.
            return Box::pin(stream::iter([Err(e)]));
        }
    };
    let stream = event_stream.map(|item| match item {
        Ok(item) => match item {
            StreamItem::DataItem(item) => match item {
                RangeCompletableItem::RangeComplete => Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)),
                RangeCompletableItem::Data(item) => Ok(StreamItem::DataItem(RangeCompletableItem::Data(
                    ChannelEvents::Events(item),
                ))),
            },
            StreamItem::Log(k) => Ok(StreamItem::Log(k)),
            StreamItem::Stats(k) => Ok(StreamItem::Stats(k)),
        },
        Err(e) => Err(e),
    });
    Box::pin(stream)
}
/// Build the full local event pipeline for a query: read event blobs from
/// disk and decode them into a `ChannelEvents` stream.
pub async fn make_event_pipe(
    evq: EventsSubQuery,
    fetch_info: SfChFetchInfo,
    ncc: &NodeConfigCached,
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<ChannelEvents>> + Send>>, Error> {
    // sf-databuffer type backends identify channels by their (backend, name) only.
    let range = evq.range().clone();
    let one_before = evq.transform().need_one_before_range();
    info!(
        "make_event_pipe need_expand {need_expand} {evq:?}",
        need_expand = one_before
    );
    let event_chunker_conf = EventChunkerConf::new(ByteSize::from_kb(1024));
    // TODO should not need this for correctness.
    // Should limit based on return size and latency.
    // NOTE(review): both branches currently use 128; kept as a placeholder
    // for a future per-deployment setting.
    let out_max_len = if ncc.node_config.cluster.is_central_storage {
        128
    } else {
        128
    };
    let do_decompress = true;
    let event_blobs = EventChunkerMultifile::new(
        (&range).try_into()?,
        fetch_info.clone(),
        ncc.node.clone(),
        ncc.ix,
        DiskIoTune::default(),
        event_chunker_conf,
        one_before,
        do_decompress,
        out_max_len,
    );
    error!("TODO replace AggKind in the called code");
    let pipe = make_num_pipeline_stream_evs(fetch_info, AggKind::TimeWeightedScalar, event_blobs);
    Ok(pipe)
}
/// Build an event-blob stream reading directly from local disk files.
pub fn make_local_event_blobs_stream(
    range: NanoRange,
    fetch_info: SfChFetchInfo,
    expand: bool,
    do_decompress: bool,
    event_chunker_conf: EventChunkerConf,
    disk_io_tune: DiskIoTune,
    node_config: &NodeConfigCached,
) -> Result<EventChunkerMultifile, Error> {
    info!(
        "make_local_event_blobs_stream {fetch_info:?} do_decompress {do_decompress} disk_io_tune {disk_io_tune:?}"
    );
    if do_decompress {
        warn!("Possible issue: decompress central storage event blob stream");
    }
    // TODO should not need this for correctness.
    // Should limit based on return size and latency.
    // NOTE(review): both branches currently use 128; placeholder value.
    let out_max_len = if node_config.node_config.cluster.is_central_storage {
        128
    } else {
        128
    };
    let event_blobs = EventChunkerMultifile::new(
        range,
        fetch_info.clone(),
        node_config.node.clone(),
        node_config.ix,
        disk_io_tune,
        event_chunker_conf,
        expand,
        do_decompress,
        out_max_len,
    );
    Ok(event_blobs)
}
/// Build an event-blob stream for serving to a remote peer; same underlying
/// reader as `make_local_event_blobs_stream`, returned as `impl Stream`.
pub fn make_remote_event_blobs_stream(
    range: NanoRange,
    fetch_info: SfChFetchInfo,
    expand: bool,
    do_decompress: bool,
    event_chunker_conf: EventChunkerConf,
    disk_io_tune: DiskIoTune,
    node_config: &NodeConfigCached,
) -> Result<impl Stream<Item = Sitemty<EventFull>>, Error> {
    debug!("make_remote_event_blobs_stream");
    // TODO should not need this for correctness.
    // Should limit based on return size and latency.
    // NOTE(review): both branches currently use 128; placeholder value.
    let out_max_len = if node_config.node_config.cluster.is_central_storage {
        128
    } else {
        128
    };
    let event_blobs = EventChunkerMultifile::new(
        range,
        fetch_info.clone(),
        node_config.node.clone(),
        node_config.ix,
        disk_io_tune,
        event_chunker_conf,
        expand,
        do_decompress,
        out_max_len,
    );
    Ok(event_blobs)
}
/// Build the event-blob pipe for a real (non-test) backend: read from local
/// disk on central-storage clusters, otherwise prepare a stream for remote
/// consumption (with decompression enabled).
pub async fn make_event_blobs_pipe_real(
    subq: &EventsSubQuery,
    fetch_info: &SfChFetchInfo,
    node_config: &NodeConfigCached,
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<EventFull>> + Send>>, Error> {
    // Disabled channel-existence check, kept for reference.
    if false {
        match dbconn::channel_exists(subq.name(), &node_config).await {
            Ok(_) => (),
            Err(e) => return Err(e)?,
        }
    }
    let expand = subq.transform().need_one_before_range();
    let range = subq.range();
    let event_chunker_conf = EventChunkerConf::new(ByteSize::from_kb(1024));
    // TODO should depend on host config
    let do_local = node_config.node_config.cluster.is_central_storage;
    let pipe = if do_local {
        let event_blobs = make_local_event_blobs_stream(
            range.try_into()?,
            fetch_info.clone(),
            expand,
            false,
            event_chunker_conf,
            DiskIoTune::default(),
            node_config,
        )?;
        Box::pin(event_blobs) as _
    } else {
        let event_blobs = make_remote_event_blobs_stream(
            range.try_into()?,
            fetch_info.clone(),
            expand,
            true,
            event_chunker_conf,
            DiskIoTune::default(),
            node_config,
        )?;
        /*
        type ItemType = Sitemty<EventFull>;
        let s = event_blobs.map(|item: ItemType| Box::new(item) as Box<dyn Framable + Send>);
        //let s = tracing_futures::Instrumented::instrument(s, tracing::info_span!("make_event_blobs_pipe"));
        let pipe: Pin<Box<dyn Stream<Item = Box<dyn Framable + Send>> + Send>>;
        pipe = Box::pin(s);
        pipe*/
        Box::pin(event_blobs) as _
    };
    Ok(pipe)
}
pub async fn make_event_blobs_pipe_test(
subq: &EventsSubQuery,
node_config: &NodeConfigCached,
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<EventFull>> + Send>>, Error> {
warn!("GENERATE INMEM TEST DATA");
let node_count = node_config.node_config.cluster.nodes.len() as u64;
let node_ix = node_config.ix as u64;
let chn = subq.name();
let range = subq.range().clone();
if chn == "test-gen-i32-dim0-v00" {
Ok(Box::pin(EventBlobsGeneratorI32Test00::new(node_ix, node_count, range)))
} else if chn == "test-gen-i32-dim0-v01" {
Ok(Box::pin(EventBlobsGeneratorI32Test01::new(node_ix, node_count, range)))
} else {
let na: Vec<_> = chn.split("-").collect();
if na.len() != 3 {
Err(Error::with_msg_no_trace(format!(
"can not understand test channel name: {chn:?}"
)))
} else {
if na[0] != "inmem" {
Err(Error::with_msg_no_trace(format!(
"can not understand test channel name: {chn:?}"
)))
} else {
if na[1] == "d0" {
if na[2] == "i32" {
Ok(Box::pin(EventBlobsGeneratorI32Test00::new(node_ix, node_count, range)))
} else {
Err(Error::with_msg_no_trace(format!(
"can not understand test channel name: {chn:?}"
)))
}
} else {
Err(Error::with_msg_no_trace(format!(
"can not understand test channel name: {chn:?}"
)))
}
}
}
}
}
/// Dispatch to the test-data generator pipe for the test backend, otherwise
/// to the real disk-backed pipe.
pub async fn make_event_blobs_pipe(
    subq: &EventsSubQuery,
    fetch_info: &SfChFetchInfo,
    node_config: &NodeConfigCached,
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<EventFull>> + Send>>, Error> {
    debug!("make_event_blobs_pipe {subq:?}");
    let is_test_backend = subq.backend() == TEST_BACKEND;
    if is_test_backend {
        make_event_blobs_pipe_test(subq, node_config).await
    } else {
        make_event_blobs_pipe_real(subq, fetch_info, node_config).await
    }
}

View File

@@ -0,0 +1,225 @@
use futures_util::Future;
use futures_util::FutureExt;
use futures_util::Stream;
use items_0::container::ByteEstimate;
use items_0::streamitem::sitem_data;
use items_0::streamitem::RangeCompletableItem;
use items_0::streamitem::Sitemty;
use items_0::streamitem::StreamItem;
use items_0::Empty;
use items_2::eventfull::EventFull;
use netpod::range::evrange::SeriesRange;
use netpod::timeunits::MS;
use netpod::ScalarType;
use netpod::Shape;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
use std::time::Duration;
/// Associates a test-data generator with the Rust scalar type of the event
/// values it emits.
pub trait TypedGenerator {
    type RustScalar;
}
/// In-memory generator of `EventFull` batches with big-endian scalar i32
/// values, interleaved across nodes and throttled between batches.
pub struct EventBlobsGeneratorI32Test00 {
    // Timestamp (ns) of the next event to emit.
    ts: u64,
    // Timestamp increment between events emitted by this node.
    dts: u64,
    // Exclusive end of the generated time range.
    tsend: u64,
    #[allow(unused)]
    c1: u64,
    scalar_type: ScalarType,
    // Whether values are encoded big-endian.
    be: bool,
    shape: Shape,
    // Pending throttle sleep between batches.
    timeout: Option<Pin<Box<dyn Future<Output = ()> + Send>>>,
    done: bool,
    done_range_final: bool,
}
// Test00 emits i32 values.
impl TypedGenerator for EventBlobsGeneratorI32Test00 {
    type RustScalar = i32;
}
impl EventBlobsGeneratorI32Test00 {
    /// Set up the generator for node `node_ix` of `node_count` nodes over a
    /// 1 s global event grid: each node emits every `node_count`-th event,
    /// phase-shifted by its index. Pulse ranges are not supported yet.
    pub fn new(node_ix: u64, node_count: u64, range: SeriesRange) -> Self {
        let range = match range {
            SeriesRange::TimeRange(k) => k,
            SeriesRange::PulseRange(_) => todo!(),
        };
        let dt = MS * 1000;
        // First event timestamp on this node: aligned to dt, offset by node index.
        let ts = (range.beg / dt + node_ix) * dt;
        let dts = dt * node_count as u64;
        let tsend = range.end;
        Self {
            ts,
            dts,
            tsend,
            c1: 0,
            scalar_type: ScalarType::I32,
            be: true,
            shape: Shape::Scalar,
            timeout: None,
            done: false,
            done_range_final: false,
        }
    }
    /// Produce the next batch of events, capped at roughly 200 bytes of
    /// payload, advancing the internal timestamp cursor.
    fn make_batch(&mut self) -> Sitemty<EventFull> {
        // TODO should not repeat self type name
        type T = <EventBlobsGeneratorI32Test00 as TypedGenerator>::RustScalar;
        let mut item = EventFull::empty();
        let mut ts = self.ts;
        loop {
            if ts >= self.tsend || item.byte_estimate() > 200 {
                break;
            }
            // Pulse id equals the timestamp in this generator.
            let pulse = ts;
            // Sawtooth pattern derived from the timestamp.
            let value = (ts / (MS * 100) % 1000) as T;
            item.add_event(
                ts,
                pulse,
                Some(value.to_be_bytes().to_vec()),
                None,
                self.scalar_type.clone(),
                self.be,
                self.shape.clone(),
                None,
            );
            ts += self.dts;
        }
        self.ts = ts;
        let w = sitem_data(item);
        w
    }
}
impl Stream for EventBlobsGeneratorI32Test00 {
    type Item = Sitemty<EventFull>;

    // State machine: emit throttled data batches until `ts` reaches `tsend`,
    // then emit a single RangeComplete marker, then terminate the stream.
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        loop {
            break if self.done_range_final {
                // RangeComplete already delivered: the stream is finished.
                Ready(None)
            } else if self.ts >= self.tsend {
                // Range exhausted: signal completion exactly once.
                self.done = true;
                self.done_range_final = true;
                Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
            } else if false {
                // To use the generator without throttling, use this scope
                Ready(Some(self.make_batch()))
            } else if let Some(fut) = self.timeout.as_mut() {
                // Throttle timer armed: emit the next batch when it fires.
                match fut.poll_unpin(cx) {
                    Ready(()) => {
                        self.timeout = None;
                        Ready(Some(self.make_batch()))
                    }
                    Pending => Pending,
                }
            } else {
                // Arm the throttle delay and re-enter the loop to poll it.
                self.timeout = Some(Box::pin(tokio::time::sleep(Duration::from_millis(2))));
                continue;
            };
        }
    }
}
/// Test data source that synthesizes scalar i32 event blobs over a time
/// range, one event per 500 ms slot, interleaved across nodes.
pub struct EventBlobsGeneratorI32Test01 {
    // Timestamp of the next event to emit (units of `netpod::timeunits::MS`).
    ts: u64,
    // Step between consecutive events emitted by this node.
    dts: u64,
    // End of the generated range; no events at or past this timestamp.
    tsend: u64,
    #[allow(unused)]
    c1: u64,
    // Scalar type tag attached to every generated event (I32).
    scalar_type: ScalarType,
    // True: payload bytes are big-endian encoded.
    be: bool,
    // Shape tag attached to every generated event (Scalar).
    shape: Shape,
    // Throttle timer armed between emitted batches.
    timeout: Option<Pin<Box<dyn Future<Output = ()> + Send>>>,
    done: bool,
    // Set once the RangeComplete item has been emitted; stream then ends.
    done_range_final: bool,
}
// Test01 encodes i32 scalar values.
impl TypedGenerator for EventBlobsGeneratorI32Test01 {
    type RustScalar = i32;
}
impl EventBlobsGeneratorI32Test01 {
pub fn new(node_ix: u64, node_count: u64, range: SeriesRange) -> Self {
let range = match range {
SeriesRange::TimeRange(k) => k,
SeriesRange::PulseRange(_) => todo!(),
};
let dt = MS * 500;
let ts = (range.beg / dt + node_ix) * dt;
let dts = dt * node_count as u64;
let tsend = range.end;
Self {
ts,
dts,
tsend,
c1: 0,
scalar_type: ScalarType::I32,
be: true,
shape: Shape::Scalar,
timeout: None,
done: false,
done_range_final: false,
}
}
fn make_batch(&mut self) -> Sitemty<EventFull> {
type T = i32;
let mut item = EventFull::empty();
let mut ts = self.ts;
loop {
if self.ts >= self.tsend || item.byte_estimate() > 400 {
break;
}
let pulse = ts;
let value = (ts / self.dts) as T;
item.add_event(
ts,
pulse,
Some(value.to_be_bytes().to_vec()),
None,
self.scalar_type.clone(),
self.be,
self.shape.clone(),
None,
);
ts += self.dts;
}
self.ts = ts;
let w = sitem_data(item);
w
}
}
impl Stream for EventBlobsGeneratorI32Test01 {
    type Item = Sitemty<EventFull>;

    // State machine: emit throttled data batches until `ts` reaches `tsend`,
    // then emit a single RangeComplete marker, then terminate the stream.
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        loop {
            break if self.done_range_final {
                // RangeComplete already delivered: the stream is finished.
                Ready(None)
            } else if self.ts >= self.tsend {
                // Range exhausted: signal completion exactly once.
                self.done = true;
                self.done_range_final = true;
                Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
            } else if false {
                // To use the generator without throttling, use this scope
                Ready(Some(self.make_batch()))
            } else if let Some(fut) = self.timeout.as_mut() {
                // Throttle timer armed: emit the next batch when it fires.
                match fut.poll_unpin(cx) {
                    Ready(()) => {
                        self.timeout = None;
                        Ready(Some(self.make_batch()))
                    }
                    Pending => Pending,
                }
            } else {
                // Arm the throttle delay and re-enter the loop to poll it.
                self.timeout = Some(Box::pin(tokio::time::sleep(Duration::from_millis(2))));
                continue;
            };
        }
    }
}

180
crates/disk/src/read3.rs Normal file
View File

@@ -0,0 +1,180 @@
use bytes::BytesMut;
use err::Error;
use netpod::log::*;
use std::os::unix::prelude::RawFd;
use std::sync::atomic::{AtomicPtr, AtomicUsize, Ordering};
use std::sync::Once;
use std::time::{Duration, Instant};
use tokio::sync::{mpsc, oneshot};
// Compile-time switch for very verbose per-read trace logging.
const DO_TRACE: bool = false;
// Process-wide singleton; set once by `Read3::get` and intentionally never freed.
static READ3: AtomicPtr<Read3> = AtomicPtr::new(std::ptr::null_mut());
/// A single positional read request handed to the worker pool.
pub struct ReadTask {
    // File descriptor to read from.
    fd: RawFd,
    // Absolute byte offset to read at.
    pos: u64,
    // Number of bytes requested.
    count: u64,
    // One-shot channel on which the result is delivered to the requester.
    rescell: oneshot::Sender<Result<ReadResult, Error>>,
}
/// Outcome of one read: the bytes obtained and whether end-of-file was hit.
pub struct ReadResult {
    pub buf: BytesMut,
    pub eof: bool,
}
/// Thread-pool based positional file reader built on blocking `pread`.
pub struct Read3 {
    // Queue of pending read jobs, consumed by the worker pool.
    jobs_tx: mpsc::Sender<ReadTask>,
    // Hands the single job receiver from worker to worker so that only one
    // worker blocks on the job queue at a time.
    rtx: crossbeam::channel::Sender<mpsc::Receiver<ReadTask>>,
    // Cap on active workers; workers with id >= this value park themselves.
    threads_max: AtomicUsize,
    // Count of results that could not be delivered back to the requester.
    can_not_publish: AtomicUsize,
}
impl Read3 {
pub fn get() -> &'static Self {
static INIT: Once = Once::new();
INIT.call_once(|| {
let (jtx, jrx) = mpsc::channel(512);
let (rtx, rrx) = crossbeam::channel::bounded(32);
let read3 = Read3 {
jobs_tx: jtx,
rtx,
threads_max: AtomicUsize::new(32),
can_not_publish: AtomicUsize::new(0),
};
let b = Box::new(read3);
let ptr = Box::into_raw(b);
READ3.store(ptr, Ordering::Release);
let ptr = READ3.load(Ordering::Acquire);
let h = unsafe { &*ptr };
if let Err(_) = h.rtx.send(jrx) {
error!("Read3 INIT: can not enqueue main job reader");
}
for wid in 0..128 {
let rrx = rrx.clone();
tokio::task::spawn_blocking(move || h.read_worker(wid, rrx));
}
});
let ptr = READ3.load(Ordering::Acquire);
unsafe { &*ptr }
}
pub fn threads_max(&self) -> usize {
self.threads_max.load(Ordering::Acquire)
}
pub fn set_threads_max(&self, max: usize) {
self.threads_max.store(max, Ordering::Release);
}
pub async fn read(&self, fd: RawFd, pos: u64, count: u64) -> Result<ReadResult, Error> {
let (tx, rx) = oneshot::channel();
let rt = ReadTask {
fd,
pos,
count,
rescell: tx,
};
match self.jobs_tx.send(rt).await {
Ok(_) => match rx.await {
Ok(res) => res,
Err(e) => Err(Error::with_msg(format!("can not receive read task result: {e}"))),
},
Err(e) => Err(Error::with_msg(format!("can not send read job task: {e}"))),
}
}
fn read_worker(&self, wid: u32, rrx: crossbeam::channel::Receiver<mpsc::Receiver<ReadTask>>) {
'outer: loop {
while wid as usize >= self.threads_max.load(Ordering::Acquire) {
std::thread::sleep(Duration::from_millis(4000));
}
match rrx.recv() {
Ok(mut jrx) => match jrx.blocking_recv() {
Some(rt) => match self.rtx.send(jrx) {
Ok(_) => self.read_worker_job(wid, rt),
Err(e) => {
error!("can not return the job receiver: wid {wid} {e}");
break 'outer;
}
},
None => {
let _ = self.rtx.send(jrx);
break 'outer;
}
},
Err(e) => {
error!("read_worker sees: wid {wid} {e}");
break 'outer;
}
}
}
}
fn read_worker_job(&self, wid: u32, rt: ReadTask) {
let ts1 = Instant::now();
let mut prc = 0;
let fd = rt.fd;
let mut rpos = rt.pos;
let mut buf = BytesMut::with_capacity(rt.count as usize);
let mut writable = rt.count as usize;
let rr = loop {
if DO_TRACE {
trace!("do pread fd {fd} count {writable} offset {rpos} wid {wid}");
}
let ec = unsafe { libc::pread(fd, buf.as_mut_ptr() as _, writable, rpos as i64) };
prc += 1;
if ec == -1 {
let errno = unsafe { *libc::__errno_location() };
if errno == libc::EINVAL {
debug!("pread EOF fd {fd} count {writable} offset {rpos} wid {wid}");
let rr = ReadResult { buf, eof: true };
break Ok(rr);
} else {
warn!("pread ERROR errno {errno} fd {fd} count {writable} offset {rpos} wid {wid}");
// TODO use a more structured error
let e = Error::with_msg_no_trace(format!(
"pread ERROR errno {errno} fd {fd} count {writable} offset {rpos} wid {wid}"
));
break Err(e);
}
} else if ec == 0 {
debug!("pread EOF fd {fd} count {writable} offset {rpos} wid {wid} prc {prc}");
let rr = ReadResult { buf, eof: true };
break Ok(rr);
} else if ec > 0 {
if ec as usize > writable {
error!("pread TOOLARGE ec {ec} fd {fd} count {writable} offset {rpos} wid {wid} prc {prc}");
return;
} else {
rpos += ec as u64;
writable -= ec as usize;
unsafe { buf.set_len(buf.len() + (ec as usize)) };
if writable == 0 {
let ts2 = Instant::now();
let dur = ts2.duration_since(ts1);
let dms = 1e3 * dur.as_secs_f32();
if DO_TRACE {
trace!("pread DONE ec {ec} fd {fd} wid {wid} prc {prc} dms {dms:.2}");
}
let rr = ReadResult { buf, eof: false };
break Ok(rr);
}
}
} else {
error!(
"pread UNEXPECTED ec {} fd {} count {} offset {rpos} wid {wid}",
ec, rt.fd, writable
);
return;
}
};
match rt.rescell.send(rr) {
Ok(_) => {}
Err(_) => {
self.can_not_publish.fetch_add(1, Ordering::AcqRel);
warn!("can not publish the read result wid {wid}");
}
}
}
}

227
crates/disk/src/read4.rs Normal file
View File

@@ -0,0 +1,227 @@
use bytes::BytesMut;
use err::Error;
use netpod::log::*;
use std::os::unix::prelude::RawFd;
use std::sync::atomic::{AtomicPtr, AtomicUsize, Ordering};
use std::sync::Once;
use std::time::{Duration, Instant};
use tokio::sync::mpsc;
// Process-wide singleton; set once by `Read4::get` and intentionally never freed.
static READ4: AtomicPtr<Read4> = AtomicPtr::new(std::ptr::null_mut());
// Compile-time switch for very verbose per-read trace logging.
const DO_TRACE: bool = false;
/// A streaming read job: read `fd` sequentially in `buflen`-sized chunks,
/// keeping `read_queue_len` buffers ahead via readahead.
pub struct ReadTask {
    // File descriptor to read sequentially from its current position.
    fd: RawFd,
    // Size of each read buffer in bytes.
    buflen: u64,
    // Number of buffers to keep ahead of the read position via readahead.
    read_queue_len: usize,
    // Channel on which filled buffers (or errors) stream back to the requester.
    results: mpsc::Sender<Result<ReadResult, Error>>,
}
/// One chunk of a streaming read: the bytes obtained and whether
/// end-of-file was hit.
pub struct ReadResult {
    pub buf: BytesMut,
    pub eof: bool,
}
/// Thread-pool based sequential file reader built on blocking `read` plus
/// kernel `readahead` hints, streaming chunks back over a channel.
pub struct Read4 {
    // Queue of pending read jobs, consumed by the worker pool.
    jobs_tx: mpsc::Sender<ReadTask>,
    // Hands the single job receiver from worker to worker so that only one
    // worker blocks on the job queue at a time.
    rtx: crossbeam::channel::Sender<mpsc::Receiver<ReadTask>>,
    // Cap on active workers; workers with id >= this value park themselves.
    threads_max: AtomicUsize,
    // Count of results that could not be delivered back to the requester.
    can_not_publish: AtomicUsize,
}
impl Read4 {
    /// Return the process-wide reader singleton, initializing it on first use.
    ///
    /// Initialization leaks one `Read4` on purpose (it lives for the whole
    /// process) and spawns 16 blocking worker tasks which take turns pulling
    /// jobs from a shared crossbeam channel.
    pub fn get() -> &'static Self {
        static INIT: Once = Once::new();
        INIT.call_once(|| {
            let (jtx, jrx) = mpsc::channel(512);
            let (rtx, rrx) = crossbeam::channel::bounded(32);
            let read4 = Read4 {
                jobs_tx: jtx,
                rtx,
                threads_max: AtomicUsize::new(32),
                can_not_publish: AtomicUsize::new(0),
            };
            let b = Box::new(read4);
            let ptr = Box::into_raw(b);
            READ4.store(ptr, Ordering::Release);
            let ptr = READ4.load(Ordering::Acquire);
            // SAFETY: the pointer was just stored from a leaked Box and is
            // never freed, so the derived &'static reference stays valid.
            let h = unsafe { &*ptr };
            if let Err(_) = h.rtx.send(jrx) {
                error!("Read4 INIT: can not enqueue main job reader");
            }
            for wid in 0..16 {
                let rrx = rrx.clone();
                tokio::task::spawn_blocking(move || h.read_worker(wid, rrx));
            }
        });
        let ptr = READ4.load(Ordering::Acquire);
        // SAFETY: `call_once` above guarantees the pointer is initialized.
        unsafe { &*ptr }
    }
    /// Current cap on concurrently active worker threads.
    pub fn threads_max(&self) -> usize {
        self.threads_max.load(Ordering::Acquire)
    }
    /// Adjust the worker-thread cap; workers above the cap park themselves.
    pub fn set_threads_max(&self, max: usize) {
        self.threads_max.store(max, Ordering::Release);
    }
    /// Start a streaming sequential read of `fd` from its current position
    /// and return the channel on which chunks (or errors) arrive.
    ///
    /// # Errors
    /// Fails when the job queue is closed.
    pub async fn read(
        &self,
        fd: RawFd,
        buflen: u64,
        read_queue_len: usize,
    ) -> Result<mpsc::Receiver<Result<ReadResult, Error>>, Error> {
        let (tx, rx) = mpsc::channel(32);
        let rt = ReadTask {
            fd,
            buflen,
            read_queue_len,
            results: tx,
        };
        match self.jobs_tx.send(rt).await {
            Ok(_) => Ok(rx),
            Err(e) => Err(Error::with_msg(format!("can not send read job task: {e}"))),
        }
    }
    /// Worker loop: take the shared job receiver, pop one task, hand the
    /// receiver back so another worker can block on it, then execute the
    /// task. Exits when either channel is closed.
    fn read_worker(&self, wid: u32, rrx: crossbeam::channel::Receiver<mpsc::Receiver<ReadTask>>) {
        loop {
            // Park workers whose id exceeds the configured thread cap.
            while wid as usize >= self.threads_max.load(Ordering::Acquire) {
                std::thread::sleep(Duration::from_millis(4000));
            }
            match rrx.recv() {
                Ok(mut jrx) => match jrx.blocking_recv() {
                    Some(rt) => match self.rtx.send(jrx) {
                        Ok(_) => self.read_worker_job(wid, rt),
                        Err(e) => {
                            error!("can not return the job receiver: wid {wid} {e}");
                            return;
                        }
                    },
                    None => {
                        let _ = self.rtx.send(jrx);
                        return;
                    }
                },
                Err(e) => {
                    error!("read_worker sees: wid {wid} {e}");
                    return;
                }
            }
        }
    }
    /// Execute one streaming read task: determine the file position, then
    /// alternate between issuing readahead hints and blocking `read` calls,
    /// streaming each filled buffer to the requester until EOF or a fatal
    /// condition.
    fn read_worker_job(&self, wid: u32, rt: ReadTask) {
        let fd = rt.fd;
        // Query the fd's current offset; the readahead window is derived
        // from it.
        let ec = unsafe { libc::lseek(fd, 0, libc::SEEK_CUR) };
        if ec == -1 {
            let errno = unsafe { *libc::__errno_location() };
            let msg = format!("seek error wid {wid} fd {fd} errno {errno}");
            error!("{}", msg);
            let e = Error::with_msg_no_trace(msg);
            match rt.results.blocking_send(Err(e)) {
                Ok(_) => {}
                Err(_) => {
                    self.can_not_publish.fetch_add(1, Ordering::AcqRel);
                    error!("Can not publish error");
                }
            }
            return;
        }
        // Next byte offset that `read` will consume.
        let mut rpos = ec as u64;
        // Readahead cursor, aligned down to a buffer boundary.
        let mut apos = rpos / rt.buflen * rt.buflen;
        // Number of read calls issued for this task (diagnostics only).
        let mut prc = 0;
        loop {
            let ts1 = Instant::now();
            // Keep `read_queue_len` buffers of readahead outstanding.
            while apos < rpos + rt.read_queue_len as u64 * rt.buflen {
                if DO_TRACE {
                    trace!("READAHEAD wid {wid} fd {fd} apos {apos}");
                }
                let n = unsafe { libc::readahead(fd, apos as _, rt.buflen as _) };
                if n == -1 {
                    let errno = unsafe { *libc::__errno_location() };
                    let msg = format!("READAHEAD ERROR wid {wid} errno {errno} fd {fd} apos {apos}");
                    warn!("{}", msg);
                    // TODO use a more structured error
                    let e = Error::with_msg_no_trace(msg);
                    // NOTE(review): on error `apos` is not advanced, so a
                    // persistently failing readahead spins in this inner
                    // loop and floods the result channel — confirm whether
                    // the error should advance `apos` or abort the task.
                    match rt.results.blocking_send(Err(e)) {
                        Ok(_) => {}
                        Err(_) => {
                            self.can_not_publish.fetch_add(1, Ordering::AcqRel);
                            warn!("can not publish the read result wid {wid}");
                        }
                    }
                } else {
                    apos += rt.buflen;
                }
            }
            if DO_TRACE {
                trace!("READ wid {wid} fd {fd} rpos {rpos}");
            }
            // Fresh buffer per chunk; ownership moves to the receiver.
            let mut buf = BytesMut::with_capacity(rt.buflen as usize);
            let bufptr = buf.as_mut_ptr() as _;
            let buflen = buf.capacity() as _;
            // SAFETY: the destination range is within the freshly reserved
            // capacity of `buf`.
            let ec = unsafe { libc::read(fd, bufptr, buflen) };
            prc += 1;
            if ec == -1 {
                let errno = unsafe { *libc::__errno_location() };
                {
                    let msg = format!("READ ERROR wid {wid} errno {errno} fd {fd} offset {rpos}");
                    warn!("{}", msg);
                    // TODO use a more structured error
                    let e = Error::with_msg_no_trace(msg);
                    // NOTE(review): after reporting, the outer loop retries
                    // the read at the same offset; a persistent error (other
                    // than EINTR-style transients) loops forever — confirm.
                    match rt.results.blocking_send(Err(e)) {
                        Ok(_) => {}
                        Err(_) => {
                            self.can_not_publish.fetch_add(1, Ordering::AcqRel);
                            warn!("can not publish the read result wid {wid}");
                            return;
                        }
                    }
                }
            } else if ec == 0 {
                // EOF: deliver an empty terminal chunk and finish.
                debug!("READ EOF wid {wid} prc {prc} fd {fd} offset {rpos} prc {prc}");
                let rr = ReadResult { buf, eof: true };
                match rt.results.blocking_send(Ok(rr)) {
                    Ok(_) => {}
                    Err(_) => {
                        self.can_not_publish.fetch_add(1, Ordering::AcqRel);
                        warn!("can not publish the read result wid {wid}");
                        return;
                    }
                }
                return;
            } else if ec > 0 {
                if ec as usize > buf.capacity() {
                    // Kernel reported more bytes than the buffer holds:
                    // abort rather than publish corrupt data.
                    error!("READ TOOLARGE wid {wid} ec {ec} fd {fd} offset {rpos} prc {prc}");
                    return;
                } else {
                    rpos += ec as u64;
                    // SAFETY: `read` initialized exactly `ec` bytes at the
                    // start of the buffer.
                    unsafe { buf.set_len(buf.len() + (ec as usize)) };
                    {
                        let ts2 = Instant::now();
                        let dur = ts2.duration_since(ts1);
                        let dms = 1e3 * dur.as_secs_f32();
                        if DO_TRACE {
                            trace!("READ DONE wid {wid} ec {ec} fd {fd} prc {prc} dms {dms:.2}");
                        }
                        let rr = ReadResult { buf, eof: false };
                        match rt.results.blocking_send(Ok(rr)) {
                            Ok(_) => {}
                            Err(_) => {
                                self.can_not_publish.fetch_add(1, Ordering::AcqRel);
                                warn!("can not publish the read result wid {wid}");
                                return;
                            }
                        }
                    }
                }
            } else {
                error!("READ UNEXPECTED wid {wid} ec {ec} fd {fd} offset {rpos}");
                return;
            }
        }
    }
}

View File

@@ -0,0 +1,50 @@
use items_0::streamitem::LogItem;
use netpod::log::*;
use std::collections::VecDeque;
/// Per-node buffer of log items to be forwarded along a data stream.
pub struct Streamlog {
    // Buffered items awaiting emission.
    items: VecDeque<LogItem>,
    // Index of the node that produced these items; attached to each entry.
    node_ix: u32,
}
impl Streamlog {
    /// Create an empty log buffer tagged with this node's index.
    pub fn new(node_ix: u32) -> Self {
        Self {
            items: VecDeque::new(),
            node_ix,
        }
    }
    /// Queue a log message for later emission.
    pub fn append(&mut self, level: Level, msg: String) {
        let item = LogItem {
            node_ix: self.node_ix,
            level,
            msg,
        };
        self.items.push_back(item);
    }
    /// Take the oldest queued item, if any.
    ///
    /// BUGFIX: items are appended with `push_back`, so FIFO draining must
    /// use `pop_front`; the previous `pop_back` emitted buffered messages in
    /// reverse (newest-first) order.
    pub fn pop(&mut self) -> Option<LogItem> {
        self.items.pop_front()
    }
    /// Forward a queued item to the process-local logger at its level.
    pub fn emit(item: &LogItem) {
        match item.level {
            Level::ERROR => {
                error!("StreamLog Node {} {}", item.node_ix, item.msg);
            }
            Level::WARN => {
                warn!("StreamLog Node {} {}", item.node_ix, item.msg);
            }
            Level::INFO => {
                info!("StreamLog Node {} {}", item.node_ix, item.msg);
            }
            Level::DEBUG => {
                debug!("StreamLog Node {} {}", item.node_ix, item.msg);
            }
            Level::TRACE => {
                trace!("StreamLog Node {} {}", item.node_ix, item.msg);
            }
        }
    }
}