Move workspace crates into subfolder
crates/disk/src/agg/binnedt.rs (new file, 230 lines)
@@ -0,0 +1,230 @@
use futures_core::Stream;
use futures_util::StreamExt;
use items::{RangeCompletableItem, Sitemty, StreamItem, TimeBinnableType, TimeBinnableTypeAggregator};
use netpod::log::*;
use netpod::BinnedRange;
use netpod::NanoRange;
use std::collections::VecDeque;
use std::pin::Pin;
use std::task::{Context, Poll};

pub trait TimeBinningChoice {
    type Output: TimeBinnableType;
    type Aggregator: TimeBinnableTypeAggregator<Input = Self, Output = Self::Output> + Send + Unpin;
    fn aggregator(range: NanoRange, bin_count: usize) -> Self::Aggregator;
}

pub struct TimeWeightedBinMethodMarker {}

pub struct TBinnerStreamPlay<S, TBT>
where
    S: Stream<Item = Sitemty<TBT>>,
    TBT: TimeBinnableType,
{
    #[allow(unused)]
    inp: Pin<Box<S>>,
    #[allow(unused)]
    left: Option<Poll<Option<Sitemty<TBT>>>>,
    //aggtor: Option<<TBT as TimeBinnableType>::Aggregator>,
    #[allow(unused)]
    a: Option<TBT>,
}

pub struct TBinnerStream<S, TBT>
where
    S: Stream<Item = Sitemty<TBT>>,
    TBT: TimeBinnableType,
{
    inp: Pin<Box<S>>,
    spec: BinnedRange,
    curbin: u32,
    left: Option<Poll<Option<Sitemty<TBT>>>>,
    aggtor: Option<<TBT as TimeBinnableType>::Aggregator>,
    tmp_agg_results: VecDeque<<<TBT as TimeBinnableType>::Aggregator as TimeBinnableTypeAggregator>::Output>,
    inp_completed: bool,
    all_bins_emitted: bool,
    range_complete_observed: bool,
    range_complete_emitted: bool,
    errored: bool,
    completed: bool,
}

impl<S, TBT> TBinnerStream<S, TBT>
where
    S: Stream<Item = Sitemty<TBT>> + Send + Unpin + 'static,
    TBT: TimeBinnableType,
{
    pub fn new(inp: S, spec: BinnedRange, x_bin_count: usize, do_time_weight: bool) -> Self {
        let range = spec.get_range(0);
        Self {
            inp: Box::pin(inp),
            spec,
            curbin: 0,
            left: None,
            aggtor: Some(<TBT as TimeBinnableType>::aggregator(
                range,
                x_bin_count,
                do_time_weight,
            )),
            tmp_agg_results: VecDeque::new(),
            inp_completed: false,
            all_bins_emitted: false,
            range_complete_observed: false,
            range_complete_emitted: false,
            errored: false,
            completed: false,
        }
    }

    fn cur(&mut self, cx: &mut Context) -> Poll<Option<Sitemty<TBT>>> {
        if let Some(cur) = self.left.take() {
            cur
        } else if self.inp_completed {
            Poll::Ready(None)
        } else {
            let inp_poll_span = span!(Level::TRACE, "into_t_inp_poll");
            let t = inp_poll_span.in_scope(|| self.inp.poll_next_unpin(cx));
            if false {
                // TODO collect as stats:
                use Poll::*;
                match &t {
                    Ready(item) => match item {
                        Some(item) => match item {
                            Ok(item) => match item {
                                StreamItem::DataItem(item) => match item {
                                    RangeCompletableItem::Data(item) => {
                                        info!("time binner got batch len {}", item.len());
                                    }
                                    _ => {}
                                },
                                _ => {}
                            },
                            _ => {}
                        },
                        _ => {}
                    },
                    _ => {}
                }
            }
            t
        }
    }

    // TODO handle unwrap error, or use a mem replace type instead of option:
    fn cycle_current_bin(&mut self, expand: bool) {
        self.curbin += 1;
        let ret = self
            .aggtor
            .as_mut()
            .unwrap()
            .result_reset(self.spec.get_range(self.curbin), expand);
        // TODO should we accumulate bins before emit? Maybe not, we want to stay responsive.
        // Only if the frequency were high would that require cpu time checks. Worth it? Measure..
        self.tmp_agg_results.push_back(ret);
        if self.curbin >= self.spec.bin_count() as u32 {
            self.all_bins_emitted = true;
        }
    }

    fn handle(
        &mut self,
        cur: Poll<Option<Sitemty<TBT>>>,
    ) -> Option<Poll<Option<Sitemty<<<TBT as TimeBinnableType>::Aggregator as TimeBinnableTypeAggregator>::Output>>>>
    {
        use Poll::*;
        match cur {
            Ready(Some(Ok(item))) => match item {
                StreamItem::Log(item) => Some(Ready(Some(Ok(StreamItem::Log(item))))),
                StreamItem::Stats(item) => Some(Ready(Some(Ok(StreamItem::Stats(item))))),
                StreamItem::DataItem(item) => match item {
                    RangeCompletableItem::RangeComplete => {
                        self.range_complete_observed = true;
                        None
                    }
                    RangeCompletableItem::Data(item) => {
                        if self.all_bins_emitted {
                            // Just drop the item because we will not emit any more data.
                            // TODO gather stats.
                            None
                        } else {
                            let ag = self.aggtor.as_mut().unwrap();
                            if item.ends_before(ag.range().clone()) {
                                None
                            } else if item.starts_after(ag.range().clone()) {
                                self.left =
                                    Some(Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(item))))));
                                self.cycle_current_bin(true);
                                // TODO cycle_current_bin enqueues the bin, can I return here instead?
                                None
                            } else {
                                ag.ingest(&item);
                                if item.ends_after(ag.range().clone()) {
                                    self.left =
                                        Some(Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(item))))));
                                    self.cycle_current_bin(true);
                                }
                                // TODO cycle_current_bin enqueues the bin, can I return here instead?
                                None
                            }
                        }
                    }
                },
            },
            Ready(Some(Err(e))) => {
                self.errored = true;
                Some(Ready(Some(Err(e))))
            }
            Ready(None) => {
                self.inp_completed = true;
                if self.all_bins_emitted {
                    None
                } else {
                    self.cycle_current_bin(false);
                    // TODO cycle_current_bin enqueues the bin, can I return here instead?
                    None
                }
            }
            Pending => Some(Pending),
        }
    }
}

impl<S, TBT> Stream for TBinnerStream<S, TBT>
where
    S: Stream<Item = Sitemty<TBT>> + Send + Unpin + 'static,
    TBT: TimeBinnableType + Send + Unpin + 'static,
    <TBT as TimeBinnableType>::Aggregator: Unpin,
    <<TBT as TimeBinnableType>::Aggregator as TimeBinnableTypeAggregator>::Output: Unpin,
{
    type Item = Sitemty<<<TBT as TimeBinnableType>::Aggregator as TimeBinnableTypeAggregator>::Output>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        'outer: loop {
            break if self.completed {
                panic!("poll_next on completed");
            } else if self.errored {
                self.completed = true;
                Ready(None)
            } else if let Some(item) = self.tmp_agg_results.pop_front() {
                Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(item)))))
            } else if self.range_complete_emitted {
                self.completed = true;
                Ready(None)
            } else if self.inp_completed && self.all_bins_emitted {
                self.range_complete_emitted = true;
                if self.range_complete_observed {
                    Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
                } else {
                    continue 'outer;
                }
            } else {
                let cur = self.cur(cx);
                match self.handle(cur) {
                    Some(item) => item,
                    None => continue 'outer,
                }
            };
        }
    }
}
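The carry-over in `TBinnerStream::handle` is the part worth noting: an item that starts after or extends past the current bin is stashed in `self.left`, the bin is cycled via `cycle_current_bin`, and the same item is re-examined against the next bin on the following pass. Below is a minimal synchronous sketch of that idea, using plain iterators and hypothetical `Event`/`Bin` structs in place of the crate's `Stream` and `TimeBinnableType` machinery:

```rust
// Conceptual sketch only: the real code is an async Stream with aggregators.
#[derive(Debug)]
struct Event {
    ts: u64,
    value: f64,
}

#[derive(Debug)]
struct Bin {
    beg: u64,
    end: u64,
    count: u64,
    sum: f64,
}

fn bin_events(events: &[Event], bin_edges: &[u64]) -> Vec<Bin> {
    let mut bins = Vec::new();
    let mut it = events.iter().peekable();
    for w in bin_edges.windows(2) {
        let (beg, end) = (w[0], w[1]);
        let mut bin = Bin { beg, end, count: 0, sum: 0.0 };
        // Events before `beg` are dropped; an event at or past `end` is
        // "left over" (peeked but not consumed) and re-examined for the
        // next bin, mirroring the `self.left` carry-over in handle().
        while let Some(ev) = it.peek() {
            if ev.ts < beg {
                it.next();
            } else if ev.ts >= end {
                break;
            } else {
                bin.count += 1;
                bin.sum += ev.value;
                it.next();
            }
        }
        bins.push(bin);
    }
    bins
}

fn main() {
    let events = vec![
        Event { ts: 5, value: 1.0 },
        Event { ts: 15, value: 2.0 },
        Event { ts: 25, value: 3.0 },
    ];
    // Bins [0,10), [10,20), [20,30): each receives exactly one event.
    for b in bin_events(&events, &[0, 10, 20, 30]) {
        println!("{:?}", b);
    }
}
```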
crates/disk/src/agg/scalarbinbatch.rs (new file, 1 line)
@@ -0,0 +1 @@
crates/disk/src/agg/streams.rs (new file, 1 line)
@@ -0,0 +1 @@
crates/disk/src/aggtest.rs (new file, 158 lines)
@@ -0,0 +1,158 @@
use crate::eventblobs::EventChunkerMultifile;
use crate::eventchunker::EventChunkerConf;
use crate::AggQuerySingleChannel;
use crate::SfDbChConf;
use err::Error;
use netpod::range::evrange::NanoRange;
use netpod::test_data_base_path_databuffer;
use netpod::timeunits::*;
use netpod::ByteOrder;
use netpod::ByteSize;
use netpod::DiskIoTune;
use netpod::DtNano;
use netpod::Node;
use netpod::ScalarType;
use netpod::SfChFetchInfo;
use netpod::SfDatabuffer;
use netpod::SfDbChannel;
use netpod::Shape;

pub fn make_test_node(id: u32) -> Node {
    Node {
        host: "localhost".into(),
        listen: "0.0.0.0".into(),
        port: 8800 + id as u16,
        port_raw: 8800 + id as u16 + 100,
        // TODO use a common function to supply the tmp path.
        cache_base_path: test_data_base_path_databuffer().join(format!("node{:02}", id)),
        sf_databuffer: Some(SfDatabuffer {
            data_base_path: test_data_base_path_databuffer().join(format!("node{:02}", id)),
            ksprefix: "ks".into(),
            splits: None,
        }),
        archiver_appliance: None,
        channel_archiver: None,
        prometheus_api_bind: None,
    }
}

#[test]
fn agg_x_dim_0() {
    taskrun::run(async {
        agg_x_dim_0_inner().await;
        Ok::<_, Error>(())
    })
    .unwrap();
}

async fn agg_x_dim_0_inner() {
    let node = make_test_node(0);
    let query = AggQuerySingleChannel {
        channel_config: SfDbChConf {
            channel: SfDbChannel::from_name("sf-databuffer", "S10BC01-DBAM070:EOM1_T1"),
            keyspace: 2,
            time_bin_size: DtNano::from_ns(DAY),
            array: false,
            shape: Shape::Scalar,
            scalar_type: ScalarType::F64,
            byte_order: ByteOrder::Big,
            compression: true,
        },
        timebin: 18723,
        tb_file_count: 1,
        buffer_size: 1024 * 4,
    };
    let fetch_info = SfChFetchInfo::new(
        "sf-databuffer",
        "S10BC01-DBAM070:EOM1_T1",
        2,
        DtNano::from_ns(DAY),
        ByteOrder::Big,
        ScalarType::F64,
        Shape::Scalar,
    );
    let _bin_count = 20;
    let ts1 = query.timebin as u64 * query.channel_config.time_bin_size.ns();
    let ts2 = ts1 + HOUR * 24;
    let range = NanoRange { beg: ts1, end: ts2 };
    let event_chunker_conf = EventChunkerConf::new(ByteSize::from_kb(1024));
    // TODO let upstream already provide DiskIoTune:
    let mut disk_io_tune = DiskIoTune::default_for_testing();
    disk_io_tune.read_buffer_len = query.buffer_size as usize;
    let fut1 = EventChunkerMultifile::new(
        range.clone(),
        fetch_info,
        node.clone(),
        0,
        disk_io_tune,
        event_chunker_conf,
        false,
        true,
        // TODO
        32,
    );
    let _ = fut1;
    // TODO add the binning and expectation and await the result.
}

#[test]
fn agg_x_dim_1() {
    taskrun::run(async {
        agg_x_dim_1_inner().await;
        Ok::<_, Error>(())
    })
    .unwrap();
}

async fn agg_x_dim_1_inner() {
    // sf-databuffer
    // /data/sf-databuffer/daq_swissfel/daq_swissfel_3/byTime/S10BC01-DBAM070\:BAM_CH1_NORM/*
    // S10BC01-DBAM070:BAM_CH1_NORM
    let node = make_test_node(0);
    let query = AggQuerySingleChannel {
        channel_config: SfDbChConf {
            channel: SfDbChannel::from_name("ks", "wave1"),
            keyspace: 3,
            time_bin_size: DtNano::from_ns(DAY),
            array: true,
            shape: Shape::Wave(1024),
            scalar_type: ScalarType::F64,
            byte_order: ByteOrder::Big,
            compression: true,
        },
        timebin: 0,
        tb_file_count: 1,
        buffer_size: 17,
    };
    let fetch_info = SfChFetchInfo::new(
        "ks",
        "wave1",
        2,
        DtNano::from_ns(DAY),
        ByteOrder::Big,
        ScalarType::F64,
        Shape::Scalar,
    );
    let _bin_count = 10;
    let ts1 = query.timebin as u64 * query.channel_config.time_bin_size.ns();
    let ts2 = ts1 + HOUR * 24;
    let range = NanoRange { beg: ts1, end: ts2 };
    let event_chunker_conf = EventChunkerConf::new(ByteSize::from_kb(1024));
    // TODO let upstream already provide DiskIoTune:
    let mut disk_io_tune = DiskIoTune::default_for_testing();
    disk_io_tune.read_buffer_len = query.buffer_size as usize;
    let fut1 = super::eventblobs::EventChunkerMultifile::new(
        range.clone(),
        fetch_info,
        node.clone(),
        0,
        disk_io_tune,
        event_chunker_conf,
        false,
        true,
        // TODO
        32,
    );
    let _ = fut1;
    // TODO add the binning and expectation and await the result.
}
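For orientation, the range arithmetic in these tests: `timebin` indexes a bin of width `time_bin_size`, so the queried range is `[timebin * DAY, timebin * DAY + 24h)`. A small worked example, assuming timestamps are nanoseconds since the Unix epoch (which is what `NanoRange` and the `timeunits` constants suggest):

```rust
// Assumed constants, matching the usual netpod::timeunits convention.
const SEC: u64 = 1_000_000_000;
const HOUR: u64 = 3600 * SEC;
const DAY: u64 = 24 * HOUR;

fn main() {
    let timebin: u64 = 18723; // from agg_x_dim_0_inner
    let ts1 = timebin * DAY; // begin of the time bin
    let ts2 = ts1 + HOUR * 24; // exactly one day later
    // Day 18723 of the Unix epoch is 2021-04-06.
    println!("range beg {} ns, end {} ns", ts1, ts2);
    assert_eq!(ts2 - ts1, DAY);
}
```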
crates/disk/src/binned/binnedfrompbv.rs (new file, 264 lines)
@@ -0,0 +1,264 @@
use crate::agg::binnedt::TBinnerStream;
use crate::binned::query::PreBinnedQuery;
use crate::cache::node_ix_for_patch;
use err::Error;
use futures_core::Stream;
use futures_util::{FutureExt, StreamExt};
use http::{StatusCode, Uri};
use httpclient::HttpBodyAsAsyncRead;
use items::frame::decode_frame;
use items::{FrameDecodable, FrameType, FrameTypeInnerStatic, TimeBinnableType};
use items::{RangeCompletableItem, Sitemty, StreamItem};
use netpod::log::*;
use netpod::query::CacheUsage;
use netpod::x_bin_count;
use netpod::PreBinnedPatchIterator;
use netpod::{AggKind, AppendToUrl, BinnedRange, ByteSize, Channel, NodeConfigCached, PerfOpts, ScalarType, Shape};
use std::future::ready;
use std::marker::PhantomData;
use std::pin::Pin;
use std::str::FromStr;
use std::task::{Context, Poll};
use streams::frames::inmem::InMemoryFrameAsyncReadStream;
use url::Url;

pub struct FetchedPreBinned<TBT> {
    uri: Uri,
    resfut: Option<hyper::client::ResponseFuture>,
    res: Option<InMemoryFrameAsyncReadStream<HttpBodyAsAsyncRead>>,
    errored: bool,
    completed: bool,
    _m1: PhantomData<TBT>,
}

impl<TBT> FetchedPreBinned<TBT> {
    pub fn new(query: &PreBinnedQuery, host: String, port: u16) -> Result<Self, Error>
    where
        TBT: FrameTypeInnerStatic + TimeBinnableType,
        Sitemty<TBT>: FrameDecodable,
    {
        // TODO should not assume http:
        let mut url = Url::parse(&format!("http://{host}:{port}/api/4/prebinned"))?;
        query.append_to_url(&mut url);
        let ret = Self {
            uri: Uri::from_str(&url.to_string()).map_err(Error::from_string)?,
            resfut: None,
            res: None,
            errored: false,
            completed: false,
            _m1: PhantomData,
        };
        Ok(ret)
    }
}

impl<TBT> Stream for FetchedPreBinned<TBT>
where
    TBT: FrameTypeInnerStatic + TimeBinnableType,
    Sitemty<TBT>: FrameDecodable,
{
    type Item = Sitemty<TBT>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        'outer: loop {
            break if self.completed {
                panic!("poll_next on completed");
            } else if self.errored {
                self.completed = true;
                return Ready(None);
            } else if let Some(res) = self.res.as_mut() {
                match res.poll_next_unpin(cx) {
                    Ready(Some(Ok(item))) => match item {
                        StreamItem::Log(item) => Ready(Some(Ok(StreamItem::Log(item)))),
                        StreamItem::Stats(item) => Ready(Some(Ok(StreamItem::Stats(item)))),
                        StreamItem::DataItem(item) => match decode_frame::<Sitemty<TBT>>(&item) {
                            Ok(Ok(item)) => Ready(Some(Ok(item))),
                            Ok(Err(e)) => {
                                self.errored = true;
                                Ready(Some(Err(e)))
                            }
                            Err(e) => {
                                self.errored = true;
                                Ready(Some(Err(e)))
                            }
                        },
                    },
                    Ready(Some(Err(e))) => {
                        self.errored = true;
                        Ready(Some(Err(e)))
                    }
                    Ready(None) => {
                        self.completed = true;
                        Ready(None)
                    }
                    Pending => Pending,
                }
            } else if let Some(resfut) = self.resfut.as_mut() {
                match resfut.poll_unpin(cx) {
                    Ready(res) => match res {
                        Ok(res) => {
                            if res.status() == StatusCode::OK {
                                let perf_opts = PerfOpts { inmem_bufcap: 512 };
                                let s1 = HttpBodyAsAsyncRead::new(res);
                                let s2 = InMemoryFrameAsyncReadStream::new(s1, perf_opts.inmem_bufcap);
                                self.res = Some(s2);
                                continue 'outer;
                            } else {
                                let msg =
                                    format!("PreBinnedValueFetchedStream non-OK result from sub request: {res:?}");
                                error!("{msg}");
                                let e = Error::with_msg_no_trace(msg);
                                self.errored = true;
                                Ready(Some(Err(e)))
                            }
                        }
                        Err(e) => {
                            error!("PreBinnedValueStream error in stream {e:?}");
                            self.errored = true;
                            Ready(Some(Err(Error::from_string(e))))
                        }
                    },
                    Pending => Pending,
                }
            } else {
                match hyper::Request::builder()
                    .method(http::Method::GET)
                    .uri(&self.uri)
                    .body(hyper::Body::empty())
                {
                    Ok(req) => {
                        let client = hyper::Client::new();
                        self.resfut = Some(client.request(req));
                        continue 'outer;
                    }
                    Err(e) => {
                        self.errored = true;
                        Ready(Some(Err(Error::from_string(e))))
                    }
                }
            };
        }
    }
}

/// Generate bins from a range of pre-binned patches.
///
/// Takes an iterator over the necessary patches.
pub struct BinnedFromPreBinned<TBT>
where
    TBT: TimeBinnableType,
{
    // TODO get rid of box:
    inp: Pin<Box<dyn Stream<Item = Sitemty<TBT>> + Send>>,
    _m1: PhantomData<TBT>,
}

impl<TBT> BinnedFromPreBinned<TBT>
where
    TBT: TimeBinnableType<Output = TBT> + Unpin + 'static,
    Sitemty<TBT>: FrameType + FrameDecodable,
{
    pub fn new(
        patch_it: PreBinnedPatchIterator,
        channel: Channel,
        range: BinnedRange,
        scalar_type: ScalarType,
        shape: Shape,
        agg_kind: AggKind,
        cache_usage: CacheUsage,
        disk_io_buffer_size: usize,
        node_config: &NodeConfigCached,
        disk_stats_every: ByteSize,
        report_error: bool,
    ) -> Result<Self, Error> {
        let patches: Vec<_> = patch_it.collect();
        let mut sp = String::new();
        if false {
            // Convert this to a StreamLog message:
            for (i, p) in patches.iter().enumerate() {
                use std::fmt::Write;
                write!(sp, " • patch {i:2} {p:?}\n")?;
            }
            info!("Using these pre-binned patches:\n{sp}");
        }
        let pmax = patches.len();
        let inp = futures_util::stream::iter(patches.into_iter().enumerate())
            .map({
                let shape = shape.clone();
                let agg_kind = agg_kind.clone();
                let node_config = node_config.clone();
                move |(pix, patch)| {
                    let query = PreBinnedQuery::new(
                        patch,
                        channel.clone(),
                        scalar_type.clone(),
                        shape.clone(),
                        agg_kind.clone(),
                        cache_usage.clone(),
                        disk_io_buffer_size,
                        disk_stats_every.clone(),
                        report_error,
                    );
                    let nodeix = node_ix_for_patch(&query.patch(), &query.channel(), &node_config.node_config.cluster);
                    let node = &node_config.node_config.cluster.nodes[nodeix as usize];
                    let ret: Pin<Box<dyn Stream<Item = _> + Send>> =
                        match FetchedPreBinned::<TBT>::new(&query, node.host.clone(), node.port.clone()) {
                            Ok(stream) => Box::pin(stream.map(move |q| (pix, q))),
                            Err(e) => {
                                error!("error from PreBinnedValueFetchedStream::new {e:?}");
                                Box::pin(futures_util::stream::iter(vec![(pix, Err(e))]))
                            }
                        };
                    ret
                }
            })
            .flatten()
            .filter_map({
                let range = range.clone();
                move |(pix, k)| {
                    let fit_range = range.full_range();
                    let g = match k {
                        Ok(item) => match item {
                            StreamItem::Log(item) => Some(Ok(StreamItem::Log(item))),
                            StreamItem::Stats(item) => Some(Ok(StreamItem::Stats(item))),
                            StreamItem::DataItem(item) => match item {
                                RangeCompletableItem::RangeComplete => {
                                    if pix + 1 == pmax {
                                        Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)))
                                    } else {
                                        None
                                    }
                                }
                                RangeCompletableItem::Data(item) => {
                                    match crate::binned::FilterFittingInside::filter_fitting_inside(item, fit_range) {
                                        Some(item) => Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(item)))),
                                        None => None,
                                    }
                                }
                            },
                        },
                        Err(e) => Some(Err(e)),
                    };
                    ready(g)
                }
            });
        let inp = TBinnerStream::<_, TBT>::new(inp, range, x_bin_count(&shape, &agg_kind), agg_kind.do_time_weighted());
        Ok(Self {
            inp: Box::pin(inp),
            _m1: PhantomData,
        })
    }
}

impl<TBT> Stream for BinnedFromPreBinned<TBT>
where
    TBT: TimeBinnableType<Output = TBT> + Unpin + 'static,
    Sitemty<TBT>: FrameType + FrameDecodable,
{
    type Item = Sitemty<TBT>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        self.inp.poll_next_unpin(cx)
    }
}
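One detail in `BinnedFromPreBinned::new` deserves a callout: the `filter_map` forwards `RangeComplete` only when it comes from the last patch (`pix + 1 == pmax`), because completion of an earlier patch says nothing about the rest of the requested range. A self-contained sketch of that gating, with a hypothetical `Item` enum standing in for the crate's `Sitemty` items:

```rust
#[derive(Debug, PartialEq)]
enum Item {
    Data(u32),
    RangeComplete,
}

/// Keep Data from every patch, but forward RangeComplete only from the
/// final patch of the sequence.
fn filter_patch_items(items: Vec<(usize, Item)>, pmax: usize) -> Vec<Item> {
    items
        .into_iter()
        .filter_map(|(pix, item)| match item {
            Item::RangeComplete if pix + 1 == pmax => Some(Item::RangeComplete),
            Item::RangeComplete => None, // swallowed: not the last patch
            data => Some(data),
        })
        .collect()
}

fn main() {
    let inp = vec![
        (0, Item::Data(1)),
        (0, Item::RangeComplete), // dropped: patch 0 of 2
        (1, Item::Data(2)),
        (1, Item::RangeComplete), // forwarded: last patch
    ];
    assert_eq!(
        filter_patch_items(inp, 2),
        vec![Item::Data(1), Item::Data(2), Item::RangeComplete]
    );
}
```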
crates/disk/src/binned/dim1.rs (new file, 1 line)
@@ -0,0 +1 @@
crates/disk/src/binned/pbv.rs (new file, 537 lines)
@@ -0,0 +1,537 @@
use crate::agg::binnedt::TBinnerStream;
use crate::binned::binnedfrompbv::FetchedPreBinned;
use crate::binned::query::PreBinnedQuery;
use crate::binned::WithLen;
use crate::cache::{write_pb_cache_min_max_avg_scalar, CacheFileDesc, WrittenPbCache};
use crate::decode::{Endianness, EventValueFromBytes, EventValueShape, NumFromBytes};
use crate::merge::mergedfromremotes::MergedFromRemotes;
use crate::streamlog::Streamlog;
use err::Error;
use futures_core::Stream;
use futures_util::{FutureExt, StreamExt};
use items::numops::NumOps;
use items::{
    Appendable, Clearable, EventsNodeProcessor, EventsTypeAliases, FrameDecodable, FrameType, PushableIndex,
    RangeCompletableItem, ReadableFromFile, Sitemty, StreamItem, TimeBinnableType,
};
use netpod::log::*;
use netpod::query::{CacheUsage, RawEventsQuery};
use netpod::x_bin_count;
use netpod::{AggKind, BinnedRange, PreBinnedPatchIterator, PreBinnedPatchRange};
use netpod::{NodeConfigCached, PerfOpts};
use serde::Serialize;
use std::future::Future;
use std::io;
use std::marker::PhantomData;
use std::path::PathBuf;
use std::pin::Pin;
use std::task::{Context, Poll};
use tokio::fs::{File, OpenOptions};

pub struct PreBinnedValueStream<NTY, END, EVS, ENP>
where
    NTY: NumOps + NumFromBytes<NTY, END> + Serialize + 'static,
    END: Endianness + 'static,
    EVS: EventValueShape<NTY, END> + EventValueFromBytes<NTY, END> + 'static,
    ENP: EventsNodeProcessor<Input = <EVS as EventValueFromBytes<NTY, END>>::Batch>,
{
    query: PreBinnedQuery,
    agg_kind: AggKind,
    node_config: NodeConfigCached,
    open_check_local_file: Option<Pin<Box<dyn Future<Output = Result<File, io::Error>> + Send>>>,
    stream_from_other_inputs:
        Option<Pin<Box<dyn Stream<Item = Sitemty<<ENP as EventsTypeAliases>::TimeBinOutput>> + Send>>>,
    read_from_cache: bool,
    cache_written: bool,
    data_complete: bool,
    range_complete_observed: bool,
    range_complete_emitted: bool,
    errored: bool,
    all_done: bool,
    completed: bool,
    streamlog: Streamlog,
    values: Option<<ENP as EventsTypeAliases>::TimeBinOutput>,
    write_fut: Option<Pin<Box<dyn Future<Output = Result<WrittenPbCache, Error>> + Send>>>,
    read_cache_fut: Option<Pin<Box<dyn Future<Output = Sitemty<<ENP as EventsTypeAliases>::TimeBinOutput>> + Send>>>,
    _m1: PhantomData<NTY>,
    _m2: PhantomData<END>,
    _m3: PhantomData<EVS>,
    _m4: PhantomData<ENP>,
}

impl<NTY, END, EVS, ENP> PreBinnedValueStream<NTY, END, EVS, ENP>
where
    NTY: NumOps + NumFromBytes<NTY, END> + Serialize + 'static,
    END: Endianness + 'static,
    EVS: EventValueShape<NTY, END> + EventValueFromBytes<NTY, END> + 'static,
    ENP: EventsNodeProcessor<Input = <EVS as EventValueFromBytes<NTY, END>>::Batch> + 'static,
    <ENP as EventsNodeProcessor>::Output: PushableIndex + Appendable + Clearable,
    // TODO is this needed:
    Sitemty<<ENP as EventsNodeProcessor>::Output>: FrameType,
    // TODO who exactly needs this DeserializeOwned?
    Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>: FrameType + FrameDecodable,
{
    pub fn new(query: PreBinnedQuery, agg_kind: AggKind, node_config: &NodeConfigCached) -> Self {
        Self {
            query,
            agg_kind,
            node_config: node_config.clone(),
            open_check_local_file: None,
            stream_from_other_inputs: None,
            read_from_cache: false,
            cache_written: false,
            data_complete: false,
            range_complete_observed: false,
            range_complete_emitted: false,
            errored: false,
            all_done: false,
            completed: false,
            streamlog: Streamlog::new(node_config.ix as u32),
            // TODO use alias via some trait associated type:
            //values: <<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output as Appendable>::empty(),
            values: None,
            write_fut: None,
            read_cache_fut: None,
            _m1: PhantomData,
            _m2: PhantomData,
            _m3: PhantomData,
            _m4: PhantomData,
        }
    }

    fn setup_merged_from_remotes(
        &mut self,
    ) -> Result<
        Pin<Box<dyn Stream<Item = Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>> + Send>>,
        Error,
    > {
        // TODO let PreBinnedQuery provide the tune and pass to RawEventsQuery:
        let evq = RawEventsQuery::new(
            self.query.channel().clone(),
            self.query.patch().patch_range(),
            self.query.agg_kind().clone(),
        );
        if self.query.patch().patch_t_len() % self.query.patch().bin_t_len() != 0 {
            let msg = format!(
                "Patch length inconsistency {} {}",
                self.query.patch().patch_t_len(),
                self.query.patch().bin_t_len()
            );
            error!("{}", msg);
            return Err(Error::with_msg(msg));
        }
        // TODO do I need to set up more transformations or binning to deliver the requested data?
        let count = self.query.patch().patch_t_len() / self.query.patch().bin_t_len();
        let range = BinnedRange::covering_range(evq.range.clone(), count as u32)?;
        let perf_opts = PerfOpts { inmem_bufcap: 512 };
        let s = MergedFromRemotes::<ENP>::new(evq, perf_opts, self.node_config.node_config.cluster.clone());
        let ret = TBinnerStream::<_, <ENP as EventsNodeProcessor>::Output>::new(
            s,
            range,
            x_bin_count(&self.query.shape().clone(), &self.agg_kind),
            self.agg_kind.do_time_weighted(),
        );
        Ok(Box::pin(ret))
    }

    fn setup_from_higher_res_prebinned(
        &mut self,
        range: PreBinnedPatchRange,
    ) -> Result<
        Pin<Box<dyn Stream<Item = Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>> + Send>>,
        Error,
    > {
        let g = self.query.patch().bin_t_len();
        let h = range.grid_spec.bin_t_len();
        trace!(
            "try_setup_fetch_prebinned_higher_res found g {} h {} ratio {} mod {} {:?}",
            g,
            h,
            g / h,
            g % h,
            range,
        );
        if g / h <= 1 {
            let msg = format!("try_setup_fetch_prebinned_higher_res g {} h {}", g, h);
            return Err(Error::with_msg(msg));
        }
        if g / h > 1024 * 10 {
            let msg = format!("try_setup_fetch_prebinned_higher_res g {} h {}", g, h);
            return Err(Error::with_msg(msg));
        }
        if g % h != 0 {
            let msg = format!("try_setup_fetch_prebinned_higher_res g {} h {}", g, h);
            return Err(Error::with_msg(msg));
        }
        let node_config = self.node_config.clone();
        let patch_it = PreBinnedPatchIterator::from_range(range);
        let s = futures_util::stream::iter(patch_it)
            .map({
                let q2 = self.query.clone();
                let disk_io_buffer_size = self.query.disk_io_buffer_size();
                let disk_stats_every = self.query.disk_stats_every().clone();
                let report_error = self.query.report_error();
                move |patch| {
                    let query = PreBinnedQuery::new(
                        patch,
                        q2.channel().clone(),
                        q2.scalar_type().clone(),
                        q2.shape().clone(),
                        q2.agg_kind().clone(),
                        q2.cache_usage().clone(),
                        disk_io_buffer_size,
                        disk_stats_every.clone(),
                        report_error,
                    );
                    let nodeix = crate::cache::node_ix_for_patch(
                        &query.patch(),
                        &query.channel(),
                        &node_config.node_config.cluster,
                    );
                    let node = &node_config.node_config.cluster.nodes[nodeix as usize];
                    let ret =
                        FetchedPreBinned::<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>::new(
                            &query,
                            node.host.clone(),
                            node.port.clone(),
                        )?;
                    Ok(ret)
                }
            })
            .map(|k| {
                let s: Pin<Box<dyn Stream<Item = _> + Send>> = match k {
                    Ok(k) => Box::pin(k),
                    Err(e) => Box::pin(futures_util::stream::iter(vec![Err(e)])),
                };
                s
            })
            .flatten();
        Ok(Box::pin(s))
    }

    fn try_setup_fetch_prebinned_higher_res(&mut self) -> Result<(), Error> {
        info!("try_setup_fetch_prebinned_higher_res");
        let range = self.query.patch().patch_range();
        match PreBinnedPatchRange::covering_range(range, self.query.patch().bin_count() + 1) {
            Ok(Some(range)) => {
                self.stream_from_other_inputs = Some(self.setup_from_higher_res_prebinned(range)?);
            }
            Ok(None) => {
                self.stream_from_other_inputs = Some(self.setup_merged_from_remotes()?);
            }
            Err(e) => return Err(e),
        }
        Ok(())
    }

    fn poll_write_fut(
        self: &mut Self,
        mut fut: Pin<Box<dyn Future<Output = Result<WrittenPbCache, Error>> + Send>>,
        cx: &mut Context,
    ) -> Poll<Option<Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>>> {
        trace!("poll_write_fut");
        use Poll::*;
        match fut.poll_unpin(cx) {
            Ready(item) => {
                self.cache_written = true;
                self.write_fut = None;
                match item {
                    Ok(res) => {
                        self.streamlog.append(
                            Level::INFO,
                            format!(
                                "cache file written bytes: {} duration {} ms",
                                res.bytes,
                                res.duration.as_millis()
                            ),
                        );
                        self.all_done = true;
                        Ready(None)
                    }
                    Err(e) => {
                        self.errored = true;
                        Ready(Some(Err(e)))
                    }
                }
            }
            Pending => {
                self.write_fut = Some(fut);
                Pending
            }
        }
    }

    fn poll_read_cache_fut(
        self: &mut Self,
        mut fut: Pin<
            Box<
                dyn Future<
                        Output = Result<
                            StreamItem<RangeCompletableItem<<ENP as EventsTypeAliases>::TimeBinOutput>>,
                            Error,
                        >,
                    > + Send,
            >,
        >,
        cx: &mut Context,
    ) -> Poll<Option<Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>>> {
        trace!("poll_read_cache_fut");
        use Poll::*;
        match fut.poll_unpin(cx) {
            Ready(item) => {
                self.read_cache_fut = None;
                match item {
                    Ok(item) => {
                        self.data_complete = true;
                        self.range_complete_observed = true;
                        Ready(Some(Ok(item)))
                    }
                    Err(e) => {
                        self.errored = true;
                        Ready(Some(Err(e)))
                    }
                }
            }
            Pending => {
                self.read_cache_fut = Some(fut);
                Pending
            }
        }
    }

    fn handle_data_complete(
        self: &mut Self,
    ) -> Poll<Option<Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>>> {
        trace!("handle_data_complete");
        use Poll::*;
        if self.cache_written {
            // TODO can we ever get here?
            if self.range_complete_observed {
                self.range_complete_emitted = true;
                let item = RangeCompletableItem::RangeComplete;
                Ready(Some(Ok(StreamItem::DataItem(item))))
            } else {
                self.all_done = true;
                Ready(None)
            }
        } else if self.read_from_cache {
            // TODO refactor: raising cache_written even though we did not actually write is misleading.
            self.cache_written = true;
            self.all_done = true;
            Ready(None)
        } else {
            match self.query.cache_usage() {
                CacheUsage::Use | CacheUsage::Recreate => {
                    if let Some(values) = self.values.take() {
                        let msg = format!(
                            "write cache file query: {:?} bin count: {}",
                            self.query.patch(),
                            values.len(),
                        );
                        self.streamlog.append(Level::INFO, msg);
                        let fut = write_pb_cache_min_max_avg_scalar(
                            values,
                            self.query.patch().clone(),
                            self.query.agg_kind().clone(),
                            self.query.channel().clone(),
                            self.node_config.clone(),
                        );
                        self.write_fut = Some(Box::pin(fut));
                        Ready(None)
                    } else {
                        warn!("no values to write to cache");
                        Ready(None)
                    }
                }
                _ => {
                    // TODO refactor: raising cache_written even though we did not actually write is misleading.
                    self.cache_written = true;
                    self.all_done = true;
                    Ready(None)
                }
            }
        }
    }

    fn poll_stream_from_other_inputs(
        self: &mut Self,
        mut fut: Pin<
            Box<
                dyn Stream<
                        Item = Result<
                            StreamItem<RangeCompletableItem<<ENP as EventsTypeAliases>::TimeBinOutput>>,
                            Error,
                        >,
                    > + Send,
            >,
        >,
        cx: &mut Context,
    ) -> Poll<Option<Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>>> {
        use Poll::*;
        match fut.poll_next_unpin(cx) {
            Ready(Some(k)) => match k {
                Ok(item) => {
                    self.stream_from_other_inputs = Some(fut);
                    match item {
                        StreamItem::Log(item) => Ready(Some(Ok(StreamItem::Log(item)))),
                        StreamItem::Stats(item) => Ready(Some(Ok(StreamItem::Stats(item)))),
                        StreamItem::DataItem(item) => match item {
                            RangeCompletableItem::RangeComplete => {
                                self.range_complete_observed = true;
                                Ready(None)
                            }
                            RangeCompletableItem::Data(item) => {
                                if let Some(values) = &mut self.values {
                                    values.append(&item);
                                } else {
                                    let mut values = item.empty_like_self();
                                    values.append(&item);
                                    self.values = Some(values);
                                }
                                Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(item)))))
                            }
                        },
                    }
                }
                Err(e) => {
                    self.errored = true;
                    Ready(Some(Err(e)))
                }
            },
            Ready(None) => {
                self.data_complete = true;
                Ready(None)
            }
            Pending => {
                self.stream_from_other_inputs = Some(fut);
                Pending
            }
        }
    }

    fn poll_open_check_local_file(
        self: &mut Self,
        mut fut: Pin<Box<dyn Future<Output = Result<File, io::Error>> + Send>>,
        cx: &mut Context,
    ) -> Poll<Option<Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>>> {
        use Poll::*;
        match fut.poll_unpin(cx) {
            Ready(item) => {
                match item {
                    Ok(file) => {
                        self.read_from_cache = true;
                        let fut =
                            <<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output as ReadableFromFile>::read_from_file(file)?;
                        self.read_cache_fut = Some(Box::pin(fut));
                        // Return Ready(None) to signal that nothing is Pending but we need to get polled again.
                        //continue 'outer;
                        Ready(None)
                    }
                    Err(e) => match e.kind() {
                        // TODO other error kinds
                        io::ErrorKind::NotFound => match self.try_setup_fetch_prebinned_higher_res() {
                            Ok(_) => {
                                if self.stream_from_other_inputs.is_none() {
                                    let e =
                                        Err(Error::with_msg(format!("try_setup_fetch_prebinned_higher_res failed")));
                                    self.errored = true;
                                    Ready(Some(e))
                                } else {
                                    //continue 'outer;
                                    Ready(None)
                                }
                            }
                            Err(e) => {
                                let e =
                                    Error::with_msg(format!("try_setup_fetch_prebinned_higher_res error: {:?}", e));
                                self.errored = true;
                                Ready(Some(Err(e)))
                            }
                        },
                        _ => {
                            error!("File I/O error: kind {:?} {:?}\n\n..............", e.kind(), e);
                            self.errored = true;
                            Ready(Some(Err(e.into())))
                        }
                    },
                }
            }
            Pending => {
                self.open_check_local_file = Some(fut);
                Pending
            }
        }
    }
}

macro_rules! some_or_continue {
    ($x:expr) => {
        if let Ready(None) = $x {
            continue;
        } else {
            $x
        }
    };
}

impl<NTY, END, EVS, ENP> Stream for PreBinnedValueStream<NTY, END, EVS, ENP>
where
    NTY: NumOps + NumFromBytes<NTY, END> + Serialize + Unpin + 'static,
    END: Endianness + Unpin + 'static,
    EVS: EventValueShape<NTY, END> + EventValueFromBytes<NTY, END> + Unpin + 'static,
    ENP: EventsNodeProcessor<Input = <EVS as EventValueFromBytes<NTY, END>>::Batch> + Unpin + 'static,
    <ENP as EventsNodeProcessor>::Output: PushableIndex + Appendable + Clearable,
    // TODO needed?
    Sitemty<<ENP as EventsNodeProcessor>::Output>: FrameType,
    Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>: FrameType + FrameDecodable,
{
    type Item = Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        loop {
            break if self.completed {
                panic!("PreBinnedValueStream poll_next on completed");
            } else if self.errored {
                self.completed = true;
                Ready(None)
            } else if self.all_done {
                self.completed = true;
                Ready(None)
            } else if let Some(item) = self.streamlog.pop() {
                Ready(Some(Ok(StreamItem::Log(item))))
            } else if let Some(fut) = self.write_fut.take() {
                let x = Self::poll_write_fut(&mut self, fut, cx);
                some_or_continue!(x)
            } else if let Some(fut) = self.read_cache_fut.take() {
                let x = Self::poll_read_cache_fut(&mut self, fut, cx);
                some_or_continue!(x)
            } else if self.range_complete_emitted {
                self.completed = true;
                Ready(None)
            } else if self.data_complete {
                let x = Self::handle_data_complete(&mut self);
                some_or_continue!(x)
            } else if let Some(fut) = self.stream_from_other_inputs.take() {
                let x = Self::poll_stream_from_other_inputs(&mut self, fut, cx);
                some_or_continue!(x)
            } else if let Some(fut) = self.open_check_local_file.take() {
                let x = Self::poll_open_check_local_file(&mut self, fut, cx);
                some_or_continue!(x)
            } else {
                let cfd = CacheFileDesc::new(
                    self.query.channel().clone(),
                    self.query.patch().clone(),
                    self.query.agg_kind().clone(),
                );
                let path = match self.query.cache_usage() {
                    CacheUsage::Use => cfd.path(&self.node_config),
                    _ => PathBuf::from("DOESNOTEXIST"),
                };
                let fut = async { OpenOptions::new().read(true).open(path).await };
                self.open_check_local_file = Some(Box::pin(fut));
                continue;
            };
        }
    }
}
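The `some_or_continue!` macro is what lets the helper methods above return `Ready(None)` with the meaning "internal state advanced, re-run the branch chain" rather than "stream ended"; only the terminal branches of `poll_next` itself emit a final `Ready(None)`. A self-contained sketch of that control flow, with hypothetical precomputed step values in place of the real poll helpers:

```rust
use std::task::Poll;

// Same shape as the macro in pbv.rs, but with the Poll path written out.
macro_rules! some_or_continue {
    ($x:expr) => {
        if let Poll::Ready(None) = $x {
            continue; // state advanced, evaluate the branch chain again
        } else {
            $x // Pending or a real item: hand it to the caller
        }
    };
}

fn main() {
    // Two helpers report "state advanced" before one yields an item.
    let steps = vec![Poll::Ready(None), Poll::Ready(None), Poll::Ready(Some(42))];
    let mut it = steps.into_iter();
    let out = loop {
        let x = it.next().unwrap();
        break some_or_continue!(x);
    };
    assert_eq!(out, Poll::Ready(Some(42)));
}
```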
crates/disk/src/binned/prebinned.rs (new file, 260 lines)
@@ -0,0 +1,260 @@
use crate::binned::pbv::PreBinnedValueStream;
use crate::binned::query::PreBinnedQuery;
use crate::cache::node_ix_for_patch;
use crate::decode::{
    BigEndian, Endianness, EventValueFromBytes, EventValueShape, EventValuesDim0Case, EventValuesDim1Case,
    LittleEndian, NumFromBytes,
};
use bytes::Bytes;
use dbconn::bincache::pre_binned_value_stream;
use err::Error;
use futures_core::Stream;
use futures_util::StreamExt;
use items::numops::{BoolNum, NumOps, StringNum};
use items::{
    Appendable, Clearable, EventsNodeProcessor, Framable, FrameDecodable, FrameType, FrameTypeInnerDyn, PushableIndex,
    RangeCompletableItem, Sitemty, StreamItem, TimeBinnableType, TimeBinned,
};
use netpod::log::*;
use netpod::{AggKind, ByteOrder, ChannelTyped, NodeConfigCached, ScalarType, Shape};
use serde::Serialize;
use std::pin::Pin;

async fn make_num_pipeline_nty_end_evs_enp<NTY, END, EVS, ENP>(
    scalar_type: ScalarType,
    shape: Shape,
    agg_kind: AggKind,
    _event_value_shape: EVS,
    _events_node_proc: ENP,
    query: PreBinnedQuery,
    node_config: &NodeConfigCached,
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<Box<dyn TimeBinned>>> + Send>>, Error>
where
    NTY: NumOps + NumFromBytes<NTY, END> + Serialize + 'static,
    END: Endianness + 'static,
    EVS: EventValueShape<NTY, END> + EventValueFromBytes<NTY, END> + 'static,
    ENP: EventsNodeProcessor<Input = <EVS as EventValueFromBytes<NTY, END>>::Batch> + 'static,
    <ENP as EventsNodeProcessor>::Output: PushableIndex + Appendable + Clearable + 'static,
    <<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output: FrameTypeInnerDyn + TimeBinned,
    Sitemty<<ENP as EventsNodeProcessor>::Output>: FrameType + Framable + 'static,
    Sitemty<<<ENP as EventsNodeProcessor>::Output as TimeBinnableType>::Output>: Framable + FrameType + FrameDecodable,
{
    if let Some(scyconf) = &node_config.node_config.cluster.cache_scylla {
        trace!("~~~~~~~~~~~~~~~ make_num_pipeline_nty_end_evs_enp using scylla as cache");
        let chn = ChannelTyped {
            channel: query.channel().clone(),
            scalar_type,
            shape,
        };
        let stream = pre_binned_value_stream(
            chn.channel().series().unwrap(),
            &chn,
            query.patch(),
            agg_kind,
            query.cache_usage(),
            scyconf,
        )
        .await?;
        let stream = stream.map(|x| {
            let ret = match x {
                Ok(k) => Ok(StreamItem::DataItem(RangeCompletableItem::Data(k))),
                Err(e) => Err(e),
            };
            ret
        });
        let stream = Box::pin(stream) as Pin<Box<dyn Stream<Item = Sitemty<Box<dyn TimeBinned>>> + Send>>;
        Ok(stream)
    } else {
        let ret = PreBinnedValueStream::<NTY, END, EVS, ENP>::new(query, agg_kind, node_config);
        let ret = StreamExt::map(ret, |item| {
            //
            match item {
                Ok(StreamItem::DataItem(RangeCompletableItem::Data(k))) => {
                    let g = Box::new(k) as Box<dyn TimeBinned>;
                    Ok(StreamItem::DataItem(RangeCompletableItem::Data(g)))
                }
                Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)) => {
                    Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))
                }
                Ok(StreamItem::Log(k)) => Ok(StreamItem::Log(k)),
                Ok(StreamItem::Stats(k)) => Ok(StreamItem::Stats(k)),
                Err(e) => Err(e),
            }
        });
        Ok(Box::pin(ret))
    }
}

async fn make_num_pipeline_nty_end<NTY, END>(
    scalar_type: ScalarType,
    shape: Shape,
    agg_kind: AggKind,
    query: PreBinnedQuery,
    node_config: &NodeConfigCached,
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<Box<dyn TimeBinned>>> + Send>>, Error>
where
    NTY: NumOps + NumFromBytes<NTY, END> + Serialize + 'static,
    END: Endianness + 'static,
{
    match shape {
        Shape::Scalar => {
            let evs = EventValuesDim0Case::new();
            match agg_kind {
                AggKind::EventBlobs => panic!(),
                AggKind::TimeWeightedScalar | AggKind::DimXBins1 => {
                    let events_node_proc = <<EventValuesDim0Case<NTY> as EventValueShape<NTY, END>>::NumXAggToSingleBin as EventsNodeProcessor>::create(shape.clone(), agg_kind.clone());
                    make_num_pipeline_nty_end_evs_enp::<NTY, END, _, _>(
                        scalar_type,
                        shape,
                        agg_kind,
                        evs,
                        events_node_proc,
                        query,
                        node_config,
                    )
                    .await
                }
                AggKind::DimXBinsN(_) => {
                    let events_node_proc = <<EventValuesDim0Case<NTY> as EventValueShape<NTY, END>>::NumXAggToNBins as EventsNodeProcessor>::create(shape.clone(), agg_kind.clone());
                    make_num_pipeline_nty_end_evs_enp::<NTY, END, _, _>(
                        scalar_type,
                        shape,
                        agg_kind,
                        evs,
                        events_node_proc,
                        query,
                        node_config,
                    )
                    .await
                }
                AggKind::Plain => {
                    panic!();
                }
                AggKind::Stats1 => {
                    // Currently not meant to be binned.
                    panic!();
                }
            }
        }
        Shape::Wave(n) => {
            let evs = EventValuesDim1Case::new(n);
            match agg_kind {
                AggKind::EventBlobs => panic!(),
                AggKind::TimeWeightedScalar | AggKind::DimXBins1 => {
                    let events_node_proc = <<EventValuesDim1Case<NTY> as EventValueShape<NTY, END>>::NumXAggToSingleBin as EventsNodeProcessor>::create(shape.clone(), agg_kind.clone());
                    make_num_pipeline_nty_end_evs_enp::<NTY, END, _, _>(
                        scalar_type,
                        shape,
                        agg_kind,
                        evs,
                        events_node_proc,
                        query,
                        node_config,
                    )
                    .await
                }
                AggKind::DimXBinsN(_) => {
                    let events_node_proc = <<EventValuesDim1Case<NTY> as EventValueShape<NTY, END>>::NumXAggToNBins as EventsNodeProcessor>::create(shape.clone(), agg_kind.clone());
                    make_num_pipeline_nty_end_evs_enp::<NTY, END, _, _>(
                        scalar_type,
                        shape,
                        agg_kind,
                        evs,
                        events_node_proc,
                        query,
                        node_config,
                    )
                    .await
                }
                AggKind::Plain => {
                    panic!();
                }
                AggKind::Stats1 => {
                    // Currently not meant to be binned.
                    panic!();
                }
            }
        }
        Shape::Image(..) => {
            // TODO image binning/aggregation
            err::todoval()
        }
    }
}

macro_rules! match_end {
    ($nty:ident, $end:expr, $scalar_type:expr, $shape:expr, $agg_kind:expr, $query:expr, $node_config:expr) => {
        match $end {
            ByteOrder::Little => {
                make_num_pipeline_nty_end::<$nty, LittleEndian>($scalar_type, $shape, $agg_kind, $query, $node_config)
                    .await
            }
            ByteOrder::Big => {
                make_num_pipeline_nty_end::<$nty, BigEndian>($scalar_type, $shape, $agg_kind, $query, $node_config)
                    .await
            }
        }
    };
}

async fn make_num_pipeline(
    scalar_type: ScalarType,
    byte_order: ByteOrder,
    shape: Shape,
    agg_kind: AggKind,
    query: PreBinnedQuery,
    node_config: &NodeConfigCached,
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<Box<dyn TimeBinned>>> + Send>>, Error> {
    match scalar_type {
        ScalarType::U8 => match_end!(u8, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::U16 => match_end!(u16, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::U32 => match_end!(u32, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::U64 => match_end!(u64, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::I8 => match_end!(i8, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::I16 => match_end!(i16, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::I32 => match_end!(i32, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::I64 => match_end!(i64, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::F32 => match_end!(f32, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::F64 => match_end!(f64, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::BOOL => match_end!(BoolNum, byte_order, scalar_type, shape, agg_kind, query, node_config),
        ScalarType::STRING => match_end!(StringNum, byte_order, scalar_type, shape, agg_kind, query, node_config),
    }
}

pub async fn pre_binned_bytes_for_http(
    node_config: &NodeConfigCached,
    query: &PreBinnedQuery,
) -> Result<Pin<Box<dyn Stream<Item = Result<Bytes, Error>> + Send>>, Error> {
    if query.channel().backend != node_config.node_config.cluster.backend {
        let err = Error::with_msg(format!(
            "backend mismatch node: {} requested: {}",
            node_config.node_config.cluster.backend,
            query.channel().backend
        ));
        return Err(err);
    }
    let patch_node_ix = node_ix_for_patch(query.patch(), query.channel(), &node_config.node_config.cluster);
    if node_config.ix as u32 != patch_node_ix {
        let err = Error::with_msg(format!(
            "pre_binned_bytes_for_http node mismatch node_config.ix {} patch_node_ix {}",
            node_config.ix, patch_node_ix
        ));
        return Err(err);
    }
    let ret = make_num_pipeline(
        query.scalar_type().clone(),
        // TODO actually, make_num_pipeline should not depend on endianness.
        ByteOrder::Little,
        query.shape().clone(),
        query.agg_kind().clone(),
        query.clone(),
        node_config,
    )
    .await?
    .map(|item| match item.make_frame() {
        Ok(item) => Ok(item.freeze()),
        Err(e) => Err(e),
    });
    let ret = Box::pin(ret);
    Ok(ret)
}
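`make_num_pipeline` and `match_end!` implement a common dispatch pattern: runtime tags (`ScalarType`, `ByteOrder`) are matched once at the boundary and select a fully monomorphized generic pipeline. A standalone sketch of that pattern follows; the `Endian` trait, the `pipeline` function, and the two enums here are simplified stand-ins for the crate's types, not its actual API:

```rust
trait Endian {
    const NAME: &'static str;
}
struct LittleEndian;
struct BigEndian;
impl Endian for LittleEndian {
    const NAME: &'static str = "LE";
}
impl Endian for BigEndian {
    const NAME: &'static str = "BE";
}

enum ByteOrder {
    Little,
    Big,
}
enum ScalarType {
    F32,
    F64,
}

// Stand-in for make_num_pipeline_nty_end: generic over value type and endianness.
fn pipeline<NTY: Default + std::fmt::Debug, END: Endian>() -> String {
    format!("{:?} ({})", NTY::default(), END::NAME)
}

// Stand-in for match_end!: turn the runtime ByteOrder into a type parameter.
macro_rules! match_end {
    ($nty:ty, $end:expr) => {
        match $end {
            ByteOrder::Little => pipeline::<$nty, LittleEndian>(),
            ByteOrder::Big => pipeline::<$nty, BigEndian>(),
        }
    };
}

fn make_pipeline(scalar_type: ScalarType, byte_order: ByteOrder) -> String {
    match scalar_type {
        ScalarType::F32 => match_end!(f32, byte_order),
        ScalarType::F64 => match_end!(f64, byte_order),
    }
}

fn main() {
    assert_eq!(make_pipeline(ScalarType::F64, ByteOrder::Big), "0.0 (BE)");
}
```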
crates/disk/src/binnedstream.rs (new file, 27 lines)
@@ -0,0 +1,27 @@
use err::Error;
use futures_util::Stream;
use futures_util::StreamExt;
use std::pin::Pin;
use std::task::{Context, Poll};

// TODO remove after refactor.
pub struct BoxedStream<I> {
    inp: Pin<Box<dyn Stream<Item = I> + Send>>,
}

impl<I> BoxedStream<I> {
    pub fn new<T>(inp: T) -> Result<Self, Error>
    where
        T: Stream<Item = I> + Send + 'static,
    {
        Ok(Self { inp: Box::pin(inp) })
    }
}

impl<I> Stream for BoxedStream<I> {
    type Item = I;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        self.inp.poll_next_unpin(cx)
    }
}
244
crates/disk/src/cache.rs
Normal file
244
crates/disk/src/cache.rs
Normal file
@@ -0,0 +1,244 @@
|
||||
use chrono::Utc;
|
||||
use err::Error;
|
||||
use netpod::log::*;
|
||||
use netpod::timeunits::SEC;
|
||||
use netpod::AggKind;
|
||||
use netpod::Cluster;
|
||||
use netpod::NodeConfigCached;
|
||||
use netpod::PreBinnedPatchCoordEnum;
|
||||
use netpod::SfDbChannel;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use std::collections::VecDeque;
|
||||
use std::io;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
use tiny_keccak::Hasher;
|
||||
|
||||
// For file-based caching, this determined the node where the cache file is located.
|
||||
// No longer needed for scylla-based caching.
|
||||
pub fn node_ix_for_patch(patch_coord: &PreBinnedPatchCoordEnum, channel: &SfDbChannel, cluster: &Cluster) -> u32 {
|
||||
let mut hash = tiny_keccak::Sha3::v256();
|
||||
hash.update(channel.backend().as_bytes());
|
||||
hash.update(channel.name().as_bytes());
|
||||
/*hash.update(&patch_coord.patch_beg().to_le_bytes());
|
||||
hash.update(&patch_coord.patch_end().to_le_bytes());
|
||||
hash.update(&patch_coord.bin_t_len().to_le_bytes());
|
||||
hash.update(&patch_coord.patch_t_len().to_le_bytes());*/
|
||||
let mut out = [0; 32];
|
||||
hash.finalize(&mut out);
|
||||
let a = [out[0], out[1], out[2], out[3]];
|
||||
let ix = u32::from_le_bytes(a) % cluster.nodes.len() as u32;
|
||||
ix
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct CacheFileDesc {
|
||||
// What identifies a cached file?
|
||||
channel: SfDbChannel,
|
||||
patch: PreBinnedPatchCoordEnum,
|
||||
agg_kind: AggKind,
|
||||
}
|
||||
|
||||
impl CacheFileDesc {
|
||||
pub fn new(channel: SfDbChannel, patch: PreBinnedPatchCoordEnum, agg_kind: AggKind) -> Self {
|
||||
Self {
|
||||
channel,
|
||||
patch,
|
||||
agg_kind,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn hash(&self) -> String {
|
||||
let mut h = tiny_keccak::Sha3::v256();
|
||||
h.update(b"V000");
|
||||
h.update(self.channel.backend().as_bytes());
|
||||
h.update(self.channel.name().as_bytes());
|
||||
h.update(format!("{}", self.agg_kind).as_bytes());
|
||||
//h.update(&self.patch.spec().bin_t_len().to_le_bytes());
|
||||
//h.update(&self.patch.spec().patch_t_len().to_le_bytes());
|
||||
//h.update(&self.patch.ix().to_le_bytes());
|
||||
let mut buf = [0; 32];
|
||||
h.finalize(&mut buf);
|
||||
hex::encode(&buf)
|
||||
}
|
||||
|
||||
pub fn hash_channel(&self) -> String {
|
||||
let mut h = tiny_keccak::Sha3::v256();
|
||||
h.update(b"V000");
|
||||
h.update(self.channel.backend().as_bytes());
|
||||
h.update(self.channel.name().as_bytes());
|
||||
let mut buf = [0; 32];
|
||||
h.finalize(&mut buf);
|
||||
hex::encode(&buf)
|
||||
}
|
||||
|
||||
pub fn path(&self, node_config: &NodeConfigCached) -> PathBuf {
|
||||
let hash = self.hash();
|
||||
let hc = self.hash_channel();
|
||||
node_config
|
||||
.node
|
||||
.cache_base_path
|
||||
.join("cache")
|
||||
.join(&hc[0..3])
|
||||
.join(&hc[3..6])
|
||||
.join(self.channel.name())
|
||||
.join(format!("{}", self.agg_kind))
|
||||
/*.join(format!(
|
||||
"{:010}-{:010}",
|
||||
self.patch.spec().bin_t_len() / SEC,
|
||||
self.patch.spec().patch_t_len() / SEC
|
||||
))
|
||||
.join(format!("{}-{:012}", &hash[0..6], self.patch.ix()))*/
|
||||
}
|
||||
}
|
||||
|
||||
pub struct WrittenPbCache {
|
||||
pub bytes: u64,
|
||||
pub duration: Duration,
|
||||
}
|
||||
|
||||
// TODO only used for old archiver
|
||||
pub async fn write_pb_cache_min_max_avg_scalar<T>(
|
||||
values: T,
|
||||
patch: PreBinnedPatchCoordEnum,
|
||||
agg_kind: AggKind,
|
||||
channel: SfDbChannel,
|
||||
node_config: NodeConfigCached,
|
||||
) -> Result<WrittenPbCache, Error>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
let cfd = CacheFileDesc {
|
||||
channel: channel.clone(),
|
||||
patch: patch.clone(),
|
||||
agg_kind: agg_kind.clone(),
|
||||
};
|
||||
let path = cfd.path(&node_config);
|
||||
let enc = serde_cbor::to_vec(&values)?;
|
||||
let ts1 = Instant::now();
|
||||
tokio::fs::create_dir_all(path.parent().unwrap()).await.map_err(|e| {
|
||||
error!("can not create cache directory {:?}", path.parent());
|
||||
e
|
||||
})?;
|
||||
let now = Utc::now();
|
||||
let mut h = crc32fast::Hasher::new();
|
||||
h.update(&now.timestamp_nanos().to_le_bytes());
|
||||
let r = h.finalize();
|
||||
let tmp_path =
|
||||
path.parent()
|
||||
.unwrap()
|
||||
.join(format!("{}.tmp.{:08x}", path.file_name().unwrap().to_str().unwrap(), r));
|
||||
let res = tokio::task::spawn_blocking({
|
||||
let tmp_path = tmp_path.clone();
|
||||
move || {
|
||||
use fs2::FileExt;
|
||||
use io::Write;
|
||||
info!("try to write tmp at {:?}", tmp_path);
|
||||
let mut f = std::fs::OpenOptions::new()
|
||||
.create_new(true)
|
||||
.write(true)
|
||||
.open(&tmp_path)?;
|
||||
if false {
|
||||
f.lock_exclusive()?;
|
||||
}
|
||||
f.write_all(&enc)?;
|
||||
if false {
|
||||
f.unlock()?;
|
||||
}
|
||||
f.flush()?;
|
||||
Ok::<_, Error>(enc.len())
|
||||
}
|
||||
})
|
||||
.await
|
||||
.map_err(Error::from_string)??;
|
||||
tokio::fs::rename(&tmp_path, &path).await?;
|
||||
let ts2 = Instant::now();
|
||||
let ret = WrittenPbCache {
|
||||
bytes: res as u64,
|
||||
duration: ts2.duration_since(ts1),
|
||||
};
|
||||
Ok(ret)
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
pub struct ClearCacheAllResult {
    pub log: Vec<String>,
}

pub async fn clear_cache_all(node_config: &NodeConfigCached, dry: bool) -> Result<ClearCacheAllResult, Error> {
    let mut log = vec![];
    log.push(format!("begin at {:?}", chrono::Utc::now()));
    if dry {
        log.push(format!("dry run"));
    }
    let mut dirs = VecDeque::new();
    let mut stack = VecDeque::new();
    stack.push_front(node_config.node.cache_base_path.join("cache"));
    loop {
        match stack.pop_front() {
            Some(path) => {
                info!("clear_cache_all try read dir {:?}", path);
                let mut rd = tokio::fs::read_dir(path).await?;
                while let Some(entry) = rd.next_entry().await? {
                    let path = entry.path();
                    match path.to_str() {
                        Some(_pathstr) => {
                            let meta = path.symlink_metadata()?;
                            //log.push(format!("len {:7} pathstr {}", meta.len(), pathstr,));
                            let filename_str = path.file_name().unwrap().to_str().unwrap();
                            if filename_str.ends_with("..") || filename_str.ends_with(".") {
                                log.push(format!("ERROR encountered . or .."));
                            } else {
                                if meta.is_dir() {
                                    stack.push_front(path.clone());
                                    dirs.push_front((meta.len(), path));
                                } else if meta.is_file() {
                                    log.push(format!("remove file len {:7} {}", meta.len(), path.to_string_lossy()));
                                    if !dry {
                                        match tokio::fs::remove_file(&path).await {
                                            Ok(_) => {}
                                            Err(e) => {
                                                log.push(format!(
                                                    "can not remove file {} {:?}",
                                                    path.to_string_lossy(),
                                                    e
                                                ));
                                            }
                                        }
                                    }
                                } else {
                                    log.push(format!("neither file nor dir"));
                                }
                            }
                        }
                        None => {
                            log.push(format!("Invalid utf-8 path encountered"));
                        }
                    }
                }
            }
            None => break,
        }
    }
    log.push(format!(
        "start to remove {} dirs at {:?}",
        dirs.len(),
        chrono::Utc::now()
    ));
    for (len, path) in dirs {
        log.push(format!("remove dir len {} {}", len, path.to_string_lossy()));
        if !dry {
            match tokio::fs::remove_dir(&path).await {
                Ok(_) => {}
                Err(e) => {
                    log.push(format!("can not remove dir {} {:?}", path.to_string_lossy(), e));
                }
            }
        }
    }
    log.push(format!("done at {:?}", chrono::Utc::now()));
    let ret = ClearCacheAllResult { log };
    Ok(ret)
}
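
// Note on the traversal above: files are deleted during the walk, while directories
// are collected in `dirs` and removed afterwards. Because discovery uses push_front,
// deeper directories end up nearer the front of `dirs`, so they are removed before
// their parents and remove_dir should only ever see empty directories.
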
93
crates/disk/src/channelconfig.rs
Normal file
@@ -0,0 +1,93 @@
use crate::SfDbChConf;
use err::thiserror;
#[allow(unused)]
use netpod::log::*;
use netpod::range::evrange::NanoRange;
use netpod::NodeConfigCached;
use netpod::SfDbChannel;
use parse::channelconfig::extract_matching_config_entry;
use parse::channelconfig::read_local_config;
use parse::channelconfig::ChannelConfigs;
use parse::channelconfig::ConfigEntry;
use parse::channelconfig::ConfigParseError;
use std::fmt;

#[derive(Debug, thiserror::Error)]
pub enum ConfigError {
    // NOTE: the thiserror derive needs a display message per variant; these are
    // assumed minimal messages added here, since the derive does not compile
    // without them and the hand-written Display impl below is commented out.
    #[error("ParseError")]
    ParseError(ConfigParseError),
    #[error("NotFound")]
    NotFound,
    #[error("Error")]
    Error,
}

// impl fmt::Display for ConfigError {
//     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
//         write!(fmt, "ConfigError::{self:?}")
//     }
// }

impl From<ConfigParseError> for ConfigError {
    fn from(value: ConfigParseError) -> Self {
        match value {
            ConfigParseError::FileNotFound => ConfigError::NotFound,
            x => ConfigError::ParseError(x),
        }
    }
}
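
// With the From impl above, `?` converts a ConfigParseError into a ConfigError
// automatically. Illustrative sketch (the caller and parse_config are hypothetical):
// fn load_cfg() -> Result<(), ConfigError> {
//     let _cfg = parse_config()?; // parse_config: fn() -> Result<Cfg, ConfigParseError>
//     Ok(())
// }
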
pub async fn config_entry_best_match(
    range: &NanoRange,
    channel: SfDbChannel,
    node_config: &NodeConfigCached,
) -> Result<Option<ConfigEntry>, ConfigError> {
    let channel_config = match read_local_config(channel.clone(), node_config.clone()).await {
        Ok(x) => x,
        Err(e) => match e {
            ConfigParseError::FileNotFound => return Ok(None),
            e => return Err(e.into()),
        },
    };
    let entry_res = match extract_matching_config_entry(range, &channel_config) {
        Ok(k) => k,
        Err(e) => return Err(e)?,
    };
    match entry_res.best() {
        None => Ok(None),
        Some(x) => Ok(Some(x.clone())),
    }
}

pub async fn channel_configs(
    channel: SfDbChannel,
    node_config: &NodeConfigCached,
) -> Result<ChannelConfigs, ConfigParseError> {
    read_local_config(channel.clone(), node_config.clone()).await
}

pub async fn channel_config_best_match(
    range: NanoRange,
    channel: SfDbChannel,
    node_config: &NodeConfigCached,
) -> Result<Option<SfDbChConf>, ConfigError> {
    let best = config_entry_best_match(&range, channel.clone(), node_config).await?;
    match best {
        None => Ok(None),
        Some(entry) => {
            let shape = match entry.to_shape() {
                Ok(k) => k,
                // TODO pass error to caller
                Err(_e) => return Err(ConfigError::Error)?,
            };
            let channel_config = SfDbChConf {
                channel: channel.clone(),
                keyspace: entry.ks as u8,
                time_bin_size: entry.bs.clone(),
                shape,
                scalar_type: entry.scalar_type.clone(),
                byte_order: entry.byte_order.clone(),
                array: entry.is_array,
                compression: entry.is_compressed,
            };
            Ok(Some(channel_config))
        }
    }
}
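
// Usage sketch for the function above (surrounding bindings are hypothetical):
// if let Some(conf) = channel_config_best_match(range, channel, &node_config).await? {
//     // conf: SfDbChConf carrying keyspace, shape, scalar type, byte order, ...
// } else {
//     // no config entry covers the requested range
// }
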
861
crates/disk/src/dataopen.rs
Normal file
@@ -0,0 +1,861 @@
use super::paths;
use crate::SfDbChConf;
use bytes::BytesMut;
use err::ErrStr;
use err::Error;
use futures_util::StreamExt;
use netpod::log::*;
use netpod::range::evrange::NanoRange;
use netpod::Node;
use netpod::SfChFetchInfo;
use netpod::TsNano;
use std::fmt;
use std::path::PathBuf;
use std::time::Instant;
use tokio::fs::File;
use tokio::fs::OpenOptions;
use tokio::io::AsyncReadExt;
use tokio::io::AsyncSeekExt;
use tokio::io::ErrorKind;
use tokio::io::SeekFrom;

const BACKEND: &str = "testbackend-00";

pub struct Positioned {
    pub file: OpenedFile,
    pub found: bool,
}

pub async fn position_file_for_test(
    path: &PathBuf,
    range: &NanoRange,
    expand_left: bool,
    expand_right: bool,
) -> Result<Positioned, Error> {
    position_file(path, range, expand_left, expand_right).await
}
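
// Layout assumption implied by the checks in position_file below: an index file
// starts with a 2-byte header followed by fixed-size 16-byte records (hence the
// `meta.len() % 16 == 2` check and the search over `&buf[2..]`). Each record
// appears to hold a timestamp plus the byte offset of the event in the data file,
// which is what the seek to `o.1` relies on.
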
async fn position_file(
    path: &PathBuf,
    range: &NanoRange,
    expand_left: bool,
    expand_right: bool,
) -> Result<Positioned, Error> {
    trace!("position_file called expand_left {expand_left} expand_right {expand_right} {range:?} {path:?}");
    assert_eq!(expand_left && expand_right, false);
    match OpenOptions::new().read(true).open(&path).await {
        Ok(file) => {
            let index_path = PathBuf::from(format!("{}_Index", path.to_str().unwrap()));
            match OpenOptions::new().read(true).open(&index_path).await {
                Ok(mut index_file) => {
                    let meta = index_file.metadata().await?;
                    if meta.len() > 1024 * 1024 * 120 {
                        let msg = format!("too large index file {} bytes for {:?}", meta.len(), index_path);
                        error!("{}", msg);
                        return Err(Error::with_msg(msg));
                    } else if meta.len() > 1024 * 1024 * 80 {
                        let msg = format!("very large index file {} bytes for {:?}", meta.len(), index_path);
                        warn!("{}", msg);
                    } else if meta.len() > 1024 * 1024 * 20 {
                        let msg = format!("large index file {} bytes for {:?}", meta.len(), index_path);
                        info!("{}", msg);
                    }
                    if meta.len() < 2 {
                        return Err(Error::with_msg(format!(
                            "bad meta len {} for {:?}",
                            meta.len(),
                            index_path
                        )));
                    }
                    if meta.len() % 16 != 2 {
                        return Err(Error::with_msg(format!(
                            "bad meta len {} for {:?}",
                            meta.len(),
                            index_path
                        )));
                    }
                    let mut buf = BytesMut::with_capacity(meta.len() as usize);
                    buf.resize(buf.capacity(), 0);
                    index_file.read_exact(&mut buf).await?;
                    let gg = if expand_left {
                        super::index::find_largest_smaller_than(range.clone(), expand_right, &buf[2..])
                    } else {
                        super::index::find_ge(range.clone(), expand_right, &buf[2..])
                    };
                    let gg = match gg {
                        Ok(x) => x,
                        Err(e) => {
                            error!("can not position file for range {range:?} expand_right {expand_right:?} buflen {buflen}", buflen = buf.len());
                            return Err(e);
                        }
                    };
                    match gg {
                        Some(o) => {
                            let mut file = file;
                            file.seek(SeekFrom::Start(o.1)).await?;
                            //info!("position_file case A {:?}", path);
                            let g = OpenedFile {
                                file: Some(file),
                                path: path.clone(),
                                positioned: true,
                                index: true,
                                nreads: 0,
                                pos: o.1,
                            };
                            return Ok(Positioned { file: g, found: true });
                        }
                        None => {
                            //info!("position_file case B {:?}", path);
                            let g = OpenedFile {
                                file: Some(file),
                                path: path.clone(),
                                positioned: false,
                                index: true,
                                nreads: 0,
                                pos: 0,
                            };
                            return Ok(Positioned { file: g, found: false });
                        }
                    }
                }
                Err(e) => match e.kind() {
                    ErrorKind::NotFound => {
                        let ts1 = Instant::now();
                        let res = if expand_left {
                            super::index::position_static_len_datafile_at_largest_smaller_than(
                                file,
                                range.clone(),
                                expand_right,
                            )
                            .await?
                        } else {
                            super::index::position_static_len_datafile(file, range.clone(), expand_right).await?
                        };
                        let ts2 = Instant::now();
                        if false {
                            // TODO collect for stats:
                            let dur = ts2.duration_since(ts1);
                            info!("position_static_len_datafile took ms {}", dur.as_millis());
                        }
                        let file = res.0;
                        if res.1 {
                            //info!("position_file case C {:?}", path);
                            let g = OpenedFile {
                                file: Some(file),
                                path: path.clone(),
                                positioned: true,
                                index: false,
                                nreads: res.2,
                                pos: res.3,
                            };
                            return Ok(Positioned { file: g, found: true });
                        } else {
                            //info!("position_file case D {:?}", path);
                            let g = OpenedFile {
                                file: Some(file),
                                path: path.clone(),
                                positioned: false,
                                index: false,
                                nreads: res.2,
                                pos: 0,
                            };
                            return Ok(Positioned { file: g, found: false });
                        }
                    }
                    _ => Err(e)?,
                },
            }
        }
        Err(e) => {
            warn!("can not open {:?} error {:?}", path, e);
            let g = OpenedFile {
                file: None,
                path: path.clone(),
                positioned: false,
                index: true,
                nreads: 0,
                pos: 0,
            };
            return Ok(Positioned { file: g, found: false });
        }
    }
}

pub struct OpenedFile {
    pub path: PathBuf,
    pub file: Option<File>,
    pub positioned: bool,
    pub index: bool,
    pub nreads: u32,
    pub pos: u64,
}

#[derive(Debug)]
pub struct OpenedFileSet {
    pub timebin: u64,
    pub files: Vec<OpenedFile>,
}

impl fmt::Debug for OpenedFile {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("OpenedFile")
            .field("path", &self.path)
            .field("file", &self.file)
            .field("positioned", &self.positioned)
            .field("index", &self.index)
            .field("nreads", &self.nreads)
            .finish()
    }
}

pub fn open_files(
    range: &NanoRange,
    fetch_info: &SfChFetchInfo,
    node: Node,
) -> async_channel::Receiver<Result<OpenedFileSet, Error>> {
    let (chtx, chrx) = async_channel::bounded(2);
    let range = range.clone();
    let fetch_info = fetch_info.clone();
    tokio::spawn(async move {
        match open_files_inner(&chtx, &range, &fetch_info, node).await {
            Ok(_) => {}
            Err(e) => {
                let e = e.add_public_msg(format!(
                    "Can not open file for channel: {fetch_info:?} range: {range:?}"
                ));
                match chtx.send(Err(e.into())).await {
                    Ok(_) => {}
                    Err(e) => {
                        // This case is fine.
                        debug!("open_files channel send error {:?}", e);
                    }
                }
            }
        }
    });
    chrx
}
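
// Pattern used by open_files above: spawn a producer task and hand the bounded
// receiver to the caller, so file opening overlaps with consumption under a small,
// fixed amount of buffering. Generic sketch (names are not from this crate):
// fn produce_numbers() -> async_channel::Receiver<u32> {
//     let (tx, rx) = async_channel::bounded(2);
//     tokio::spawn(async move {
//         for i in 0..4 {
//             if tx.send(i).await.is_err() {
//                 break; // receiver dropped, stop producing
//             }
//         }
//     });
//     rx
// }
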
async fn open_files_inner(
    chtx: &async_channel::Sender<Result<OpenedFileSet, Error>>,
    range: &NanoRange,
    fetch_info: &SfChFetchInfo,
    node: Node,
) -> Result<(), Error> {
    let fetch_info = fetch_info.clone();
    let timebins = get_timebins(&fetch_info, node.clone()).await?;
    if timebins.len() == 0 {
        return Ok(());
    }
    for &tb in &timebins {
        let ts_bin = TsNano(tb * fetch_info.bs().ns());
        if ts_bin.ns() >= range.end {
            continue;
        }
        if ts_bin.ns() + fetch_info.bs().ns() <= range.beg {
            continue;
        }
        let mut a = Vec::new();
        for path in paths::datapaths_for_timebin(tb, &fetch_info, &node).await? {
            let w = position_file(&path, range, false, false).await?;
            if w.found {
                a.push(w.file);
            }
        }
        let h = OpenedFileSet { timebin: tb, files: a };
        debug!(
            "----- open_files_inner giving OpenedFileSet with {} files",
            h.files.len()
        );
        chtx.send(Ok(h)).await.errstr()?;
    }
    Ok(())
}

/**
Provide the stream of positioned data files which are relevant for the given parameters.

Expanded to one event before and one after the requested range, if such events exist.
*/
pub fn open_expanded_files(
    range: &NanoRange,
    fetch_info: &SfChFetchInfo,
    node: Node,
) -> async_channel::Receiver<Result<OpenedFileSet, Error>> {
    let (chtx, chrx) = async_channel::bounded(2);
    let range = range.clone();
    let fetch_info = fetch_info.clone();
    tokio::spawn(async move {
        match open_expanded_files_inner(&chtx, &range, &fetch_info, node).await {
            Ok(_) => {}
            Err(e) => match chtx.send(Err(e.into())).await {
                Ok(_) => {}
                Err(e) => {
                    // To be expected
                    debug!("open_files channel send error {:?}", e);
                }
            },
        }
    });
    chrx
}

async fn get_timebins(fetch_info: &SfChFetchInfo, node: Node) -> Result<Vec<u64>, Error> {
    let mut timebins = Vec::new();
    let p0 = paths::channel_timebins_dir_path(&fetch_info, &node)?;
    match tokio::fs::read_dir(&p0).await {
        Ok(rd) => {
            let mut rd = tokio_stream::wrappers::ReadDirStream::new(rd);
            while let Some(e) = rd.next().await {
                let e = e?;
                let dn = e
                    .file_name()
                    .into_string()
                    .map_err(|e| Error::with_msg(format!("Bad OS path {:?}", e)))?;
                if dn.len() != 19 {
                    warn!("get_timebins weird directory {:?} p0 {:?}", e.path(), p0);
                }
                let vv = dn.chars().fold(0, |a, x| if x.is_digit(10) { a + 1 } else { a });
                if vv == 19 {
                    timebins.push(dn.parse::<u64>()?);
                }
            }
            timebins.sort_unstable();
            Ok(timebins)
        }
        Err(e) => {
            debug!("get_timebins no timebins for {:?} {:?} p0 {:?}", fetch_info, e, p0);
            Ok(Vec::new())
        }
    }
}
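
// Time-bin directories are named as zero-padded 19-digit decimal u64 values (see
// the test data paths below, e.g. "0000000000000000001"), which is what the digit
// count check in get_timebins relies on. Naming sketch:
// let dirname = format!("{:019}", 1u64); // -> "0000000000000000001"
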
async fn open_expanded_files_inner(
    chtx: &async_channel::Sender<Result<OpenedFileSet, Error>>,
    range: &NanoRange,
    fetch_info: &SfChFetchInfo,
    node: Node,
) -> Result<(), Error> {
    let fetch_info = fetch_info.clone();
    let timebins = get_timebins(&fetch_info, node.clone()).await?;
    if timebins.len() == 0 {
        return Ok(());
    }
    let mut p1 = None;
    for (i1, tb) in timebins.iter().enumerate().rev() {
        let ts_bin = TsNano(tb * fetch_info.bs().ns());
        if ts_bin.ns() <= range.beg {
            p1 = Some(i1);
            break;
        }
    }
    let mut p1 = if let Some(i1) = p1 { i1 } else { 0 };
    if p1 >= timebins.len() {
        return Err(Error::with_msg(format!(
            "logic error p1 {} range {:?} fetch_info {:?}",
            p1, range, fetch_info
        )));
    }
    let mut found_pre = false;
    loop {
        let tb = timebins[p1];
        let mut a = Vec::new();
        for path in paths::datapaths_for_timebin(tb, &fetch_info, &node).await? {
            let w = position_file(&path, range, true, false).await?;
            if w.found {
                debug!("----- open_expanded_files_inner w.found for {:?}", path);
                a.push(w.file);
                found_pre = true;
            }
        }
        let h = OpenedFileSet { timebin: tb, files: a };
        debug!(
            "----- open_expanded_files_inner giving OpenedFileSet with {} files",
            h.files.len()
        );
        chtx.send(Ok(h)).await.errstr()?;
        if found_pre {
            p1 += 1;
            break;
        } else if p1 == 0 {
            break;
        } else {
            p1 -= 1;
        }
    }
    if found_pre {
        // Append all following positioned files.
        while p1 < timebins.len() {
            let tb = timebins[p1];
            let mut a = Vec::new();
            for path in paths::datapaths_for_timebin(tb, &fetch_info, &node).await? {
                let w = position_file(&path, range, false, true).await?;
                if w.found {
                    a.push(w.file);
                }
            }
            let h = OpenedFileSet { timebin: tb, files: a };
            chtx.send(Ok(h)).await.errstr()?;
            p1 += 1;
        }
    } else {
        // TODO emit stats for this or log somewhere?
        debug!("Could not find an event before the requested range, falling back to the standard file list.");
        // Try to locate files according to the non-expand algorithm.
        open_files_inner(chtx, range, &fetch_info, node).await?;
    }
    Ok(())
}

#[cfg(test)]
mod test {
    use super::*;
    use err::Error;
    use netpod::range::evrange::NanoRange;
    use netpod::test_data_base_path_databuffer;
    use netpod::timeunits::*;
    use std::path::PathBuf;
    use tokio::fs::OpenOptions;

    fn scalar_file_path() -> PathBuf {
        test_data_base_path_databuffer()
            .join("node00/ks_2/byTime/scalar-i32-be")
            .join("0000000000000000001/0000000000/0000000000086400000_00000_Data")
    }

    fn wave_file_path() -> PathBuf {
        test_data_base_path_databuffer()
            .join("node00/ks_3/byTime/wave-f64-be-n21")
            .join("0000000000000000001/0000000000/0000000000086400000_00000_Data")
    }

    #[test]
    fn position_basic_file_at_begin() -> Result<(), Error> {
        let fut = async {
            let path = scalar_file_path();
            let range = NanoRange {
                beg: DAY,
                end: DAY + MS * 20000,
            };
            let res = position_file(&path, &range, false, false).await?;
            assert_eq!(res.found, true);
            assert_eq!(res.file.index, false);
            assert_eq!(res.file.positioned, true);
            assert_eq!(res.file.pos, 23);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    #[test]
    fn position_basic_file_for_empty_range() -> Result<(), Error> {
        let fut = async {
            let path = scalar_file_path();
            let range = NanoRange {
                beg: DAY + MS * 80000,
                end: DAY + MS * 80000,
            };
            let res = position_file(&path, &range, false, false).await?;
            assert_eq!(res.found, false);
            assert_eq!(res.file.index, false);
            assert_eq!(res.file.positioned, false);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    #[test]
    fn position_basic_file_at_begin_for_range() -> Result<(), Error> {
        let fut = async {
            let path = scalar_file_path();
            let range = NanoRange {
                beg: DAY,
                end: DAY + MS * 300000,
            };
            let res = position_file(&path, &range, false, false).await?;
            assert_eq!(res.found, true);
            assert_eq!(res.file.index, false);
            assert_eq!(res.file.positioned, true);
            assert_eq!(res.file.pos, 23);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    #[test]
    fn position_basic_file_at_inner() -> Result<(), Error> {
        let fut = async {
            let path = scalar_file_path();
            let range = NanoRange {
                beg: DAY + MS * 4000,
                end: DAY + MS * 7000,
            };
            let res = position_file(&path, &range, false, false).await?;
            assert_eq!(res.found, true);
            assert_eq!(res.file.index, false);
            assert_eq!(res.file.positioned, true);
            assert_eq!(res.file.pos, 179);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    // TODO add same test for WAVE
    #[test]
    fn position_basic_file_at_inner_for_too_small_range() -> Result<(), Error> {
        let fut = async {
            let path = scalar_file_path();
            let range = NanoRange {
                beg: DAY + MS * 1501,
                end: DAY + MS * 1502,
            };
            let res = position_file(&path, &range, false, false).await?;
            assert_eq!(res.found, false);
            assert_eq!(res.file.index, false);
            assert_eq!(res.file.positioned, false);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    // TODO add same test for WAVE
    #[test]
    fn position_basic_file_starts_after_range() -> Result<(), Error> {
        let fut = async {
            let path = scalar_file_path();
            let range = NanoRange {
                beg: HOUR * 22,
                end: HOUR * 23,
            };
            let res = position_file(&path, &range, false, false).await?;
            assert_eq!(res.found, false);
            assert_eq!(res.file.index, false);
            assert_eq!(res.file.positioned, false);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    #[test]
    fn position_basic_file_ends_before_range() -> Result<(), Error> {
        let fut = async {
            let path = scalar_file_path();
            let range = NanoRange {
                beg: DAY * 2,
                end: DAY * 2 + HOUR,
            };
            let res = position_file(&path, &range, false, false).await?;
            assert_eq!(res.found, false);
            assert_eq!(res.file.index, false);
            assert_eq!(res.file.positioned, false);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    #[test]
    fn position_basic_index() -> Result<(), Error> {
        let fut = async {
            let path = wave_file_path();
            let range = NanoRange {
                beg: DAY + MS * 4000,
                end: DAY + MS * 90000,
            };
            let res = position_file(&path, &range, false, false).await?;
            assert_eq!(res.found, true);
            assert_eq!(res.file.index, true);
            assert_eq!(res.file.positioned, true);
            assert_eq!(res.file.pos, 184);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    #[test]
    fn position_basic_index_too_small_range() -> Result<(), Error> {
        let fut = async {
            let path = wave_file_path();
            let range = NanoRange {
                beg: DAY + MS * 3100,
                end: DAY + MS * 3200,
            };
            let res = position_file(&path, &range, false, false).await?;
            assert_eq!(res.found, false);
            assert_eq!(res.file.index, true);
            assert_eq!(res.file.positioned, false);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    #[test]
    fn position_basic_index_starts_after_range() -> Result<(), Error> {
        let fut = async {
            let path = wave_file_path();
            let range = NanoRange {
                beg: HOUR * 10,
                end: HOUR * 12,
            };
            let res = position_file(&path, &range, false, false).await?;
            assert_eq!(res.found, false);
            assert_eq!(res.file.index, true);
            assert_eq!(res.file.positioned, false);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    #[test]
    fn position_basic_index_ends_before_range() -> Result<(), Error> {
        let fut = async {
            let path = wave_file_path();
            let range = NanoRange {
                beg: DAY * 2,
                end: DAY * 2 + MS * 40000,
            };
            let res = position_file(&path, &range, false, false).await?;
            assert_eq!(res.found, false);
            assert_eq!(res.file.index, true);
            assert_eq!(res.file.positioned, false);
            assert_eq!(res.file.pos, 0);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    //
    // -------------- Expanded -----------------------------------
    //

    #[test]
    fn position_expand_file_at_begin_no_fallback() -> Result<(), Error> {
        let fut = async {
            let path = scalar_file_path();
            let range = NanoRange {
                beg: DAY + MS * 3000,
                end: DAY + MS * 40000,
            };
            let file = OpenOptions::new().read(true).open(path).await?;
            let res =
                super::super::index::position_static_len_datafile_at_largest_smaller_than(file, range.clone(), true)
                    .await?;
            assert_eq!(res.1, true);
            assert_eq!(res.3, 75);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok::<_, Error>(())
    }

    #[test]
    fn position_expand_left_file_at_evts_file_begin() -> Result<(), Error> {
        let fut = async {
            let path = scalar_file_path();
            let range = NanoRange {
                beg: DAY,
                end: DAY + MS * 40000,
            };
            let res = position_file(&path, &range, true, false).await?;
            assert_eq!(res.found, false);
            assert_eq!(res.file.index, false);
            assert_eq!(res.file.positioned, false);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    #[test]
    fn position_expand_right_file_at_evts_file_begin() -> Result<(), Error> {
        let fut = async {
            let path = scalar_file_path();
            let range = NanoRange {
                beg: DAY,
                end: DAY + MS * 40000,
            };
            let res = position_file(&path, &range, false, true).await?;
            assert_eq!(res.found, true);
            assert_eq!(res.file.index, false);
            assert_eq!(res.file.positioned, true);
            assert_eq!(res.file.pos, 23);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    #[test]
    fn position_expand_left_file_at_evts_file_within() -> Result<(), Error> {
        let fut = async {
            let path = scalar_file_path();
            let range = NanoRange {
                beg: DAY + MS * 3000,
                end: DAY + MS * 40000,
            };
            let res = position_file(&path, &range, true, false).await?;
            assert_eq!(res.found, true);
            assert_eq!(res.file.index, false);
            assert_eq!(res.file.positioned, true);
            assert_eq!(res.file.pos, 75);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    // ------- TODO do the same with Wave (index)
    #[test]
    fn position_expand_left_file_ends_before_range() -> Result<(), Error> {
        let fut = async {
            let path = scalar_file_path();
            let range = NanoRange {
                beg: DAY * 2,
                end: DAY * 2 + MS * 40000,
            };
            let res = position_file(&path, &range, true, false).await?;
            assert_eq!(res.found, true);
            assert_eq!(res.file.index, false);
            assert_eq!(res.file.positioned, true);
            assert_eq!(res.file.pos, 2995171);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    // ------- TODO do the same with Wave (index)
    #[test]
    fn position_expand_left_file_begins_exactly_after_range() -> Result<(), Error> {
        let fut = async {
            let path = scalar_file_path();
            let range = NanoRange {
                beg: HOUR * 23,
                end: DAY,
            };
            let res = position_file(&path, &range, true, false).await?;
            assert_eq!(res.found, false);
            assert_eq!(res.file.index, false);
            assert_eq!(res.file.positioned, false);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    // ------- TODO do the same with Wave (index)
    #[test]
    fn position_expand_right_file_begins_exactly_after_range() -> Result<(), Error> {
        let fut = async {
            let path = scalar_file_path();
            let range = NanoRange {
                beg: HOUR * 23,
                end: DAY,
            };
            let res = position_file(&path, &range, false, true).await?;
            assert_eq!(res.found, true);
            assert_eq!(res.file.index, false);
            assert_eq!(res.file.positioned, true);
            assert_eq!(res.file.pos, 23);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    // TODO add same test for indexed
    #[test]
    fn position_expand_left_basic_file_at_inner_for_too_small_range() -> Result<(), Error> {
        let fut = async {
            let path = scalar_file_path();
            let range = NanoRange {
                beg: DAY + MS * 1501,
                end: DAY + MS * 1502,
            };
            let res = position_file(&path, &range, true, false).await?;
            assert_eq!(res.found, true);
            assert_eq!(res.file.index, false);
            assert_eq!(res.file.positioned, true);
            assert_eq!(res.file.pos, 75);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    // TODO add same test for indexed
    #[test]
    fn position_expand_right_basic_file_at_inner_for_too_small_range() -> Result<(), Error> {
        let fut = async {
            let path = scalar_file_path();
            let range = NanoRange {
                beg: DAY + MS * 1501,
                end: DAY + MS * 1502,
            };
            let res = position_file(&path, &range, false, true).await?;
            assert_eq!(res.found, true);
            assert_eq!(res.file.index, false);
            assert_eq!(res.file.positioned, true);
            assert_eq!(res.file.pos, 127);
            Ok::<_, Error>(())
        };
        taskrun::run(fut)?;
        Ok(())
    }

    #[test]
    fn expanded_file_list() {
        let range = NanoRange {
            beg: DAY + HOUR * 5,
            end: DAY + HOUR * 8,
        };
        let chn = netpod::SfDbChannel::from_name(BACKEND, "scalar-i32-be");
        // TODO read config from disk? Or expose the config from data generator?
        let fetch_info = todo!();
        // let fetch_info = SfChFetchInfo {
        //     channel: chn,
        //     keyspace: 2,
        //     time_bin_size: TsNano(DAY),
        //     scalar_type: netpod::ScalarType::I32,
        //     byte_order: netpod::ByteOrder::Big,
        //     shape: netpod::Shape::Scalar,
        //     array: false,
        //     compression: false,
        // };
        let cluster = netpod::test_cluster();
        let task = async move {
            let mut paths = Vec::new();
            let mut files = open_expanded_files(&range, &fetch_info, cluster.nodes[0].clone());
            while let Some(file) = files.next().await {
                match file {
                    Ok(k) => {
                        debug!("opened file: {:?}", k);
                        paths.push(k.files);
                    }
                    Err(e) => {
                        error!("error while trying to open {:?}", e);
                        break;
                    }
                }
            }
            if paths.len() != 2 {
                return Err(Error::with_msg_no_trace(format!(
                    "expected 2 files got {n}",
                    n = paths.len()
                )));
            }
            Ok::<_, Error>(())
        };
        taskrun::run(task).unwrap();
    }
}
448
crates/disk/src/decode.rs
Normal file
@@ -0,0 +1,448 @@
use crate::eventblobs::EventChunkerMultifile;
use err::Error;
use futures_util::Stream;
use futures_util::StreamExt;
use items_0::scalar_ops::ScalarOps;
use items_0::streamitem::RangeCompletableItem;
use items_0::streamitem::Sitemty;
use items_0::streamitem::StreamItem;
use items_0::Events;
use items_0::WithLen;
use items_2::eventfull::EventFull;
use items_2::eventsdim0::EventsDim0;
use items_2::eventsdim1::EventsDim1;
use netpod::log::*;
use netpod::AggKind;
use netpod::ScalarType;
use netpod::Shape;
use std::marker::PhantomData;
use std::mem;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;

pub trait Endianness: Send + Unpin {
    fn is_big() -> bool;
}

pub struct LittleEndian {}

pub struct BigEndian {}

impl Endianness for LittleEndian {
    fn is_big() -> bool {
        false
    }
}

impl Endianness for BigEndian {
    fn is_big() -> bool {
        true
    }
}

pub enum Endian {
    Little,
    Big,
}

pub trait ScalarValueFromBytes<STY> {
    fn convert(buf: &[u8], endian: Endian) -> Result<STY, Error>;
    fn convert_dim1(buf: &[u8], endian: Endian, n: usize) -> Result<Vec<STY>, Error>;
}

macro_rules! impl_scalar_value_from_bytes {
    ($nty:ident, $nl:expr) => {
        impl ScalarValueFromBytes<$nty> for $nty {
            // Quirk of the data on disk: we can not rely on the byte order stated
            // in the channel config, since endianness in sf-databuffer can be
            // specified per event.
            fn convert(buf: &[u8], endian: Endian) -> Result<$nty, Error> {
                //$nty::$ec(*arrayref::array_ref![buf, 0, $nl])
                use Endian::*;
                let ret = match endian {
                    Little => $nty::from_le_bytes(buf[..$nl].try_into()?),
                    Big => $nty::from_be_bytes(buf[..$nl].try_into()?),
                };
                Ok(ret)
            }

            fn convert_dim1(buf: &[u8], endian: Endian, n: usize) -> Result<Vec<$nty>, Error> {
                // Each element is $nl bytes wide; at most `n` elements are taken.
                let ret = buf
                    .chunks_exact($nl)
                    .take(n)
                    .map(|b2| {
                        use Endian::*;
                        let ret = match endian {
                            Little => $nty::from_le_bytes(b2[..$nl].try_into().unwrap()),
                            Big => $nty::from_be_bytes(b2[..$nl].try_into().unwrap()),
                        };
                        ret
                    })
                    .collect();
                Ok(ret)
            }
        }
    };
}

impl_scalar_value_from_bytes!(u8, 1);
impl_scalar_value_from_bytes!(u16, 2);
impl_scalar_value_from_bytes!(u32, 4);
impl_scalar_value_from_bytes!(u64, 8);
impl_scalar_value_from_bytes!(i8, 1);
impl_scalar_value_from_bytes!(i16, 2);
impl_scalar_value_from_bytes!(i32, 4);
impl_scalar_value_from_bytes!(i64, 8);
impl_scalar_value_from_bytes!(f32, 4);
impl_scalar_value_from_bytes!(f64, 8);
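
// Expansion example for the macro above (illustrative):
// <i32 as ScalarValueFromBytes<i32>>::convert(&[0, 0, 0, 7], Endian::Big) == Ok(7)
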
impl ScalarValueFromBytes<String> for String {
    fn convert(buf: &[u8], _endian: Endian) -> Result<String, Error> {
        let s = if buf.len() >= 255 {
            String::from_utf8_lossy(&buf[..255])
        } else {
            String::from_utf8_lossy(buf)
        };
        Ok(s.into())
    }

    fn convert_dim1(buf: &[u8], _endian: Endian, _n: usize) -> Result<Vec<String>, Error> {
        let s = if buf.len() >= 255 {
            String::from_utf8_lossy(&buf[..255])
        } else {
            String::from_utf8_lossy(buf)
        };
        Ok(vec![s.into()])
    }
}

impl ScalarValueFromBytes<bool> for bool {
    fn convert(buf: &[u8], _endian: Endian) -> Result<bool, Error> {
        if buf.len() >= 1 {
            if buf[0] != 0 {
                Ok(true)
            } else {
                Ok(false)
            }
        } else {
            Ok(false)
        }
    }

    fn convert_dim1(buf: &[u8], _endian: Endian, n: usize) -> Result<Vec<bool>, Error> {
        let nn = buf.len().min(n);
        Ok(buf.iter().take(nn).map(|&x| x != 0).collect())
    }
}

pub trait ValueFromBytes: Send {
    fn convert(&self, ts: u64, pulse: u64, buf: &[u8], endian: Endian, events: &mut dyn Events) -> Result<(), Error>;
}

pub trait ValueDim0FromBytes {
    fn convert(&self, ts: u64, pulse: u64, buf: &[u8], endian: Endian, events: &mut dyn Events) -> Result<(), Error>;
}

pub trait ValueDim1FromBytes {
    fn convert(&self, ts: u64, pulse: u64, buf: &[u8], endian: Endian, events: &mut dyn Events) -> Result<(), Error>;
}

pub struct ValueDim0FromBytesImpl<STY>
where
    STY: ScalarOps,
{
    _m1: PhantomData<STY>,
}

impl<STY> ValueDim0FromBytesImpl<STY>
where
    STY: ScalarOps + ScalarValueFromBytes<STY>,
{
    fn boxed() -> Box<dyn ValueFromBytes> {
        Box::new(Self {
            _m1: Default::default(),
        })
    }
}

impl<STY> ValueDim0FromBytes for ValueDim0FromBytesImpl<STY>
where
    STY: ScalarOps + ScalarValueFromBytes<STY>,
{
    fn convert(&self, ts: u64, pulse: u64, buf: &[u8], endian: Endian, events: &mut dyn Events) -> Result<(), Error> {
        if let Some(evs) = events.as_any_mut().downcast_mut::<EventsDim0<STY>>() {
            let v = <STY as ScalarValueFromBytes<STY>>::convert(buf, endian)?;
            evs.values.push_back(v);
            evs.tss.push_back(ts);
            evs.pulses.push_back(pulse);
            Ok(())
        } else {
            Err(Error::with_msg_no_trace("unexpected container"))
        }
    }
}

impl<STY> ValueFromBytes for ValueDim0FromBytesImpl<STY>
where
    STY: ScalarOps + ScalarValueFromBytes<STY>,
{
    fn convert(&self, ts: u64, pulse: u64, buf: &[u8], endian: Endian, events: &mut dyn Events) -> Result<(), Error> {
        ValueDim0FromBytes::convert(self, ts, pulse, buf, endian, events)
    }
}

pub struct ValueDim1FromBytesImpl<STY>
where
    STY: ScalarOps,
{
    shape: Shape,
    _m1: PhantomData<STY>,
}

impl<STY> ValueDim1FromBytesImpl<STY>
where
    STY: ScalarOps + ScalarValueFromBytes<STY>,
{
    fn boxed(shape: Shape) -> Box<dyn ValueFromBytes> {
        Box::new(Self {
            shape,
            _m1: Default::default(),
        })
    }
}

impl<STY> ValueFromBytes for ValueDim1FromBytesImpl<STY>
where
    STY: ScalarOps + ScalarValueFromBytes<STY>,
{
    fn convert(&self, ts: u64, pulse: u64, buf: &[u8], endian: Endian, events: &mut dyn Events) -> Result<(), Error> {
        ValueDim1FromBytes::convert(self, ts, pulse, buf, endian, events)
    }
}

impl<STY> ValueDim1FromBytes for ValueDim1FromBytesImpl<STY>
where
    STY: ScalarOps + ScalarValueFromBytes<STY>,
{
    fn convert(&self, ts: u64, pulse: u64, buf: &[u8], endian: Endian, events: &mut dyn Events) -> Result<(), Error> {
        if let Some(evs) = events.as_any_mut().downcast_mut::<EventsDim1<STY>>() {
            let n = if let Shape::Wave(n) = self.shape {
                n
            } else {
                return Err(Error::with_msg_no_trace("ValueDim1FromBytesImpl bad shape"));
            };
            let v = <STY as ScalarValueFromBytes<STY>>::convert_dim1(buf, endian, n as _)?;
            evs.values.push_back(v);
            evs.tss.push_back(ts);
            evs.pulses.push_back(pulse);
            Ok(())
        } else {
            Err(Error::with_msg_no_trace("unexpected container"))
        }
    }
}

fn make_scalar_conv(
    scalar_type: &ScalarType,
    shape: &Shape,
    agg_kind: &AggKind,
) -> Result<Box<dyn ValueFromBytes>, Error> {
    let ret = match agg_kind {
        AggKind::EventBlobs => todo!("make_scalar_conv EventBlobs"),
        AggKind::Plain
        | AggKind::DimXBinsN(_)
        | AggKind::DimXBins1
        | AggKind::TimeWeightedScalar
        | AggKind::PulseIdDiff => match shape {
            Shape::Scalar => match scalar_type {
                ScalarType::U8 => ValueDim0FromBytesImpl::<u8>::boxed(),
                ScalarType::U16 => ValueDim0FromBytesImpl::<u16>::boxed(),
                ScalarType::U32 => ValueDim0FromBytesImpl::<u32>::boxed(),
                ScalarType::U64 => ValueDim0FromBytesImpl::<u64>::boxed(),
                ScalarType::I8 => ValueDim0FromBytesImpl::<i8>::boxed(),
                ScalarType::I16 => ValueDim0FromBytesImpl::<i16>::boxed(),
                ScalarType::I32 => ValueDim0FromBytesImpl::<i32>::boxed(),
                ScalarType::I64 => ValueDim0FromBytesImpl::<i64>::boxed(),
                ScalarType::F32 => ValueDim0FromBytesImpl::<f32>::boxed(),
                ScalarType::F64 => ValueDim0FromBytesImpl::<f64>::boxed(),
                ScalarType::BOOL => ValueDim0FromBytesImpl::<bool>::boxed(),
                ScalarType::STRING => ValueDim0FromBytesImpl::<String>::boxed(),
            },
            Shape::Wave(_) => {
                let shape = shape.clone();
                match scalar_type {
                    ScalarType::U8 => ValueDim1FromBytesImpl::<u8>::boxed(shape),
                    ScalarType::U16 => ValueDim1FromBytesImpl::<u16>::boxed(shape),
                    ScalarType::U32 => ValueDim1FromBytesImpl::<u32>::boxed(shape),
                    ScalarType::U64 => ValueDim1FromBytesImpl::<u64>::boxed(shape),
                    ScalarType::I8 => ValueDim1FromBytesImpl::<i8>::boxed(shape),
                    ScalarType::I16 => ValueDim1FromBytesImpl::<i16>::boxed(shape),
                    ScalarType::I32 => ValueDim1FromBytesImpl::<i32>::boxed(shape),
                    ScalarType::I64 => ValueDim1FromBytesImpl::<i64>::boxed(shape),
                    ScalarType::F32 => ValueDim1FromBytesImpl::<f32>::boxed(shape),
                    ScalarType::F64 => ValueDim1FromBytesImpl::<f64>::boxed(shape),
                    ScalarType::BOOL => ValueDim1FromBytesImpl::<bool>::boxed(shape),
                    ScalarType::STRING => ValueDim1FromBytesImpl::<String>::boxed(shape),
                }
            }
            Shape::Image(_, _) => todo!("make_scalar_conv Image"),
        },
    };
    Ok(ret)
}

pub struct EventsDynStream {
    scalar_type: ScalarType,
    shape: Shape,
    agg_kind: AggKind,
    events_full: EventChunkerMultifile,
    events_out: Box<dyn Events>,
    scalar_conv: Box<dyn ValueFromBytes>,
    emit_threshold: usize,
    done: bool,
    complete: bool,
}

impl EventsDynStream {
    pub fn type_name() -> &'static str {
        std::any::type_name::<Self>()
    }

    pub fn new(
        scalar_type: ScalarType,
        shape: Shape,
        agg_kind: AggKind,
        events_full: EventChunkerMultifile,
    ) -> Result<Self, Error> {
        let st = &scalar_type;
        let sh = &shape;
        let ag = &agg_kind;
        warn!("TODO EventsDynStream::new feed through transform");
        // TODO do we need/want the empty item from here?
        let events_out = items_2::empty::empty_events_dyn_ev(st, sh)?;
        let scalar_conv = make_scalar_conv(st, sh, ag)?;
        let emit_threshold = match &shape {
            Shape::Scalar => 2048,
            Shape::Wave(_) => 64,
            Shape::Image(_, _) => 1,
        };
        let ret = Self {
            scalar_type,
            shape,
            agg_kind,
            events_full,
            events_out,
            scalar_conv,
            emit_threshold,
            done: false,
            complete: false,
        };
        Ok(ret)
    }

    fn replace_events_out(&mut self) -> Result<Box<dyn Events>, Error> {
        let st = &self.scalar_type;
        let sh = &self.shape;
        error!("TODO replace_events_out feed through transform");
        // TODO do we need/want the empty item from here?
        let empty = items_2::empty::empty_events_dyn_ev(st, sh)?;
        let evs = mem::replace(&mut self.events_out, empty);
        Ok(evs)
    }

    fn handle_event_full(&mut self, item: EventFull) -> Result<(), Error> {
        if item.len() >= self.emit_threshold {
            info!("handle_event_full item len {}", item.len());
        }
        for (((buf, &be), &ts), &pulse) in item
            .blobs
            .iter()
            .zip(item.be.iter())
            .zip(item.tss.iter())
            .zip(item.pulses.iter())
        {
            let endian = if be { Endian::Big } else { Endian::Little };
            let buf = if let Some(x) = buf {
                x
            } else {
                return Err(Error::with_msg_no_trace("no buf in event"));
            };
            self.scalar_conv
                .convert(ts, pulse, buf, endian, self.events_out.as_mut())?;
        }
        Ok(())
    }

    fn handle_stream_item(
        &mut self,
        item: StreamItem<RangeCompletableItem<EventFull>>,
    ) -> Result<Option<Sitemty<Box<dyn Events>>>, Error> {
        let ret = match item {
            StreamItem::DataItem(item) => match item {
                RangeCompletableItem::RangeComplete => {
                    Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)))
                }
                RangeCompletableItem::Data(item) => match self.handle_event_full(item) {
                    Ok(()) => {
                        // TODO collect stats.
                        if self.events_out.len() >= self.emit_threshold {
                            let evs = self.replace_events_out()?;
                            Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(evs))))
                        } else {
                            None
                        }
                    }
                    Err(e) => Some(Err(e)),
                },
            },
            StreamItem::Log(item) => Some(Ok(StreamItem::Log(item))),
            StreamItem::Stats(item) => Some(Ok(StreamItem::Stats(item))),
        };
        Ok(ret)
    }
}

impl Stream for EventsDynStream {
    type Item = Sitemty<Box<dyn Events>>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        loop {
            break if self.complete {
                panic!("{} poll_next on complete", Self::type_name())
            } else if self.done {
                self.complete = true;
                Ready(None)
            } else {
                match self.events_full.poll_next_unpin(cx) {
                    Ready(Some(Ok(item))) => match self.handle_stream_item(item) {
                        Ok(Some(item)) => Ready(Some(item)),
                        Ok(None) => continue,
                        Err(e) => {
                            self.done = true;
                            Ready(Some(Err(e)))
                        }
                    },
                    Ready(Some(Err(e))) => {
                        self.done = true;
                        Ready(Some(Err(e)))
                    }
                    Ready(None) => {
                        // Produce a last one even if it is empty.
                        match self.replace_events_out() {
                            Ok(item) => {
                                self.done = true;
                                Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::Data(item)))))
                            }
                            Err(e) => {
                                self.done = true;
                                Ready(Some(Err(e)))
                            }
                        }
                    }
                    Pending => Pending,
                }
            };
        }
    }
}
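
// Consumption sketch for the stream above (surrounding bindings are hypothetical;
// `chunks` is an EventChunkerMultifile):
// let mut evs = EventsDynStream::new(scalar_type, shape, agg_kind, chunks)?;
// while let Some(item) = evs.next().await {
//     match item {
//         Ok(StreamItem::DataItem(RangeCompletableItem::Data(events))) => { /* consume */ }
//         Ok(_) => {}
//         Err(e) => return Err(e),
//     }
// }
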
807
crates/disk/src/disk.rs
Normal file
@@ -0,0 +1,807 @@
#[cfg(test)]
pub mod aggtest;
pub mod binnedstream;
pub mod cache;
pub mod channelconfig;
pub mod dataopen;
pub mod decode;
pub mod eventblobs;
pub mod eventchunker;
pub mod frame;
pub mod gen;
pub mod index;
pub mod merge;
pub mod paths;
pub mod raw;
pub mod read3;
pub mod read4;
pub mod streamlog;

pub use parse;

use bytes::Bytes;
use bytes::BytesMut;
use err::Error;
use futures_util::future::FusedFuture;
use futures_util::FutureExt;
use futures_util::Stream;
use futures_util::StreamExt;
use futures_util::TryFutureExt;
use netpod::log::*;
use netpod::ByteOrder;
use netpod::DiskIoTune;
use netpod::DtNano;
use netpod::Node;
use netpod::ReadSys;
use netpod::ScalarType;
use netpod::SfDbChannel;
use netpod::Shape;
use serde::Deserialize;
use serde::Serialize;
use std::collections::VecDeque;
use std::future::Future;
use std::io::SeekFrom;
use std::mem;
use std::os::unix::prelude::AsRawFd;
use std::path::PathBuf;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
use std::time::Duration;
use std::time::Instant;
use streams::dtflags::ARRAY;
use streams::dtflags::BIG_ENDIAN;
use streams::dtflags::COMPRESSION;
use streams::dtflags::SHAPE;
use streams::filechunkread::FileChunkRead;
use tokio::fs::File;
use tokio::fs::OpenOptions;
use tokio::io::AsyncRead;
use tokio::io::AsyncReadExt;
use tokio::io::AsyncSeekExt;
use tokio::io::ReadBuf;
use tokio::sync::mpsc;

// TODO move to databuffer-specific crate
// TODO duplicate of SfChFetchInfo?
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SfDbChConf {
    pub channel: SfDbChannel,
    pub keyspace: u8,
    pub time_bin_size: DtNano,
    pub scalar_type: ScalarType,
    pub compression: bool,
    pub shape: Shape,
    pub array: bool,
    pub byte_order: ByteOrder,
}

#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AggQuerySingleChannel {
    pub channel_config: SfDbChConf,
    pub timebin: u32,
    pub tb_file_count: u32,
    pub buffer_size: u32,
}

// TODO transform this into a self-test or remove.
pub async fn read_test_1(query: &AggQuerySingleChannel, node: Node) -> Result<netpod::BodyStream, Error> {
    let path = paths::datapath(query.timebin as u64, &query.channel_config, 0, &node);
    debug!("try path: {:?}", path);
    let fin = OpenOptions::new().read(true).open(path).await?;
    let meta = fin.metadata().await;
    debug!("file meta {:?}", meta);
    let stream = netpod::BodyStream {
        inner: Box::new(FileReader {
            file: fin,
            nreads: 0,
            buffer_size: query.buffer_size,
        }),
    };
    Ok(stream)
}

struct FileReader {
    file: tokio::fs::File,
    nreads: u32,
    buffer_size: u32,
}

impl Stream for FileReader {
    type Item = Result<Bytes, Error>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        err::todo();
        // TODO remove if no longer used?
        let blen = self.buffer_size as usize;
        let mut buf2 = BytesMut::with_capacity(blen);
        buf2.resize(buf2.capacity(), 0);
        if buf2.as_mut().len() != blen {
            panic!("logic");
        }
        let mut buf = tokio::io::ReadBuf::new(buf2.as_mut());
        if buf.filled().len() != 0 {
            panic!("logic");
        }
        match Pin::new(&mut self.file).poll_read(cx, &mut buf) {
            Poll::Ready(Ok(_)) => {
                let rlen = buf.filled().len();
                if rlen == 0 {
                    Poll::Ready(None)
                } else {
                    if rlen != blen {
                        info!("short read {} of {}", buf.filled().len(), blen);
                    }
                    self.nreads += 1;
                    Poll::Ready(Some(Ok(buf2.freeze())))
                }
            }
            Poll::Ready(Err(e)) => Poll::Ready(Some(Err(Error::from(e)))),
            Poll::Pending => Poll::Pending,
        }
    }
}

pub struct Fopen1 {
    #[allow(dead_code)]
    opts: OpenOptions,
    fut: Pin<Box<dyn Future<Output = Result<File, std::io::Error>>>>,
    term: bool,
}

impl Fopen1 {
    pub fn new(path: PathBuf) -> Self {
        let fut = Box::pin(async {
            let mut o1 = OpenOptions::new();
            let o2 = o1.read(true);
            let res = o2.open(path);
            res.await
        }) as Pin<Box<dyn Future<Output = Result<File, std::io::Error>>>>;
        let _fut2: Box<dyn Future<Output = u32>> = Box::new(async { 123 });
        Self {
            opts: OpenOptions::new(),
            fut,
            term: false,
        }
    }
}

impl Future for Fopen1 {
    type Output = Result<File, Error>;

    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
        let g = self.fut.as_mut();
        match g.poll(cx) {
            Poll::Ready(Ok(k)) => {
                self.term = true;
                Poll::Ready(Ok(k))
            }
            Poll::Ready(Err(k)) => {
                self.term = true;
                Poll::Ready(Err(k.into()))
            }
            Poll::Pending => Poll::Pending,
        }
    }
}

impl FusedFuture for Fopen1 {
    fn is_terminated(&self) -> bool {
        self.term
    }
}

unsafe impl Send for Fopen1 {}

pub struct FileContentStream {
    file: File,
    disk_io_tune: DiskIoTune,
    read_going: bool,
    buf: BytesMut,
    ts1: Instant,
    nlog: usize,
    done: bool,
    complete: bool,
}

impl FileContentStream {
    pub fn type_name() -> &'static str {
        std::any::type_name::<Self>()
    }

    pub fn new(file: File, disk_io_tune: DiskIoTune) -> Self {
        Self {
            file,
            disk_io_tune,
            read_going: false,
            buf: BytesMut::new(),
            ts1: Instant::now(),
            nlog: 0,
            done: false,
            complete: false,
        }
    }
}

impl Stream for FileContentStream {
    type Item = Result<FileChunkRead, Error>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        loop {
            break if self.complete {
                panic!("{} poll_next on complete", Self::type_name())
            } else if self.done {
                self.complete = true;
                Ready(None)
            } else {
                let mut buf = if !self.read_going {
                    self.ts1 = Instant::now();
                    let mut buf = BytesMut::new();
                    buf.resize(self.disk_io_tune.read_buffer_len, 0);
                    buf
                } else {
                    mem::replace(&mut self.buf, BytesMut::new())
                };
                let mutsl = buf.as_mut();
                let mut rb = ReadBuf::new(mutsl);
                let f1 = &mut self.file;
                let f2 = Pin::new(f1);
                let pollres = AsyncRead::poll_read(f2, cx, &mut rb);
                match pollres {
                    Ready(Ok(_)) => {
                        let nread = rb.filled().len();
                        buf.truncate(nread);
                        self.read_going = false;
                        let ts2 = Instant::now();
                        if nread == 0 {
                            let ret = FileChunkRead::with_buf_dur(buf, ts2.duration_since(self.ts1));
                            self.done = true;
                            Ready(Some(Ok(ret)))
                        } else {
                            let ret = FileChunkRead::with_buf_dur(buf, ts2.duration_since(self.ts1));
                            if false && self.nlog < 6 {
                                self.nlog += 1;
                                info!("{:?} ret {:?}", self.disk_io_tune, ret);
                            }
                            Ready(Some(Ok(ret)))
                        }
                    }
                    Ready(Err(e)) => {
                        self.done = true;
                        Ready(Some(Err(e.into())))
                    }
                    Pending => Pending,
                }
            };
        }
    }
}

fn start_read5(
    path: PathBuf,
    file: File,
    tx: async_channel::Sender<Result<FileChunkRead, Error>>,
    disk_io_tune: DiskIoTune,
) -> Result<(), Error> {
    let fut = async move {
        let mut file = file;
        let pos_beg = match file.stream_position().await {
            Ok(x) => x,
            Err(e) => {
                error!("stream_position {e} {path:?}");
                if let Err(_) = tx
                    .send(Err(Error::with_msg_no_trace(format!("seek error {path:?}"))))
                    .await
                {
                    error!("broken channel");
                }
                return;
            }
        };
        let mut pos = pos_beg;
        info!("read5 begin {disk_io_tune:?}");
        loop {
            let mut buf = BytesMut::new();
            buf.resize(disk_io_tune.read_buffer_len, 0);
            match tokio::time::timeout(Duration::from_millis(8000), file.read(&mut buf)).await {
                Ok(Ok(n)) => {
                    if n == 0 {
                        //info!("read5 EOF pos_beg {pos_beg} pos {pos} path {path:?}");
                        break;
                    }
                    pos += n as u64;
                    buf.truncate(n);
                    let item = FileChunkRead::with_buf(buf);
                    match tx.send(Ok(item)).await {
                        Ok(()) => {}
                        Err(_) => {
                            //error!("broken channel");
                            break;
                        }
                    }
                }
                Ok(Err(e)) => match tx.send(Err(e.into())).await {
                    Ok(()) => {
                        break;
                    }
                    Err(_) => {
                        //error!("broken channel");
                        break;
                    }
                },
                Err(_) => {
                    let msg = format!("I/O timeout pos_beg {pos_beg} pos {pos} path {path:?}");
                    error!("{msg}");
                    let e = Error::with_msg_no_trace(msg);
                    match tx.send(Err(e)).await {
                        Ok(()) => {}
                        Err(_e) => {
                            //error!("broken channel");
                            break;
                        }
                    }
                    break;
                }
            }
        }
        let n = pos - pos_beg;
        info!("read5 done {n}");
    };
    tokio::task::spawn(fut);
    Ok(())
}
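
// start_read5 wraps every read in tokio::time::timeout, so a stalled disk turns
// into an error item on the channel instead of a silently hung stream: the outer
// Err(_) arm is the elapsed-timeout case, the inner Result carries real I/O errors.
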
pub struct FileContentStream5 {
    rx: async_channel::Receiver<Result<FileChunkRead, Error>>,
}

impl FileContentStream5 {
    pub fn new(path: PathBuf, file: File, disk_io_tune: DiskIoTune) -> Result<Self, Error> {
        let (tx, rx) = async_channel::bounded(32);
        start_read5(path, file, tx, disk_io_tune)?;
        let ret = Self { rx };
        Ok(ret)
    }
}

impl Stream for FileContentStream5 {
    type Item = Result<FileChunkRead, Error>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        self.rx.poll_next_unpin(cx)
    }
}

enum FCS2 {
    Idle,
    Reading(
        (
            Box<BytesMut>,
            Pin<Box<dyn Future<Output = Result<usize, Error>> + Send>>,
        ),
    ),
}

pub struct FileContentStream2 {
    fcs: FCS2,
    file: Pin<Box<File>>,
    disk_io_tune: DiskIoTune,
    done: bool,
    complete: bool,
}

impl FileContentStream2 {
    pub fn type_name() -> &'static str {
        std::any::type_name::<Self>()
    }

    pub fn new(file: File, disk_io_tune: DiskIoTune) -> Self {
        let file = Box::pin(file);
        Self {
            fcs: FCS2::Idle,
            file,
            disk_io_tune,
            done: false,
            complete: false,
        }
    }

    fn make_reading(&mut self) {
        let mut buf = Box::new(BytesMut::with_capacity(self.disk_io_tune.read_buffer_len));
        // SAFETY: the future stored in FCS2::Reading borrows both the boxed
        // buffer and the pinned file. This is sound only because the Box keeps
        // the buffer at a stable address, buffer and future are stored and
        // replaced together, and the future is dropped before the buffer it
        // points into.
        let bufref = unsafe { &mut *((&mut buf as &mut BytesMut) as *mut BytesMut) };
        let fileref = unsafe { &mut *((&mut self.file) as *mut Pin<Box<File>>) };
        let fut = AsyncReadExt::read_buf(fileref, bufref).map_err(|e| e.into());
        self.fcs = FCS2::Reading((buf, Box::pin(fut)));
    }
}

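// A sketch of a safe alternative to the aliasing in make_reading: move the
// file and buffer into the future and hand them back together with the byte
// count, so no unsafe pointer juggling is needed. Illustrative only; the
// tradeoff is that the state machine must thread the file through each step.
#[allow(unused)]
fn make_read_fut_sketch(
    mut file: tokio::fs::File,
    mut buf: bytes::BytesMut,
) -> std::pin::Pin<Box<dyn std::future::Future<Output = std::io::Result<(tokio::fs::File, bytes::BytesMut, usize)>> + Send>> {
    Box::pin(async move {
        use tokio::io::AsyncReadExt;
        let n = file.read_buf(&mut buf).await?;
        Ok((file, buf, n))
    })
}
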
impl Stream for FileContentStream2 {
    type Item = Result<FileChunkRead, Error>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        loop {
            break if self.complete {
                panic!("{} poll_next on complete", Self::type_name())
            } else if self.done {
                self.complete = true;
                Ready(None)
            } else {
                match self.fcs {
                    FCS2::Idle => {
                        self.make_reading();
                        continue;
                    }
                    FCS2::Reading((ref mut buf, ref mut fut)) => match fut.poll_unpin(cx) {
                        Ready(Ok(n)) => {
                            let buf2 = std::mem::replace(buf as &mut BytesMut, BytesMut::new());
                            let item = FileChunkRead::with_buf(buf2);
                            if n == 0 {
                                self.done = true;
                            } else {
                                self.make_reading();
                            }
                            Ready(Some(Ok(item)))
                        }
                        Ready(Err(e)) => {
                            self.done = true;
                            Ready(Some(Err(e.into())))
                        }
                        Pending => Pending,
                    },
                }
            };
        }
    }
}

enum FCS3 {
    GetPosition,
    ReadingSimple,
    Reading,
}

enum ReadStep {
    Fut(Pin<Box<dyn Future<Output = Result<read3::ReadResult, Error>> + Send>>),
    Res(Result<read3::ReadResult, Error>),
}

pub struct FileContentStream3 {
    fcs: FCS3,
    file: Pin<Box<File>>,
    file_pos: u64,
    eof: bool,
    disk_io_tune: DiskIoTune,
    get_position_fut: Pin<Box<dyn Future<Output = Result<u64, Error>> + Send>>,
    read_fut: Pin<Box<dyn Future<Output = Result<read3::ReadResult, Error>> + Send>>,
    reads: VecDeque<ReadStep>,
    done: bool,
    complete: bool,
}

impl FileContentStream3 {
    pub fn new(file: File, disk_io_tune: DiskIoTune) -> Self {
        let mut file = Box::pin(file);
        // SAFETY: the 'static lifetime is a lie to let the seek future be
        // stored in the same struct as the file. Sound only because the file
        // stays boxed at a stable address for the whole lifetime of
        // get_position_fut.
        let ffr = unsafe {
            let ffr = Pin::get_unchecked_mut(file.as_mut());
            std::mem::transmute::<&mut File, &'static mut File>(ffr)
        };
        let ff = ffr
            .seek(SeekFrom::Current(0))
            .map_err(|_| Error::with_msg_no_trace(String::from("Seek error")));
        Self {
            fcs: FCS3::GetPosition,
            file,
            file_pos: 0,
            eof: false,
            disk_io_tune,
            get_position_fut: Box::pin(ff),
            read_fut: Box::pin(futures_util::future::ready(Err(Error::with_msg_no_trace(
                String::from("dummy"),
            )))),
            reads: VecDeque::new(),
            done: false,
            complete: false,
        }
    }
}

impl Stream for FileContentStream3 {
    type Item = Result<FileChunkRead, Error>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        loop {
            break if self.complete {
                panic!("poll_next on complete")
            } else if self.done {
                self.complete = true;
                Ready(None)
            } else {
                match self.fcs {
                    FCS3::GetPosition => match self.get_position_fut.poll_unpin(cx) {
                        Ready(Ok(k)) => {
                            info!("current file pos: {k}");
                            self.file_pos = k;
                            if false {
                                let fd = self.file.as_raw_fd();
                                let count = self.disk_io_tune.read_buffer_len as u64;
                                self.read_fut = Box::pin(read3::Read3::get().read(fd, self.file_pos, count));
                                self.file_pos += count;
                                self.fcs = FCS3::ReadingSimple;
                            } else {
                                self.fcs = FCS3::Reading;
                            }
                            continue;
                        }
                        Ready(Err(e)) => {
                            self.done = true;
                            Ready(Some(Err(e)))
                        }
                        Pending => Pending,
                    },
                    FCS3::ReadingSimple => match self.read_fut.poll_unpin(cx) {
                        Ready(Ok(res)) => {
                            if res.eof {
                                let item = FileChunkRead::with_buf(res.buf);
                                self.done = true;
                                Ready(Some(Ok(item)))
                            } else {
                                let item = FileChunkRead::with_buf(res.buf);
                                let fd = self.file.as_raw_fd();
                                let count = self.disk_io_tune.read_buffer_len as u64;
                                self.read_fut = Box::pin(read3::Read3::get().read(fd, self.file_pos, count));
                                self.file_pos += count;
                                Ready(Some(Ok(item)))
                            }
                        }
                        Ready(Err(e)) => {
                            self.done = true;
                            Ready(Some(Err(e)))
                        }
                        Pending => Pending,
                    },
                    FCS3::Reading => {
                        while !self.eof && self.reads.len() < self.disk_io_tune.read_queue_len {
                            let fd = self.file.as_raw_fd();
                            let pos = self.file_pos;
                            let count = self.disk_io_tune.read_buffer_len as u64;
                            trace!("create ReadTask fd {fd} pos {pos} count {count}");
                            let r3 = read3::Read3::get();
                            let fut = r3.read(fd, pos, count);
                            self.reads.push_back(ReadStep::Fut(Box::pin(fut)));
                            self.file_pos += count;
                        }
                        for e in &mut self.reads {
                            match e {
                                ReadStep::Fut(k) => match k.poll_unpin(cx) {
                                    Ready(k) => {
                                        trace!("received a result");
                                        *e = ReadStep::Res(k);
                                    }
                                    Pending => {}
                                },
                                ReadStep::Res(_) => {}
                            }
                        }
                        if let Some(ReadStep::Res(_)) = self.reads.front() {
                            if let Some(ReadStep::Res(res)) = self.reads.pop_front() {
                                trace!("pop front result");
                                match res {
                                    Ok(rr) => {
                                        if rr.eof {
                                            if self.eof {
                                                trace!("see EOF in ReadResult AGAIN");
                                            } else {
                                                debug!("see EOF in ReadResult SET OUR FLAG");
                                                self.eof = true;
                                            }
                                        }
                                        let res = FileChunkRead::with_buf(rr.buf);
                                        Ready(Some(Ok(res)))
                                    }
                                    Err(e) => {
                                        error!("received ReadResult error: {e}");
                                        self.done = true;
                                        let e = Error::with_msg(format!("I/O error: {e}"));
                                        Ready(Some(Err(e)))
                                    }
                                }
                            } else {
                                self.done = true;
                                let e = Error::with_msg(String::from("logic error"));
                                error!("{e}");
                                Ready(Some(Err(e)))
                            }
                        } else if self.reads.front().is_none() {
                            debug!("empty read fut queue, end");
                            self.done = true;
                            continue;
                        } else {
                            trace!("read fut queue Pending");
                            Pending
                        }
                    }
                }
            };
        }
    }
}

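// The reads queue above hand-rolls ordered pipelining: keep up to
// read_queue_len requests in flight and always emit the front result first.
// FuturesOrdered from futures_util expresses the same idea; a tiny sketch
// (push_back requires a recent futures version):
#[allow(unused)]
async fn pipelined_sketch() {
    use futures_util::stream::FuturesOrdered;
    use futures_util::StreamExt;
    let mut q = FuturesOrdered::new();
    for i in 0..4u64 {
        q.push_back(async move { i * 2 });
    }
    // Results come out in submission order, regardless of completion order.
    while let Some(_v) = q.next().await {}
}
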
enum FCS4 {
    Init,
    Setup,
    Reading,
}

pub struct FileContentStream4 {
    fcs: FCS4,
    file: Pin<Box<File>>,
    disk_io_tune: DiskIoTune,
    setup_fut:
        Option<Pin<Box<dyn Future<Output = Result<mpsc::Receiver<Result<read4::ReadResult, Error>>, Error>> + Send>>>,
    inp: Option<mpsc::Receiver<Result<read4::ReadResult, Error>>>,
    recv_fut: Pin<Box<dyn Future<Output = Option<Result<read4::ReadResult, Error>>> + Send>>,
    done: bool,
    complete: bool,
}

impl FileContentStream4 {
    pub fn new(file: File, disk_io_tune: DiskIoTune) -> Self {
        let file = Box::pin(file);
        Self {
            fcs: FCS4::Init,
            file,
            disk_io_tune,
            setup_fut: None,
            inp: None,
            recv_fut: Box::pin(futures_util::future::ready(Some(Err(Error::with_msg_no_trace(
                String::from("dummy"),
            ))))),
            done: false,
            complete: false,
        }
    }
}

impl Stream for FileContentStream4 {
    type Item = Result<FileChunkRead, Error>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        loop {
            break if self.complete {
                panic!("poll_next on complete")
            } else if self.done {
                self.complete = true;
                Ready(None)
            } else {
                match self.fcs {
                    FCS4::Init => {
                        let read4 = read4::Read4::get();
                        let fd = self.file.as_raw_fd();
                        let buflen = self.disk_io_tune.read_buffer_len as u64;
                        let fut = read4.read(fd, buflen, self.disk_io_tune.read_queue_len);
                        self.setup_fut = Some(Box::pin(fut) as _);
                        self.fcs = FCS4::Setup;
                        continue;
                    }
                    FCS4::Setup => match self.setup_fut.as_mut().unwrap().poll_unpin(cx) {
                        Ready(k) => match k {
                            Ok(k) => {
                                self.setup_fut = None;
                                self.fcs = FCS4::Reading;
                                self.inp = Some(k);
                                // TODO
                                let rm = self.inp.as_mut().unwrap();
                                // SAFETY: extend the receiver borrow to 'static so the
                                // recv future can be stored in self. Sound only because
                                // `inp` is never moved or dropped while `recv_fut` is
                                // alive, and both are replaced together.
                                let rm = unsafe {
                                    std::mem::transmute::<
                                        &mut mpsc::Receiver<Result<read4::ReadResult, Error>>,
                                        &'static mut mpsc::Receiver<Result<read4::ReadResult, Error>>,
                                    >(rm)
                                };
                                self.recv_fut = Box::pin(rm.recv()) as _;
                                continue;
                            }
                            Err(e) => {
                                self.done = true;
                                let e = Error::with_msg_no_trace(format!("init failed {e:?}"));
                                Ready(Some(Err(e)))
                            }
                        },
                        Pending => Pending,
                    },
                    FCS4::Reading => match self.recv_fut.poll_unpin(cx) {
                        Ready(k) => match k {
                            Some(k) => match k {
                                Ok(k) => {
                                    // TODO
                                    let rm = self.inp.as_mut().unwrap();
                                    // SAFETY: see the comment in FCS4::Setup above.
                                    let rm = unsafe {
                                        std::mem::transmute::<
                                            &mut mpsc::Receiver<Result<read4::ReadResult, Error>>,
                                            &'static mut mpsc::Receiver<Result<read4::ReadResult, Error>>,
                                        >(rm)
                                    };
                                    self.recv_fut = Box::pin(rm.recv()) as _;
                                    let item = FileChunkRead::with_buf(k.buf);
                                    Ready(Some(Ok(item)))
                                }
                                Err(e) => {
                                    self.done = true;
                                    // Note: this is a read error, not an init error.
                                    let e = Error::with_msg_no_trace(format!("read failed {e:?}"));
                                    Ready(Some(Err(e)))
                                }
                            },
                            None => {
                                self.done = true;
                                continue;
                            }
                        },
                        Pending => Pending,
                    },
                }
            };
        }
    }
}

pub fn file_content_stream(
    path: PathBuf,
    file: File,
    disk_io_tune: DiskIoTune,
) -> Pin<Box<dyn Stream<Item = Result<FileChunkRead, Error>> + Send>> {
    debug!("file_content_stream disk_io_tune {disk_io_tune:?}");
    match &disk_io_tune.read_sys {
        ReadSys::TokioAsyncRead => {
            let s = FileContentStream::new(file, disk_io_tune);
            Box::pin(s) as Pin<Box<dyn Stream<Item = _> + Send>>
        }
        ReadSys::Read2 => {
            let s = FileContentStream2::new(file, disk_io_tune);
            Box::pin(s) as _
        }
        ReadSys::Read3 => {
            let s = FileContentStream3::new(file, disk_io_tune);
            Box::pin(s) as _
        }
        ReadSys::Read4 => {
            let s = FileContentStream4::new(file, disk_io_tune);
            Box::pin(s) as _
        }
        ReadSys::Read5 => {
            let s = FileContentStream5::new(path, file, disk_io_tune).unwrap();
            Box::pin(s) as _
        }
    }
}

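// The dispatch above selects a concrete Stream implementation at runtime and
// erases it behind one boxed trait object. The shape of that pattern, with
// stand-in streams (illustrative only):
#[allow(unused)]
fn pick_stream_sketch(fast: bool) -> std::pin::Pin<Box<dyn futures_util::Stream<Item = u64> + Send>> {
    use futures_util::stream::{self, StreamExt};
    if fast {
        Box::pin(stream::iter(0u64..4))
    } else {
        Box::pin(stream::iter(0u64..8).map(|x| x * 2))
    }
}
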
trait ChannelConfigExt {
    fn dtflags(&self) -> u8;
}

impl ChannelConfigExt for SfDbChConf {
    fn dtflags(&self) -> u8 {
        let mut ret = 0;
        if self.compression {
            ret |= COMPRESSION;
        }
        match self.shape {
            Shape::Scalar => {}
            Shape::Wave(_) | Shape::Image(_, _) => {
                ret |= SHAPE;
            }
        }
        if self.byte_order.is_be() {
            ret |= BIG_ENDIAN;
        }
        if self.array {
            ret |= ARRAY;
        }
        ret
    }
}
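
// Sketch: composing a dtflags byte for a compressed big-endian waveform. The
// numeric values below are assumed placeholders for illustration; the real
// constants come from streams::dtflags (COMPRESSION, ARRAY, BIG_ENDIAN, SHAPE).
#[allow(unused)]
fn dtflags_sketch() -> u8 {
    // assumed values, illustration only
    const COMPRESSION: u8 = 0x80;
    const ARRAY: u8 = 0x40;
    const BIG_ENDIAN: u8 = 0x20;
    const SHAPE: u8 = 0x10;
    COMPRESSION | ARRAY | BIG_ENDIAN | SHAPE
}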
420
crates/disk/src/eventblobs.rs
Normal file
@@ -0,0 +1,420 @@
use crate::dataopen::open_expanded_files;
use crate::dataopen::open_files;
use crate::dataopen::OpenedFileSet;
use crate::eventchunker::EventChunker;
use crate::eventchunker::EventChunkerConf;
use err::Error;
use futures_util::Stream;
use futures_util::StreamExt;
use items_0::streamitem::LogItem;
use items_0::streamitem::RangeCompletableItem;
use items_0::streamitem::Sitemty;
use items_0::streamitem::StreamItem;
use items_0::WithLen;
use items_2::eventfull::EventFull;
use items_2::merger::Merger;
use netpod::log::*;
use netpod::range::evrange::NanoRange;
use netpod::timeunits::SEC;
use netpod::DiskIoTune;
use netpod::Node;
use netpod::SfChFetchInfo;
use std::collections::VecDeque;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
use streams::rangefilter2::RangeFilter2;

pub trait InputTraits: Stream<Item = Sitemty<EventFull>> {}

impl<T> InputTraits for T where T: Stream<Item = Sitemty<EventFull>> {}

pub struct EventChunkerMultifile {
    fetch_info: SfChFetchInfo,
    file_chan: async_channel::Receiver<Result<OpenedFileSet, Error>>,
    evs: Option<Pin<Box<dyn InputTraits + Send>>>,
    disk_io_tune: DiskIoTune,
    event_chunker_conf: EventChunkerConf,
    range: NanoRange,
    files_count: u32,
    node_ix: usize,
    expand: bool,
    do_decompress: bool,
    max_ts: u64,
    out_max_len: usize,
    emit_count: usize,
    do_emit_err_after: Option<usize>,
    range_final: bool,
    log_queue: VecDeque<LogItem>,
    done: bool,
    done_emit_range_final: bool,
    complete: bool,
}

impl EventChunkerMultifile {
    pub fn type_name() -> &'static str {
        std::any::type_name::<Self>()
    }

    pub fn new(
        range: NanoRange,
        fetch_info: SfChFetchInfo,
        node: Node,
        node_ix: usize,
        disk_io_tune: DiskIoTune,
        event_chunker_conf: EventChunkerConf,
        expand: bool,
        do_decompress: bool,
        out_max_len: usize,
    ) -> Self {
        info!("EventChunkerMultifile expand {expand} do_decompress {do_decompress}");
        let file_chan = if expand {
            open_expanded_files(&range, &fetch_info, node)
        } else {
            open_files(&range, &fetch_info, node)
        };
        Self {
            file_chan,
            evs: None,
            disk_io_tune,
            event_chunker_conf,
            fetch_info,
            range,
            files_count: 0,
            node_ix,
            expand,
            do_decompress,
            max_ts: 0,
            out_max_len,
            emit_count: 0,
            do_emit_err_after: None,
            range_final: false,
            log_queue: VecDeque::new(),
            done: false,
            done_emit_range_final: false,
            complete: false,
        }
    }
}

impl Stream for EventChunkerMultifile {
    type Item = Result<StreamItem<RangeCompletableItem<EventFull>>, Error>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        let span1 = span!(Level::INFO, "EvChMul", node_ix = self.node_ix);
        let _spg = span1.enter();
        info!("EventChunkerMultifile poll_next");
        use Poll::*;
        'outer: loop {
            break if let Some(item) = self.log_queue.pop_front() {
                Ready(Some(Ok(StreamItem::Log(item))))
            } else if self.complete {
                panic!("{} poll_next on complete", Self::type_name());
            } else if self.done_emit_range_final {
                self.complete = true;
                Ready(None)
            } else if self.done {
                self.done_emit_range_final = true;
                if self.range_final {
                    Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
                } else {
                    continue;
                }
            } else {
                match &mut self.evs {
                    Some(evs) => match evs.poll_next_unpin(cx) {
                        Ready(Some(Ok(k))) => {
                            let k = if let StreamItem::DataItem(RangeCompletableItem::Data(h)) = k {
                                let mut h: EventFull = h;
                                if h.len() > 0 {
                                    let min = h.tss.iter().fold(u64::MAX, |a, &x| a.min(x));
                                    let max = h.tss.iter().fold(u64::MIN, |a, &x| a.max(x));
                                    if min <= self.max_ts {
                                        let msg = format!("EventChunkerMultifile repeated or unordered ts {}", min);
                                        error!("{}", msg);
                                        let item = LogItem {
                                            node_ix: self.node_ix as _,
                                            level: Level::INFO,
                                            msg,
                                        };
                                        self.log_queue.push_back(item);
                                    }
                                    self.max_ts = max;
                                    if let Some(after) = self.do_emit_err_after {
                                        if self.emit_count < after {
                                            debug!(
                                                "EventChunkerMultifile emit {}/{} events {}",
                                                self.emit_count,
                                                after,
                                                h.len()
                                            );
                                            self.emit_count += 1;
                                        }
                                    }
                                    if max >= self.range.end {
                                        self.range_final = true;
                                        h.truncate_ts(self.range.end);
                                        self.evs = None;
                                        let (tx, rx) = async_channel::bounded(1);
                                        drop(tx);
                                        self.file_chan = rx;
                                    }
                                }
                                StreamItem::DataItem(RangeCompletableItem::Data(h))
                            } else {
                                k
                            };
                            Ready(Some(Ok(k)))
                        }
                        Ready(Some(Err(e))) => {
                            error!("{e}");
                            self.done = true;
                            Ready(Some(Err(e)))
                        }
                        Ready(None) => {
                            self.evs = None;
                            continue 'outer;
                        }
                        Pending => Pending,
                    },
                    None => match self.file_chan.poll_next_unpin(cx) {
                        Ready(Some(k)) => match k {
                            Ok(ofs) => {
                                self.files_count += ofs.files.len() as u32;
                                if ofs.files.len() == 1 {
                                    let mut ofs = ofs;
                                    let file = ofs.files.pop().unwrap();
                                    let path = file.path;
                                    let msg = format!("handle OFS {:?}", ofs);
                                    debug!("{}", msg);
                                    let item = LogItem::quick(Level::INFO, msg);
                                    match file.file {
                                        Some(file) => {
                                            let inp = Box::pin(crate::file_content_stream(
                                                path.clone(),
                                                file,
                                                self.disk_io_tune.clone(),
                                            ));
                                            let chunker = EventChunker::from_event_boundary(
                                                inp,
                                                self.fetch_info.clone(),
                                                self.range.clone(),
                                                self.event_chunker_conf.clone(),
                                                path.clone(),
                                                self.expand,
                                                self.do_decompress,
                                            );
                                            let filtered = RangeFilter2::new(chunker, self.range.clone(), self.expand);
                                            self.evs = Some(Box::pin(filtered));
                                        }
                                        None => {}
                                    }
                                    Ready(Some(Ok(StreamItem::Log(item))))
                                } else if ofs.files.len() == 0 {
                                    let msg = format!("handle OFS {:?} NO FILES", ofs);
                                    debug!("{}", msg);
                                    let item = LogItem::quick(Level::INFO, msg);
                                    Ready(Some(Ok(StreamItem::Log(item))))
                                } else {
                                    let msg = format!("handle OFS MERGED timebin {}", ofs.timebin);
                                    info!("{}", msg);
                                    for x in &ofs.files {
                                        info!(" path {:?}", x.path);
                                    }
                                    let item = LogItem::quick(Level::INFO, msg);
                                    let mut chunkers = Vec::new();
                                    for of in ofs.files {
                                        if let Some(file) = of.file {
                                            let inp = crate::file_content_stream(
                                                of.path.clone(),
                                                file,
                                                self.disk_io_tune.clone(),
                                            );
                                            let chunker = EventChunker::from_event_boundary(
                                                inp,
                                                self.fetch_info.clone(),
                                                self.range.clone(),
                                                self.event_chunker_conf.clone(),
                                                of.path.clone(),
                                                self.expand,
                                                self.do_decompress,
                                            );
                                            chunkers.push(Box::pin(chunker) as _);
                                        }
                                    }
                                    let merged = Merger::new(chunkers, self.out_max_len);
                                    let filtered = RangeFilter2::new(merged, self.range.clone(), self.expand);
                                    self.evs = Some(Box::pin(filtered));
                                    Ready(Some(Ok(StreamItem::Log(item))))
                                }
                            }
                            Err(e) => {
                                self.done = true;
                                Ready(Some(Err(e)))
                            }
                        },
                        Ready(None) => {
                            self.done = true;
                            let item = LogItem::quick(
                                Level::INFO,
                                format!(
                                    "EventChunkerMultifile used {} datafiles beg {} end {} node_ix {}",
                                    self.files_count,
                                    self.range.beg / SEC,
                                    self.range.end / SEC,
                                    self.node_ix
                                ),
                            );
                            Ready(Some(Ok(StreamItem::Log(item))))
                        }
                        Pending => Pending,
                    },
                }
            };
        }
    }
}

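// Merger above performs a time-ordered k-way merge of the per-file chunkers.
// The core invariant, reduced to two pre-sorted timestamp slices (sketch):
#[allow(unused)]
fn merge_sorted_sketch(a: &[u64], b: &[u64]) -> Vec<u64> {
    let (mut i, mut j) = (0, 0);
    let mut out = Vec::with_capacity(a.len() + b.len());
    while i < a.len() && j < b.len() {
        if a[i] <= b[j] {
            out.push(a[i]);
            i += 1;
        } else {
            out.push(b[j]);
            j += 1;
        }
    }
    out.extend_from_slice(&a[i..]);
    out.extend_from_slice(&b[j..]);
    out
}
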
// TODO re-enable tests generate data on the fly.
#[cfg(DISABLED)]
#[cfg(test)]
mod test {
    use crate::eventblobs::EventChunkerMultifile;
    use crate::eventchunker::EventChunkerConf;
    use crate::SfDbChConf;
    use err::Error;
    use futures_util::StreamExt;
    use items_0::streamitem::RangeCompletableItem;
    use items_0::streamitem::StreamItem;
    use items_0::WithLen;
    use netpod::log::*;
    use netpod::range::evrange::NanoRange;
    use netpod::timeunits::DAY;
    use netpod::timeunits::MS;
    use netpod::ByteSize;
    use netpod::DiskIoTune;
    use netpod::TsNano;
    use streams::rangefilter2::RangeFilter2;

    const BACKEND: &str = "testbackend-00";

    fn read_expanded_for_range(range: NanoRange, nodeix: usize) -> Result<(usize, Vec<u64>), Error> {
        let chn = netpod::SfDbChannel {
            backend: BACKEND.into(),
            name: "scalar-i32-be".into(),
            series: None,
        };
        // TODO read config from disk.
        let channel_config = SfDbChConf {
            channel: chn,
            keyspace: 2,
            time_bin_size: TsNano(DAY),
            scalar_type: netpod::ScalarType::I32,
            byte_order: netpod::ByteOrder::Big,
            shape: netpod::Shape::Scalar,
            array: false,
            compression: false,
        };
        let cluster = netpod::test_cluster();
        let node = cluster.nodes[nodeix].clone();
        let event_chunker_conf = EventChunkerConf {
            disk_stats_every: ByteSize::kb(1024),
        };
        let disk_io_tune = DiskIoTune::default_for_testing();
        let task = async move {
            let mut event_count = 0;
            let events = EventChunkerMultifile::new(
                range.clone(),
                channel_config,
                node,
                nodeix,
                disk_io_tune,
                event_chunker_conf,
                true,
                true,
                // TODO do asserts depend on this?
                32,
            );
            //let mut events = MergedStream::new(vec![events], range.clone(), true);
            let mut events = RangeFilter2::new(events, range.clone(), true);
            let mut tss = Vec::new();
            while let Some(item) = events.next().await {
                match item {
                    Ok(item) => match item {
                        StreamItem::DataItem(item) => match item {
                            RangeCompletableItem::Data(item) => {
                                // TODO assert more
                                debug!("item: {:?}", item.tss.iter().map(|x| x / MS).collect::<Vec<_>>());
                                event_count += item.len();
                                for ts in item.tss {
                                    tss.push(ts);
                                }
                            }
                            _ => {}
                        },
                        _ => {}
                    },
                    Err(e) => return Err(e.into()),
                }
            }
            Ok((event_count, tss))
        };
        Ok(taskrun::run(task).unwrap())
    }

    #[test]
    fn read_expanded_0() -> Result<(), Error> {
        let range = NanoRange {
            beg: DAY + MS * 0,
            end: DAY + MS * 100,
        };
        let res = read_expanded_for_range(range, 0)?;
        // TODO assert more
        debug!("got {:?}", res.1);
        if res.0 != 3 {
            Err(Error::with_msg(format!("unexpected number of events: {}", res.0)))?;
        }
        assert_eq!(res.1, vec![DAY - MS * 1500, DAY, DAY + MS * 1500]);
        Ok(())
    }

    #[test]
    fn read_expanded_1() -> Result<(), Error> {
        let range = NanoRange {
            beg: DAY + MS * 0,
            end: DAY + MS * 1501,
        };
        let res = read_expanded_for_range(range, 0)?;
        if res.0 != 4 {
            Err(Error::with_msg(format!("unexpected number of events: {}", res.0)))?;
        }
        assert_eq!(res.1, vec![DAY - MS * 1500, DAY, DAY + MS * 1500, DAY + MS * 3000]);
        Ok(())
    }

    #[test]
    fn read_expanded_2() -> Result<(), Error> {
        let range = NanoRange {
            beg: DAY - MS * 100,
            end: DAY + MS * 1501,
        };
        let res = read_expanded_for_range(range, 0)?;
        assert_eq!(res.1, vec![DAY - MS * 1500, DAY, DAY + MS * 1500, DAY + MS * 3000]);
        Ok(())
    }

    #[test]
    fn read_expanded_3() -> Result<(), Error> {
        use netpod::timeunits::*;
        let range = NanoRange {
            beg: DAY - MS * 1500,
            end: DAY + MS * 1501,
        };
        let res = read_expanded_for_range(range, 0)?;
        assert_eq!(
            res.1,
            vec![DAY - MS * 3000, DAY - MS * 1500, DAY, DAY + MS * 1500, DAY + MS * 3000]
        );
        Ok(())
    }
}

673
crates/disk/src/eventchunker.rs
Normal file
@@ -0,0 +1,673 @@
use bitshuffle::bitshuffle_decompress;
use bytes::Buf;
use bytes::BytesMut;
use err::thiserror;
use err::Error;
use futures_util::Stream;
use futures_util::StreamExt;
use items_0::streamitem::RangeCompletableItem;
use items_0::streamitem::StatsItem;
use items_0::streamitem::StreamItem;
use items_0::Empty;
use items_0::WithLen;
use items_2::eventfull::EventFull;
use netpod::histo::HistoLog2;
use netpod::log::*;
use netpod::range::evrange::NanoRange;
use netpod::timeunits::SEC;
use netpod::ByteSize;
use netpod::EventDataReadStats;
use netpod::ScalarType;
use netpod::SfChFetchInfo;
use netpod::Shape;
use parse::channelconfig::CompressionMethod;
use std::io::Cursor;
use std::path::PathBuf;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
use std::time::Instant;
use streams::dtflags::*;
use streams::filechunkread::FileChunkRead;
use streams::needminbuffer::NeedMinBuffer;

#[derive(Debug, thiserror::Error)]
pub enum DataParseError {
    #[error("DataFrameLengthMismatch")]
    DataFrameLengthMismatch,
    #[error("FileHeaderTooShort")]
    FileHeaderTooShort,
    #[error("BadVersionTag")]
    BadVersionTag,
    #[error("HeaderTooLarge")]
    HeaderTooLarge,
    #[error("Utf8Error")]
    Utf8Error,
    #[error("EventTooShort")]
    EventTooShort,
    #[error("EventTooLong")]
    EventTooLong,
    #[error("TooManyBeforeRange")]
    TooManyBeforeRange,
    #[error("EventWithOptional")]
    EventWithOptional,
    #[error("BadTypeIndex")]
    BadTypeIndex,
    #[error("WaveShapeWithoutEventArray")]
    WaveShapeWithoutEventArray,
    #[error("ShapedWithoutDims")]
    ShapedWithoutDims,
    #[error("TooManyDims")]
    TooManyDims,
    #[error("UnknownCompression")]
    UnknownCompression,
    #[error("BadCompresionBlockSize")]
    BadCompresionBlockSize,
}

pub struct EventChunker {
    inp: NeedMinBuffer,
    state: DataFileState,
    need_min: u32,
    fetch_info: SfChFetchInfo,
    need_min_max: u32,
    errored: bool,
    completed: bool,
    range: NanoRange,
    stats_conf: EventChunkerConf,
    seen_beyond_range: bool,
    sent_beyond_range: bool,
    data_emit_complete: bool,
    final_stats_sent: bool,
    parsed_bytes: u64,
    dbg_path: PathBuf,
    last_ts: u64,
    expand: bool,
    do_decompress: bool,
    decomp_dt_histo: HistoLog2,
    item_len_emit_histo: HistoLog2,
    seen_before_range_count: usize,
    seen_after_range_count: usize,
    unordered_count: usize,
    repeated_ts_count: usize,
    config_mismatch_discard: usize,
    discard_count: usize,
}

impl Drop for EventChunker {
    fn drop(&mut self) {
        // TODO collect somewhere
        if self.config_mismatch_discard != 0 {
            warn!("config_mismatch_discard {}", self.config_mismatch_discard);
        }
        debug!(
            "EventChunker Drop Stats:\ndecomp_dt_histo: {:?}\nitem_len_emit_histo: {:?}",
            self.decomp_dt_histo, self.item_len_emit_histo
        );
    }
}

enum DataFileState {
    FileHeader,
    Event,
}

struct ParseResult {
    events: EventFull,
    parsed_bytes: u64,
}

#[derive(Clone, Debug)]
pub struct EventChunkerConf {
    pub disk_stats_every: ByteSize,
}

impl EventChunkerConf {
    pub fn new(disk_stats_every: ByteSize) -> Self {
        Self { disk_stats_every }
    }
}

fn is_config_match(is_array: &bool, ele_count: &u64, fetch_info: &SfChFetchInfo) -> bool {
    match fetch_info.shape() {
        Shape::Scalar => !*is_array,
        Shape::Wave(dim1count) => (*dim1count as u64) == *ele_count,
        Shape::Image(n1, n2) => (*n1 as u64) * (*n2 as u64) == *ele_count,
    }
}

#[derive(Debug, thiserror::Error)]
pub enum DecompError {
    #[error("Error")]
    Error,
}

fn decompress(databuf: &[u8], type_size: u32, ele_count: u64) -> Result<Vec<u8>, DecompError> {
    if databuf.len() < 13 {
        return Err(DecompError::Error);
    }
    let ts1 = Instant::now();
    let decomp_bytes = type_size as u64 * ele_count;
    let mut decomp = vec![0; decomp_bytes as usize];
    let ele_size = type_size;
    // TODO limit the buf slice range
    match bitshuffle_decompress(&databuf[12..], &mut decomp, ele_count as usize, ele_size as usize, 0) {
        Ok(c1) => {
            // TODO a mismatch between consumed and available bytes is currently ignored.
            if 12 + c1 != databuf.len() {}
            let ts2 = Instant::now();
            let _dt = ts2.duration_since(ts1);
            // TODO analyze the histo
            //self.decomp_dt_histo.ingest(dt.as_secs() as u32 + dt.subsec_micros());
            Ok(decomp)
        }
        Err(_e) => Err(DecompError::Error),
    }
}

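// Round-trip sketch for the bitshuffle calls used in this repo (compress in
// gen.rs, decompress above). Sizes are illustrative; per the usage above, the
// Ok value is the compressed byte count consumed or produced.
#[allow(unused)]
fn bitshuffle_roundtrip_sketch() {
    use bitshuffle::{bitshuffle_compress, bitshuffle_decompress};
    let ele_size = 8;
    let ele_count = 21;
    let vals = vec![1u8; ele_size * ele_count];
    let mut comp = vec![0u8; ele_size * ele_count + 64];
    let nc = bitshuffle_compress(&vals, &mut comp, ele_count, ele_size, 0).unwrap();
    let mut decomp = vec![0u8; ele_size * ele_count];
    bitshuffle_decompress(&comp[..nc], &mut decomp, ele_count, ele_size, 0).unwrap();
    assert_eq!(vals, decomp);
}
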
impl EventChunker {
    pub fn self_name() -> &'static str {
        std::any::type_name::<Self>()
    }

    // TODO `expand` flag usage
    pub fn from_start(
        inp: Pin<Box<dyn Stream<Item = Result<FileChunkRead, Error>> + Send>>,
        fetch_info: SfChFetchInfo,
        range: NanoRange,
        stats_conf: EventChunkerConf,
        dbg_path: PathBuf,
        expand: bool,
        do_decompress: bool,
    ) -> Self {
        info!(
            "{}::{} do_decompress {}",
            Self::self_name(),
            "from_start",
            do_decompress
        );
        let need_min_max = match fetch_info.shape() {
            Shape::Scalar => 1024 * 8,
            Shape::Wave(_) => 1024 * 32,
            Shape::Image(_, _) => 1024 * 1024 * 40,
        };
        let mut inp = NeedMinBuffer::new(inp);
        inp.set_need_min(6);
        Self {
            inp,
            state: DataFileState::FileHeader,
            need_min: 6,
            need_min_max,
            fetch_info,
            errored: false,
            completed: false,
            range,
            stats_conf,
            seen_beyond_range: false,
            sent_beyond_range: false,
            data_emit_complete: false,
            final_stats_sent: false,
            parsed_bytes: 0,
            dbg_path,
            last_ts: 0,
            expand,
            do_decompress,
            decomp_dt_histo: HistoLog2::new(8),
            item_len_emit_histo: HistoLog2::new(0),
            seen_before_range_count: 0,
            seen_after_range_count: 0,
            unordered_count: 0,
            repeated_ts_count: 0,
            config_mismatch_discard: 0,
            discard_count: 0,
        }
    }

    // TODO `expand` flag usage
    pub fn from_event_boundary(
        inp: Pin<Box<dyn Stream<Item = Result<FileChunkRead, Error>> + Send>>,
        fetch_info: SfChFetchInfo,
        range: NanoRange,
        stats_conf: EventChunkerConf,
        dbg_path: PathBuf,
        expand: bool,
        do_decompress: bool,
    ) -> Self {
        info!(
            "{}::{} do_decompress {}",
            Self::self_name(),
            "from_event_boundary",
            do_decompress
        );
        let mut ret = Self::from_start(inp, fetch_info, range, stats_conf, dbg_path, expand, do_decompress);
        ret.state = DataFileState::Event;
        ret.need_min = 4;
        ret.inp.set_need_min(4);
        ret
    }

    fn parse_buf(&mut self, buf: &mut BytesMut) -> Result<ParseResult, Error> {
        span!(Level::INFO, "EventChunker::parse_buf")
            .in_scope(|| self.parse_buf_inner(buf))
            .map_err(|e| Error::with_msg_no_trace(format!("{e:?}")))
    }

    fn parse_buf_inner(&mut self, buf: &mut BytesMut) -> Result<ParseResult, DataParseError> {
        use byteorder::ReadBytesExt;
        use byteorder::BE;
        info!("parse_buf_inner buf len {}", buf.len());
        let mut ret = EventFull::empty();
        let mut parsed_bytes = 0;
        loop {
            if (buf.len() as u32) < self.need_min {
                break;
            }
            match self.state {
                DataFileState::FileHeader => {
                    if buf.len() < 6 {
                        return Err(DataParseError::FileHeaderTooShort);
                    }
                    let mut sl = Cursor::new(buf.as_ref());
                    let fver = sl.read_i16::<BE>().unwrap();
                    if fver != 0 {
                        return Err(DataParseError::BadVersionTag);
                    }
                    let len = sl.read_i32::<BE>().unwrap();
                    if len <= 0 || len >= 512 {
                        return Err(DataParseError::HeaderTooLarge);
                    }
                    let totlen = len as usize + 2;
                    if buf.len() < totlen {
                        self.need_min = totlen as u32;
                        break;
                    } else {
                        sl.advance(len as usize - 8);
                        let len2 = sl.read_i32::<BE>().unwrap();
                        if len != len2 {
                            return Err(DataParseError::DataFrameLengthMismatch);
                        }
                        // Validate that the channel name is utf-8, and propagate the
                        // error instead of silently discarding the result.
                        String::from_utf8(buf.as_ref()[6..(len as usize + 6 - 8)].to_vec())
                            .map_err(|_| DataParseError::Utf8Error)?;
                        self.state = DataFileState::Event;
                        self.need_min = 4;
                        buf.advance(totlen);
                        parsed_bytes += totlen as u64;
                    }
                }
                DataFileState::Event => {
                    let p0 = 0;
                    let mut sl = Cursor::new(buf.as_ref());
                    let len = sl.read_i32::<BE>().unwrap();
                    if len < 20 {
                        return Err(DataParseError::EventTooShort);
                    }
                    match self.fetch_info.shape() {
                        Shape::Scalar if len > 512 => return Err(DataParseError::EventTooLong),
                        Shape::Wave(_) if len > 8 * 1024 * 256 => return Err(DataParseError::EventTooLong),
                        Shape::Image(_, _) if len > 1024 * 1024 * 40 => return Err(DataParseError::EventTooLong),
                        _ => {}
                    }
                    let len = len as u32;
                    if (buf.len() as u32) < len {
                        self.need_min = len as u32;
                        break;
                    } else {
                        let mut discard = false;
                        let _ttl = sl.read_i64::<BE>().unwrap();
                        let ts = sl.read_i64::<BE>().unwrap() as u64;
                        let pulse = sl.read_i64::<BE>().unwrap() as u64;
                        if ts == self.last_ts {
                            self.repeated_ts_count += 1;
                            if self.repeated_ts_count < 20 {
                                let msg = format!(
                                    "EventChunker repeated event ts ix {} ts {}.{} last_ts {}.{} config {:?} path {:?}",
                                    self.repeated_ts_count,
                                    ts / SEC,
                                    ts % SEC,
                                    self.last_ts / SEC,
                                    self.last_ts % SEC,
                                    self.fetch_info.shape(),
                                    self.dbg_path
                                );
                                warn!("{}", msg);
                            }
                        }
                        if ts < self.last_ts {
                            discard = true;
                            self.unordered_count += 1;
                            if self.unordered_count < 20 {
                                let msg = format!(
                                    "EventChunker unordered event ix {} ts {}.{} last_ts {}.{} config {:?} path {:?}",
                                    self.unordered_count,
                                    ts / SEC,
                                    ts % SEC,
                                    self.last_ts / SEC,
                                    self.last_ts % SEC,
                                    self.fetch_info.shape(),
                                    self.dbg_path
                                );
                                warn!("{}", msg);
                            }
                        }
                        self.last_ts = ts;
                        if ts >= self.range.end {
                            discard = true;
                            self.seen_after_range_count += 1;
                            if !self.expand || self.seen_after_range_count >= 2 {
                                self.seen_beyond_range = true;
                                self.data_emit_complete = true;
                                break;
                            }
                        }
                        if ts < self.range.beg {
                            discard = true;
                            self.seen_before_range_count += 1;
                            if self.seen_before_range_count < 20 {
                                let msg = format!(
                                    "seen before range: {} event ts {}.{} range beg {}.{} range end {}.{} pulse {} config {:?} path {:?}",
                                    self.seen_before_range_count,
                                    ts / SEC,
                                    ts % SEC,
                                    self.range.beg / SEC,
                                    self.range.beg % SEC,
                                    self.range.end / SEC,
                                    self.range.end % SEC,
                                    pulse,
                                    self.fetch_info.shape(),
                                    self.dbg_path
                                );
                                warn!("{}", msg);
                            }
                            if self.seen_before_range_count > 100 {
                                let msg = format!(
                                    "too many seen before range: {} event ts {}.{} range beg {}.{} range end {}.{} pulse {} config {:?} path {:?}",
                                    self.seen_before_range_count,
                                    ts / SEC,
                                    ts % SEC,
                                    self.range.beg / SEC,
                                    self.range.beg % SEC,
                                    self.range.end / SEC,
                                    self.range.end % SEC,
                                    pulse,
                                    self.fetch_info.shape(),
                                    self.dbg_path
                                );
                                error!("{}", msg);
                                return Err(DataParseError::TooManyBeforeRange);
                            }
                        }
                        let _ioc_ts = sl.read_i64::<BE>().unwrap();
                        let status = sl.read_i8().unwrap();
                        let severity = sl.read_i8().unwrap();
                        let optional = sl.read_i32::<BE>().unwrap();
                        if status != 0 {
                            // return Err(DataParseError::UnexpectedStatus);
                            // TODO count
                        }
                        if severity != 0 {
                            // return Err(DataParseError::UnexpectedSeverity);
                            // TODO count
                        }
                        if optional != -1 {
                            return Err(DataParseError::EventWithOptional);
                        }
                        let type_flags = sl.read_u8().unwrap();
                        let type_index = sl.read_u8().unwrap();
                        if type_index > 13 {
                            return Err(DataParseError::BadTypeIndex);
                        }
                        let scalar_type =
                            ScalarType::from_dtype_index(type_index).map_err(|_| DataParseError::BadTypeIndex)?;
                        let is_compressed = type_flags & COMPRESSION != 0;
                        let is_array = type_flags & ARRAY != 0;
                        let is_big_endian = type_flags & BIG_ENDIAN != 0;
                        let is_shaped = type_flags & SHAPE != 0;
                        if let Shape::Wave(_) = self.fetch_info.shape() {
                            if !is_array {
                                return Err(DataParseError::WaveShapeWithoutEventArray);
                            }
                        }
                        let compression_method = if is_compressed { sl.read_u8().unwrap() } else { 0 };
                        let shape_dim = if is_shaped { sl.read_u8().unwrap() } else { 0 };
                        let mut shape_lens = [0, 0, 0, 0];
                        for i1 in 0..shape_dim {
                            shape_lens[i1 as usize] = sl.read_u32::<BE>().unwrap();
                        }
                        let shape_this = {
                            if is_shaped {
                                if shape_dim == 1 {
                                    Shape::Wave(shape_lens[0])
                                } else if shape_dim == 2 {
                                    Shape::Image(shape_lens[0], shape_lens[1])
                                } else if shape_dim == 0 {
                                    discard = true;
                                    // return Err(DataParseError::ShapedWithoutDims);
                                    Shape::Scalar
                                } else {
                                    discard = true;
                                    // return Err(DataParseError::TooManyDims);
                                    Shape::Scalar
                                }
                            } else {
                                Shape::Scalar
                            }
                        };
                        let comp_this = if is_compressed {
                            if compression_method == 0 {
                                Some(CompressionMethod::BitshuffleLZ4)
                            } else {
                                return Err(DataParseError::UnknownCompression);
                            }
                        } else {
                            None
                        };
                        let p1 = sl.position();
                        let n1 = p1 - p0;
                        let n2 = len as u64 - n1 - 4;
                        let databuf = buf[p1 as usize..(p1 as usize + n2 as usize)].as_ref();
                        if false && is_compressed {
                            //debug!("event ts {} is_compressed {}", ts, is_compressed);
                            let value_bytes = sl.read_u64::<BE>().unwrap();
                            let block_size = sl.read_u32::<BE>().unwrap();
                            //debug!("event len {} ts {} is_compressed {} shape_dim {} len-dim-0 {} value_bytes {} block_size {}", len, ts, is_compressed, shape_dim, shape_lens[0], value_bytes, block_size);
                            match self.fetch_info.shape() {
                                Shape::Scalar => {
                                    assert!(value_bytes < 1024 * 1);
                                }
                                Shape::Wave(_) => {
                                    assert!(value_bytes < 1024 * 64);
                                }
                                Shape::Image(_, _) => {
                                    assert!(value_bytes < 1024 * 1024 * 20);
                                }
                            }
                            if block_size > 1024 * 32 {
                                return Err(DataParseError::BadCompresionBlockSize);
                            }
                            let type_size = scalar_type.bytes() as u32;
                            let _ele_count = value_bytes / type_size as u64;
                            let _ele_size = type_size;
                        }
                        if discard {
                            self.discard_count += 1;
                        } else {
                            ret.add_event(
                                ts,
                                pulse,
                                Some(databuf.to_vec()),
                                None,
                                scalar_type,
                                is_big_endian,
                                shape_this,
                                comp_this,
                            );
                        }
                        buf.advance(len as usize);
                        parsed_bytes += len as u64;
                        self.need_min = 4;
                    }
                }
            }
        }
        Ok(ParseResult {
            events: ret,
            parsed_bytes,
        })
    }
}

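// The framing loop in parse_buf_inner follows a common shape: each record
// starts with a big-endian i32 length that includes the length field itself;
// parse whole records and remember need_min when a record is split across
// reads. A reduced, std-only sketch of that loop:
#[allow(unused)]
fn parse_frames_sketch(buf: &mut bytes::BytesMut) -> (Vec<Vec<u8>>, usize) {
    use bytes::Buf;
    let mut out = Vec::new();
    let mut need_min = 4;
    while buf.len() >= need_min {
        let len = i32::from_be_bytes([buf[0], buf[1], buf[2], buf[3]]) as usize;
        if len < 4 {
            // malformed frame; a real parser would return an error here
            break;
        }
        if buf.len() < len {
            // incomplete frame: ask the feeder for at least `len` bytes
            need_min = len;
            break;
        }
        out.push(buf[4..len].to_vec());
        buf.advance(len);
        need_min = 4;
    }
    (out, need_min)
}
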
impl Stream for EventChunker {
    type Item = Result<StreamItem<RangeCompletableItem<EventFull>>, Error>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        'outer: loop {
            break if self.completed {
                panic!("EventChunker poll_next on completed");
            } else if self.errored {
                self.completed = true;
                Ready(None)
            } else if self.parsed_bytes >= self.stats_conf.disk_stats_every.bytes() as u64 {
                let item = EventDataReadStats {
                    parsed_bytes: self.parsed_bytes,
                };
                self.parsed_bytes = 0;
                let ret = StreamItem::Stats(StatsItem::EventDataReadStats(item));
                Ready(Some(Ok(ret)))
            } else if self.sent_beyond_range {
                self.completed = true;
                Ready(None)
            } else if self.final_stats_sent {
                self.sent_beyond_range = true;
                trace!("sent_beyond_range");
                if self.seen_beyond_range {
                    trace!("sent_beyond_range RangeComplete");
                    Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
                } else {
                    trace!("sent_beyond_range non-complete");
                    continue 'outer;
                }
            } else if self.data_emit_complete {
                let item = EventDataReadStats {
                    parsed_bytes: self.parsed_bytes,
                };
                self.parsed_bytes = 0;
                let ret = StreamItem::Stats(StatsItem::EventDataReadStats(item));
                self.final_stats_sent = true;
                Ready(Some(Ok(ret)))
            } else {
                match self.inp.poll_next_unpin(cx) {
                    Ready(Some(Ok(mut fcr))) => {
                        if false {
                            // TODO collect for stats:
                            info!(
                                "file read bytes {} ms {}",
                                fcr.buf().len(),
                                fcr.duration().as_millis()
                            );
                        }
                        let r = self.parse_buf(fcr.buf_mut());
                        match r {
                            Ok(res) => {
                                self.parsed_bytes += res.parsed_bytes;
                                if fcr.buf().len() > 0 {
                                    // TODO gather stats about this:
                                    self.inp.put_back(fcr);
                                }
                                if self.need_min > self.need_min_max {
                                    let msg = format!(
                                        "spurious EventChunker asks for need_min {} max {}",
                                        self.need_min, self.need_min_max
                                    );
                                    self.errored = true;
                                    return Ready(Some(Err(Error::with_msg(msg))));
                                }
                                let x = self.need_min;
                                self.inp.set_need_min(x);
                                if false {
                                    info!(
                                        "EventChunker emits {} events tss {:?}",
                                        res.events.len(),
                                        res.events.tss
                                    );
                                };
                                self.item_len_emit_histo.ingest(res.events.len() as u32);
                                let ret = StreamItem::DataItem(RangeCompletableItem::Data(res.events));
                                Ready(Some(Ok(ret)))
                            }
                            Err(e) => {
                                self.errored = true;
                                Ready(Some(Err(e.into())))
                            }
                        }
                    }
                    Ready(Some(Err(e))) => {
                        self.errored = true;
                        Ready(Some(Err(e)))
                    }
                    Ready(None) => {
                        self.data_emit_complete = true;
                        continue 'outer;
                    }
                    Pending => Pending,
                }
            };
        }
    }
}

#[cfg(test)]
mod test {
    //use err::Error;
    //use netpod::timeunits::*;
    //use netpod::{ByteSize, Nanos};

    //const TEST_BACKEND: &str = "testbackend-00";

    /*
    #[test]
    fn read_expanded_for_range(range: netpod::NanoRange, nodeix: usize) -> Result<(usize, usize), Error> {
        let chn = netpod::Channel {
            backend: TEST_BACKEND.into(),
            name: "scalar-i32-be".into(),
        };
        // TODO read config from disk.
        let channel_config = ChannelConfig {
            channel: chn,
            keyspace: 2,
            time_bin_size: Nanos { ns: DAY },
            scalar_type: netpod::ScalarType::I32,
            byte_order: netpod::ByteOrder::big_endian(),
            shape: netpod::Shape::Scalar,
            array: false,
            compression: false,
        };
        let cluster = taskrun::test_cluster();
        let node = cluster.nodes[nodeix].clone();
        let buffer_size = 512;
        let event_chunker_conf = EventChunkerConf {
            disk_stats_every: ByteSize::kb(1024),
        };
    }
    */
}

1
crates/disk/src/frame.rs
Normal file
@@ -0,0 +1 @@
pub mod makeframe;
1
crates/disk/src/frame/makeframe.rs
Normal file
@@ -0,0 +1 @@
536
crates/disk/src/gen.rs
Normal file
@@ -0,0 +1,536 @@
use crate::ChannelConfigExt;
use crate::SfDbChConf;
use bitshuffle::bitshuffle_compress;
use bytes::BufMut;
use bytes::BytesMut;
use err::Error;
use netpod::log::*;
use netpod::timeunits::*;
use netpod::ByteOrder;
use netpod::DtNano;
use netpod::GenVar;
use netpod::Node;
use netpod::ScalarType;
use netpod::SfDatabuffer;
use netpod::SfDbChannel;
use netpod::Shape;
use netpod::TsNano;
use std::path::Path;
use std::path::PathBuf;
use tokio::fs::File;
use tokio::fs::OpenOptions;
use tokio::io::AsyncWriteExt;

const BACKEND: &str = "testbackend-00";

pub async fn gen_test_data() -> Result<(), Error> {
    let backend = String::from(BACKEND);
    let homedir = std::env::var("HOME").unwrap();
    let data_base_path = PathBuf::from(homedir).join("daqbuffer-testdata").join("databuffer");
    let ksprefix = String::from("ks");
    let mut ensemble = Ensemble {
        nodes: Vec::new(),
        channels: Vec::new(),
    };
    {
        let chn = ChannelGenProps {
            config: SfDbChConf {
                channel: SfDbChannel::from_name(&backend, "scalar-i32-be"),
                keyspace: 2,
                time_bin_size: DtNano::from_ns(DAY),
                scalar_type: ScalarType::I32,
                byte_order: ByteOrder::Big,
                shape: Shape::Scalar,
                array: false,
                compression: false,
            },
            gen_var: netpod::GenVar::Default,
            time_spacing: MS * 500,
        };
        ensemble.channels.push(chn);
        let chn = ChannelGenProps {
            config: SfDbChConf {
                channel: SfDbChannel::from_name(&backend, "wave-f64-be-n21"),
                keyspace: 3,
                time_bin_size: DtNano::from_ns(DAY),
                array: true,
                scalar_type: ScalarType::F64,
                shape: Shape::Wave(21),
                byte_order: ByteOrder::Big,
                compression: true,
            },
            gen_var: netpod::GenVar::Default,
            time_spacing: MS * 4000,
        };
        ensemble.channels.push(chn);
        let chn = ChannelGenProps {
            config: SfDbChConf {
                channel: SfDbChannel::from_name(&backend, "wave-u16-le-n77"),
                keyspace: 3,
                time_bin_size: DtNano::from_ns(DAY),
                scalar_type: ScalarType::U16,
                byte_order: ByteOrder::Little,
                shape: Shape::Wave(77),
                array: true,
                compression: true,
            },
            gen_var: netpod::GenVar::Default,
            time_spacing: MS * 500,
        };
        ensemble.channels.push(chn);
        let chn = ChannelGenProps {
            config: SfDbChConf {
                channel: SfDbChannel::from_name(&backend, "tw-scalar-i32-be"),
                keyspace: 2,
                time_bin_size: DtNano::from_ns(DAY),
                scalar_type: ScalarType::I32,
                byte_order: ByteOrder::Little,
                shape: Shape::Scalar,
                array: false,
                compression: false,
            },
            gen_var: netpod::GenVar::TimeWeight,
            time_spacing: MS * 500,
        };
        ensemble.channels.push(chn);
        let chn = ChannelGenProps {
            config: SfDbChConf {
                channel: SfDbChannel::from_name(&backend, "const-regular-scalar-i32-be"),
                keyspace: 2,
                time_bin_size: DtNano::from_ns(DAY),
                scalar_type: ScalarType::I32,
                byte_order: ByteOrder::Little,
                shape: Shape::Scalar,
                array: false,
                compression: false,
            },
            gen_var: netpod::GenVar::ConstRegular,
            time_spacing: MS * 500,
        };
        ensemble.channels.push(chn);
    }
    for i1 in 0..3 {
        let node = Node {
            host: "localhost".into(),
            listen: "0.0.0.0".into(),
            port: 7780 + i1 as u16,
            port_raw: 7780 + i1 as u16 + 100,
            cache_base_path: data_base_path.join(format!("node{:02}", i1)),
            sf_databuffer: Some(SfDatabuffer {
                data_base_path: data_base_path.join(format!("node{:02}", i1)),
                ksprefix: ksprefix.clone(),
                splits: None,
            }),
            archiver_appliance: None,
            channel_archiver: None,
            prometheus_api_bind: None,
        };
        ensemble.nodes.push(node);
    }
    for (split, node) in ensemble.nodes.iter().enumerate() {
        gen_node(split as u32, node, &ensemble).await?;
    }
    Ok(())
}

struct Ensemble {
    nodes: Vec<Node>,
    channels: Vec<ChannelGenProps>,
}

pub struct ChannelGenProps {
    config: SfDbChConf,
    time_spacing: u64,
    gen_var: GenVar,
}

async fn gen_node(split: u32, node: &Node, ensemble: &Ensemble) -> Result<(), Error> {
    for chn in &ensemble.channels {
        gen_channel(chn, split, node, ensemble).await?
    }
    Ok(())
}

async fn gen_channel(chn: &ChannelGenProps, split: u32, node: &Node, ensemble: &Ensemble) -> Result<(), Error> {
    let sfc = node.sf_databuffer.as_ref().unwrap();
    let config_path = sfc.data_base_path.join("config").join(chn.config.channel.name());
    let channel_path = sfc
        .data_base_path
        .join(format!("{}_{}", sfc.ksprefix, chn.config.keyspace))
        .join("byTime")
        .join(chn.config.channel.name());
    tokio::fs::create_dir_all(&channel_path).await?;
    gen_config(&config_path, &chn.config, node, ensemble)
        .await
        .map_err(|k| Error::with_msg(format!("can not generate config {:?}", k)))?;
    let mut evix = 0;
    let mut ts = TsNano(0);
    let mut pulse = 0;
    while ts.ns() < DAY * 3 {
        let res = gen_timebin(
            evix,
            ts,
            pulse,
            chn.time_spacing,
            &channel_path,
            &chn.config,
            split,
            node,
            ensemble,
            &chn.gen_var,
        )
        .await?;
        evix = res.evix;
        ts = res.ts;
        pulse = res.pulse;
    }
    Ok(())
}

async fn gen_config(config_path: &Path, config: &SfDbChConf, _node: &Node, _ensemble: &Ensemble) -> Result<(), Error> {
    let path = config_path.join("latest");
    tokio::fs::create_dir_all(&path).await?;
    let path = path.join("00000_Config");
    info!("try to open {:?}", path);
    let mut file = OpenOptions::new()
        .write(true)
        .create(true)
        .truncate(true)
        .open(path)
        .await?;
    let mut buf = BytesMut::with_capacity(1024 * 1);
    let ver = 0;
    buf.put_i16(ver);
    let cnenc = config.channel.name().as_bytes();
    let len1 = cnenc.len() + 8;
    buf.put_i32(len1 as i32);
    buf.put(cnenc);
    buf.put_i32(len1 as i32);

    let ts = 0;
    let pulse = 0;
    let sc = 0;
    let status = 0;
    let bb = 0;
    let modulo = 0;
    let offset = 0;
    let precision = 0;
    let p1 = buf.len();
    buf.put_i32(0x20202020);
    buf.put_i64(ts);
    buf.put_i64(pulse);
    buf.put_u32(config.keyspace as u32);
    buf.put_u64(config.time_bin_size.ns() / MS);
    buf.put_i32(sc);
    buf.put_i32(status);
    buf.put_i8(bb);
    buf.put_i32(modulo);
    buf.put_i32(offset);
    buf.put_i16(precision);

    {
        // this len does not include itself and there seems to be no copy of it afterwards.
        let p3 = buf.len();
        buf.put_i32(404040);
        buf.put_u8(config.dtflags());
        buf.put_u8(config.scalar_type.index());
        if config.compression {
            let method = 0;
            buf.put_i8(method);
        }
        match config.shape {
            Shape::Scalar => {}
            Shape::Wave(k) => {
                buf.put_i8(1);
                buf.put_i32(k as i32);
            }
            Shape::Image(_, _) => {
                // TODO test data
                err::todoval()
            }
        }
        let len = buf.len() - p3 - 4;
        buf.as_mut()[p3..].as_mut().put_i32(len as i32);
    }

    // source name
    buf.put_i32(-1);
    // unit
    buf.put_i32(-1);
    // description
    buf.put_i32(-1);
    // optional fields
    buf.put_i32(-1);
    // value converter
    buf.put_i32(-1);

    let p2 = buf.len();
    let len = p2 - p1 + 4;
    buf.put_i32(len as i32);
    buf.as_mut()[p1..].as_mut().put_i32(len as i32);
    file.write_all(&buf).await?;
    Ok(())
}

struct CountedFile {
    file: File,
    bytes: u64,
}

impl CountedFile {
    pub fn new(file: File) -> Self {
        Self { file, bytes: 0 }
    }
    pub async fn write_all(&mut self, buf: &[u8]) -> Result<u64, Error> {
        let l = buf.len();
        let mut i = 0;
        loop {
            match self.file.write(&buf[i..]).await {
                Ok(n) => {
                    i += n;
                    self.bytes += n as u64;
                    if i >= l {
                        break;
                    }
                }
                Err(e) => Err(e)?,
            }
        }
        Ok(i as u64)
    }
    pub fn written_len(&self) -> u64 {
        self.bytes
    }
}

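// CountedFile loops over write() because a single write may be partial. The
// same loop for any std::io::Write sink, returning the tallied byte count
// (sketch; names illustrative):
#[allow(unused)]
fn write_all_counted_sketch<W: std::io::Write>(w: &mut W, buf: &[u8]) -> std::io::Result<u64> {
    let mut i = 0;
    while i < buf.len() {
        let n = w.write(&buf[i..])?;
        if n == 0 {
            // a zero-length write would loop forever; surface it as an error
            return Err(std::io::ErrorKind::WriteZero.into());
        }
        i += n;
    }
    Ok(i as u64)
}
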
struct GenTimebinRes {
    evix: u64,
    ts: TsNano,
    pulse: u64,
}

async fn gen_timebin(
|
||||
evix: u64,
|
||||
ts: TsNano,
|
||||
pulse: u64,
|
||||
ts_spacing: u64,
|
||||
channel_path: &Path,
|
||||
config: &SfDbChConf,
|
||||
split: u32,
|
||||
_node: &Node,
|
||||
ensemble: &Ensemble,
|
||||
gen_var: &GenVar,
|
||||
) -> Result<GenTimebinRes, Error> {
|
||||
let tb = ts.ns() / config.time_bin_size.ns();
|
||||
let path = channel_path.join(format!("{:019}", tb)).join(format!("{:010}", split));
|
||||
tokio::fs::create_dir_all(&path).await?;
|
||||
let data_path = path.join(format!("{:019}_{:05}_Data", config.time_bin_size.ns() / MS, 0));
|
||||
let index_path = path.join(format!("{:019}_{:05}_Data_Index", config.time_bin_size.ns() / MS, 0));
|
||||
info!("open data file {:?}", data_path);
|
||||
let file = OpenOptions::new()
|
||||
.write(true)
|
||||
.create(true)
|
||||
.truncate(true)
|
||||
.open(data_path)
|
||||
.await?;
|
||||
let mut file = CountedFile::new(file);
|
||||
let mut index_file = if let Shape::Wave(_) = config.shape {
|
||||
info!("open index file {:?}", index_path);
|
||||
let f = OpenOptions::new()
|
||||
.write(true)
|
||||
.create(true)
|
||||
.truncate(true)
|
||||
.open(index_path)
|
||||
.await?;
|
||||
let mut f = CountedFile::new(f);
|
||||
f.write_all(b"\x00\x00").await?;
|
||||
Some(f)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
gen_datafile_header(&mut file, config).await?;
|
||||
let mut evix = evix;
|
||||
let mut ts = ts;
|
||||
let mut pulse = pulse;
|
||||
let tsmax = TsNano((tb + 1) * config.time_bin_size.ns());
|
||||
while ts.ns() < tsmax.ns() {
|
||||
match gen_var {
|
||||
// TODO
|
||||
// Splits and nodes are not in 1-to-1 correspondence.
|
||||
GenVar::Default => {
|
||||
if evix % ensemble.nodes.len() as u64 == split as u64 {
|
||||
gen_event(&mut file, index_file.as_mut(), evix, ts.clone(), pulse, config, gen_var).await?;
|
||||
}
|
||||
}
|
||||
GenVar::ConstRegular => {
|
||||
if evix % ensemble.nodes.len() as u64 == split as u64 {
|
||||
gen_event(&mut file, index_file.as_mut(), evix, ts.clone(), pulse, config, gen_var).await?;
|
||||
}
|
||||
}
|
||||
GenVar::TimeWeight => {
|
||||
let m = evix % 20;
|
||||
if m == 0 || m == 1 {
|
||||
if evix % ensemble.nodes.len() as u64 == split as u64 {
|
||||
gen_event(&mut file, index_file.as_mut(), evix, ts.clone(), pulse, config, gen_var).await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
evix += 1;
|
||||
ts.0 += ts_spacing;
|
||||
pulse += 1;
|
||||
}
|
||||
let ret = GenTimebinRes { evix, ts, pulse };
|
||||
Ok(ret)
|
||||
}
|
||||
|
||||
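The bin index above is the timestamp divided by the bin length in integer arithmetic; a worked example with a daily bin (value illustrative):

#[test]
fn timebin_index_arithmetic() {
    // An event one and a half daily bins into the epoch lands in bin 1.
    let bin_ns: u64 = 86_400_000 * MS;
    let ts_ns = bin_ns + bin_ns / 2;
    assert_eq!(ts_ns / bin_ns, 1);
}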
async fn gen_datafile_header(file: &mut CountedFile, config: &SfDbChConf) -> Result<(), Error> {
    let mut buf = BytesMut::with_capacity(1024);
    let cnenc = config.channel.name().as_bytes();
    // Header layout: i16 version, i32 len, channel name, i32 len again,
    // where len counts the name plus both length fields.
    let len1 = cnenc.len() + 8;
    buf.put_i16(0);
    buf.put_i32(len1 as i32);
    buf.put(cnenc);
    buf.put_i32(len1 as i32);
    file.write_all(&buf).await?;
    Ok(())
}
async fn gen_event(
    file: &mut CountedFile,
    index_file: Option<&mut CountedFile>,
    evix: u64,
    ts: TsNano,
    pulse: u64,
    config: &SfDbChConf,
    gen_var: &GenVar,
) -> Result<(), Error> {
    let ttl = 0xcafecafe;
    let ioc_ts = 0xcafecafe;
    let mut buf = BytesMut::with_capacity(1024 * 16);
    // Leading i32 is a placeholder for the frame length, patched at the end.
    buf.put_i32(0xcafecafe as u32 as i32);
    buf.put_u64(ttl);
    buf.put_u64(ts.ns());
    buf.put_u64(pulse);
    buf.put_u64(ioc_ts);
    buf.put_u8(0);
    buf.put_u8(0);
    buf.put_i32(-1);
    use streams::dtflags::*;
    if config.compression {
        match config.shape {
            Shape::Wave(ele_count) => {
                let mut flags = COMPRESSION | ARRAY | SHAPE;
                if config.byte_order.is_be() {
                    flags |= BIG_ENDIAN;
                }
                buf.put_u8(flags);
                buf.put_u8(config.scalar_type.index());
                let comp_method: u8 = 0;
                buf.put_u8(comp_method);
                buf.put_u8(1);
                buf.put_u32(ele_count as u32);
                match &config.scalar_type {
                    ScalarType::F64 => {
                        let ele_size = 8;
                        let mut vals = vec![0; (ele_size * ele_count) as usize];
                        for i1 in 0..ele_count {
                            let v = (evix as f64) * 100.0 + i1 as f64;
                            let a = if config.byte_order.is_be() {
                                v.to_be_bytes()
                            } else {
                                v.to_le_bytes()
                            };
                            use std::io::{Cursor, Seek, SeekFrom, Write};
                            let mut c1 = Cursor::new(&mut vals);
                            c1.seek(SeekFrom::Start(i1 as u64 * ele_size as u64))?;
                            Write::write_all(&mut c1, &a)?;
                        }
                        let mut comp = vec![0u8; (ele_size * ele_count + 64) as usize];
                        let n1 =
                            bitshuffle_compress(&vals, &mut comp, ele_count as usize, ele_size as usize, 0).unwrap();
                        // Uncompressed byte count, then the compression block size (0: default).
                        buf.put_u64(vals.len() as u64);
                        let comp_block_size = 0;
                        buf.put_u32(comp_block_size);
                        buf.put(&comp[..n1]);
                    }
                    ScalarType::U16 => {
                        let ele_size = 2;
                        let mut vals = vec![0; (ele_size * ele_count) as usize];
                        for i1 in 0..ele_count {
                            let v = (evix as u16).wrapping_mul(100).wrapping_add(i1 as u16);
                            let a = if config.byte_order.is_be() {
                                v.to_be_bytes()
                            } else {
                                v.to_le_bytes()
                            };
                            use std::io::{Cursor, Seek, SeekFrom, Write};
                            let mut c1 = Cursor::new(&mut vals);
                            c1.seek(SeekFrom::Start(i1 as u64 * ele_size as u64))?;
                            Write::write_all(&mut c1, &a)?;
                        }
                        let mut comp = vec![0u8; (ele_size * ele_count + 64) as usize];
                        let n1 =
                            bitshuffle_compress(&vals, &mut comp, ele_count as usize, ele_size as usize, 0).unwrap();
                        buf.put_u64(vals.len() as u64);
                        let comp_block_size = 0;
                        buf.put_u32(comp_block_size);
                        buf.put(&comp[..n1]);
                    }
                    _ => todo!("Datatype not yet supported: {:?}", config.scalar_type),
                }
            }
            _ => todo!("Shape not yet supported: {:?}", config.shape),
        }
    } else {
        match config.shape {
            Shape::Scalar => {
                let mut flags = 0;
                if config.byte_order.is_be() {
                    flags |= BIG_ENDIAN;
                }
                buf.put_u8(flags);
                buf.put_u8(config.scalar_type.index());
                match &config.scalar_type {
                    ScalarType::I32 => {
                        let v = match gen_var {
                            GenVar::Default => evix as i32,
                            GenVar::ConstRegular => 42,
                            GenVar::TimeWeight => {
                                let m = evix % 20;
                                if m == 0 {
                                    200
                                } else if m == 1 {
                                    400
                                } else {
                                    0
                                }
                            }
                        };
                        if config.byte_order.is_be() {
                            buf.put_i32(v);
                        } else {
                            buf.put_i32_le(v);
                        };
                    }
                    _ => todo!("Datatype not yet supported: {:?}", config.scalar_type),
                }
            }
            _ => todo!("Shape not yet supported: {:?}", config.shape),
        }
    }
    {
        // Patch the total frame length into the trailing and the leading i32.
        let len = buf.len() as u32 + 4;
        buf.put_u32(len);
        buf.as_mut().put_u32(len);
    }
    let z = file.written_len();
    file.write_all(buf.as_ref()).await?;
    if let Some(f) = index_file {
        // Index entry: timestamp and the byte offset of the event in the data file.
        let mut buf = BytesMut::with_capacity(16);
        buf.put_u64(ts.ns());
        buf.put_u64(z);
        f.write_all(&buf).await?;
    }
    Ok(())
}
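Both writers above use the same framing convention: a record is wrapped in a leading and a trailing i32 holding the total frame length, including both length fields. A minimal sketch of that convention (the helper name frame is ours, not part of this crate):

use bytes::{BufMut, BytesMut};

// Wrap a payload in the leading/trailing i32 length frame used by these files.
fn frame(payload: &[u8]) -> BytesMut {
    let len = (payload.len() + 8) as i32;
    let mut buf = BytesMut::with_capacity(payload.len() + 8);
    buf.put_i32(len);
    buf.put_slice(payload);
    buf.put_i32(len);
    buf
}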
345
crates/disk/src/index.rs
Normal file
@@ -0,0 +1,345 @@
use arrayref::array_ref;
use err::Error;
use netpod::log::*;
use netpod::range::evrange::NanoRange;
use netpod::TsNano;
use std::mem::size_of;
use tokio::fs::File;
use tokio::io::AsyncReadExt;
use tokio::io::AsyncSeekExt;
use tokio::io::SeekFrom;

pub fn find_ge(range: NanoRange, expand_right: bool, buf: &[u8]) -> Result<Option<(u64, u64)>, Error> {
    type VT = u64;
    const NT: usize = size_of::<VT>();
    const N: usize = 2 * NT;
    let n1 = buf.len();
    if n1 % N != 0 {
        return Err(Error::with_msg(format!("find_ge bad len {}", n1)));
    }
    if n1 == 0 {
        warn!("Empty index data");
        return Ok(None);
    }
    let n1 = n1 / N;
    // View the raw index as (ts, pos) pairs of big-endian u64. Note: this relies
    // on the tuple laying out as two adjacent 8-byte arrays, which rustc
    // currently provides but does not formally guarantee.
    let a = unsafe {
        let ptr = &buf[0] as *const u8 as *const ([u8; NT], [u8; NT]);
        std::slice::from_raw_parts(ptr, n1)
    };
    let mut j = 0;
    let mut k = n1 - 1;
    let x = VT::from_be_bytes(a[j].0);
    let y = VT::from_be_bytes(a[k].0);
    if y < range.beg {
        return Ok(None);
    }
    if x >= range.beg {
        if x < range.end || expand_right {
            return Ok(Some((x, VT::from_be_bytes(a[j].1))));
        } else {
            return Ok(None);
        }
    }
    if x >= y {
        return Err(Error::with_public_msg(format!(
            "search in unordered data ts1 {x} ts2 {y}"
        )));
    }
    let mut x = x;
    let mut y = y;
    // Bisect while keeping the invariant a[j].ts < range.beg <= a[k].ts.
    loop {
        if x >= y {
            return Err(Error::with_public_msg(format!(
                "search (loop) in unordered data ts1 {x} ts2 {y}"
            )));
        }
        if k - j < 2 {
            if y < range.end || expand_right {
                let ret = (y, VT::from_be_bytes(a[k].1));
                return Ok(Some(ret));
            } else {
                return Ok(None);
            }
        }
        let m = (k + j) / 2;
        let e = VT::from_be_bytes(a[m].0);
        if e < range.beg {
            j = m;
            x = e;
        } else {
            k = m;
            y = e;
        }
    }
}
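A sanity-check sketch for find_ge against a hand-built index; it assumes NanoRange can be constructed from its public beg/end fields, as they are read above:

#[cfg(test)]
mod find_ge_test {
    use super::*;

    #[test]
    fn finds_first_entry_at_or_after_beg() {
        // Three (ts, pos) index entries as big-endian u64 pairs.
        let mut buf = Vec::new();
        for (ts, pos) in [(10u64, 100u64), (20, 200), (30, 300)] {
            buf.extend_from_slice(&ts.to_be_bytes());
            buf.extend_from_slice(&pos.to_be_bytes());
        }
        let range = NanoRange { beg: 15, end: 100 };
        let res = find_ge(range, false, &buf).unwrap();
        assert_eq!(res, Some((20, 200)));
    }
}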
pub fn find_largest_smaller_than(
    range: NanoRange,
    _expand_right: bool,
    buf: &[u8],
) -> Result<Option<(u64, u64)>, Error> {
    type Num = u64;
    const ELESIZE: usize = size_of::<Num>();
    const N: usize = 2 * ELESIZE;
    let n1 = buf.len();
    if n1 % N != 0 {
        return Err(Error::with_msg(format!("find_largest_smaller_than bad len {}", n1)));
    }
    if n1 == 0 {
        warn!("Empty index data");
        return Ok(None);
    }
    let n1 = n1 / N;
    let a = unsafe {
        let ptr = &buf[0] as *const u8 as *const ([u8; ELESIZE], [u8; ELESIZE]);
        std::slice::from_raw_parts(ptr, n1)
    };
    let mut j = 0;
    let mut k = n1 - 1;
    let x = Num::from_be_bytes(a[j].0);
    let y = Num::from_be_bytes(a[k].0);
    if x >= range.beg {
        return Ok(None);
    }
    if y < range.beg {
        let ret = (y, Num::from_be_bytes(a[k].1));
        return Ok(Some(ret));
    }
    if x >= y {
        return Err(Error::with_public_msg(format!(
            "search in unordered data ts1 {x} ts2 {y}"
        )));
    }
    let mut x = x;
    let mut y = y;
    loop {
        if x >= y {
            return Err(Error::with_public_msg(format!(
                "search (loop) in unordered data ts1 {x} ts2 {y}"
            )));
        }
        if k - j < 2 {
            let ret = (x, Num::from_be_bytes(a[j].1));
            return Ok(Some(ret));
        }
        let m = (k + j) / 2;
        let e = Num::from_be_bytes(a[m].0);
        if e < range.beg {
            j = m;
            x = e;
        } else {
            k = m;
            y = e;
        }
    }
}

async fn read(buf: &mut [u8], file: &mut File) -> Result<usize, Error> {
    // Fill the buffer as far as possible; returns the number of bytes read.
    let mut wp = 0;
    loop {
        let n1 = file.read(&mut buf[wp..]).await?;
        if n1 == 0 {
            break;
        } else {
            wp += n1;
        }
        if wp >= buf.len() {
            break;
        }
    }
    Ok(wp)
}

pub fn parse_channel_header(buf: &[u8]) -> Result<(u32,), Error> {
    if buf.len() < 6 {
        return Err(Error::with_msg(format!("parse_channel_header buf len: {}", buf.len())));
    }
    let ver = i16::from_be_bytes(*array_ref![buf, 0, 2]);
    if ver != 0 {
        return Err(Error::with_msg(format!("unknown file version: {}", ver)));
    }
    let len1 = u32::from_be_bytes(*array_ref![buf, 2, 4]);
    if len1 < 9 || len1 > 256 {
        return Err(Error::with_msg(format!("unexpected data file header len1: {}", len1)));
    }
    if buf.len() < 2 + len1 as usize {
        return Err(Error::with_msg(format!(
            "data file header not contained in buffer len1: {} vs {}",
            len1,
            buf.len()
        )));
    }
    let len2 = u32::from_be_bytes(*array_ref![buf, 2 + len1 as usize - 4, 4]);
    if len1 != len2 {
        return Err(Error::with_msg(format!("len mismatch len1: {} len2: {}", len1, len2)));
    }
    Ok((len1 as u32,))
}

pub fn parse_event(buf: &[u8]) -> Result<(u32, TsNano), Error> {
    if buf.len() < 4 {
        return Err(Error::with_msg(format!("parse_event buf len: {}", buf.len())));
    }
    let len1 = u32::from_be_bytes(*array_ref![buf, 0, 4]);
    if len1 < 9 || len1 > 512 {
        return Err(Error::with_msg(format!("unexpected event len1: {}", len1)));
    }
    if buf.len() < len1 as usize {
        return Err(Error::with_msg(format!(
            "event not contained in buffer len1: {} vs {}",
            len1,
            buf.len()
        )));
    }
    let len2 = u32::from_be_bytes(*array_ref![buf, len1 as usize - 4, 4]);
    if len1 != len2 {
        return Err(Error::with_msg(format!("len mismatch len1: {} len2: {}", len1, len2)));
    }
    // The timestamp sits after the leading i32 length and the u64 ttl.
    let ts = u64::from_be_bytes(*array_ref![buf, 12, 8]);
    Ok((len1 as u32, TsNano(ts)))
}

pub async fn read_event_at(pos: u64, file: &mut File) -> Result<(u32, TsNano), Error> {
    file.seek(SeekFrom::Start(pos)).await?;
    let mut buf = vec![0; 1024];
    let _n1 = read(&mut buf, file).await?;
    let ev = parse_event(&buf)?;
    Ok(ev)
}

pub async fn position_static_len_datafile(
    mut file: File,
    range: NanoRange,
    expand_right: bool,
) -> Result<(File, bool, u32, u64), Error> {
    let flen = file.seek(SeekFrom::End(0)).await?;
    file.seek(SeekFrom::Start(0)).await?;
    let mut buf = vec![0; 1024];
    let _n1 = read(&mut buf, &mut file).await?;
    let hres = parse_channel_header(&buf)?;
    let headoff = 2 + hres.0 as u64;
    let ev = parse_event(&buf[headoff as usize..])?;
    let evlen = ev.0 as u64;
    let mut j = headoff;
    let mut k = ((flen - headoff) / evlen - 1) * evlen + headoff;
    let x = ev.1.ns();
    let t = read_event_at(k, &mut file).await?;
    if t.0 != evlen as u32 {
        Err(Error::with_msg(format!(
            "inconsistent event lengths: {} vs {}",
            t.0, evlen
        )))?;
    }
    let y = t.1.ns();
    let mut nreads = 2;
    if x >= range.end {
        if expand_right {
            file.seek(SeekFrom::Start(j)).await?;
            return Ok((file, true, nreads, j));
        } else {
            file.seek(SeekFrom::Start(0)).await?;
            return Ok((file, false, nreads, 0));
        }
    }
    if y < range.beg {
        file.seek(SeekFrom::Start(j)).await?;
        return Ok((file, false, nreads, j));
    }
    if x >= range.beg {
        if x < range.end || expand_right {
            file.seek(SeekFrom::Start(j)).await?;
            return Ok((file, true, nreads, j));
        } else {
            file.seek(SeekFrom::Start(0)).await?;
            return Ok((file, false, nreads, 0));
        }
    }
    let mut x = x;
    let mut y = y;
    loop {
        assert!(x < y);
        assert_eq!((k - j) % evlen, 0);
        if k - j < 2 * evlen {
            if y < range.end || expand_right {
                file.seek(SeekFrom::Start(k)).await?;
                return Ok((file, true, nreads, k));
            } else {
                file.seek(SeekFrom::Start(0)).await?;
                return Ok((file, false, nreads, 0));
            }
        }
        // Midpoint rounded down to a whole event, so m stays on a record
        // boundary: e.g. j = headoff, k = headoff + 80, evlen = 16 gives
        // m = headoff + 32.
        let m = j + (k - j) / 2 / evlen * evlen;
        let t = read_event_at(m, &mut file).await?;
        if t.0 != evlen as u32 {
            Err(Error::with_msg(format!(
                "inconsistent event lengths: {} vs {}",
                t.0, evlen
            )))?;
        }
        nreads += 1;
        let e = t.1.ns();
        if e < range.beg {
            x = e;
            j = m;
        } else {
            y = e;
            k = m;
        }
    }
}

pub async fn position_static_len_datafile_at_largest_smaller_than(
    mut file: File,
    range: NanoRange,
    _expand_right: bool,
) -> Result<(File, bool, u32, u64), Error> {
    let flen = file.seek(SeekFrom::End(0)).await?;
    file.seek(SeekFrom::Start(0)).await?;
    let mut buf = vec![0; 1024];
    let _n1 = read(&mut buf, &mut file).await?;
    let hres = parse_channel_header(&buf)?;
    let headoff = 2 + hres.0 as u64;
    let ev = parse_event(&buf[headoff as usize..])?;
    let evlen = ev.0 as u64;
    let mut j = headoff;
    let mut k = ((flen - headoff) / evlen - 1) * evlen + headoff;
    let x = ev.1.ns();
    let t = read_event_at(k, &mut file).await?;
    if t.0 != evlen as u32 {
        Err(Error::with_msg(format!(
            "inconsistent event lengths: {} vs {}",
            t.0, evlen
        )))?;
    }
    let y = t.1.ns();
    let mut nreads = 2;
    if x >= range.beg {
        file.seek(SeekFrom::Start(j)).await?;
        return Ok((file, false, nreads, j));
    }
    if y < range.beg {
        file.seek(SeekFrom::Start(k)).await?;
        return Ok((file, true, nreads, k));
    }
    loop {
        if k - j < 2 * evlen {
            file.seek(SeekFrom::Start(j)).await?;
            return Ok((file, true, nreads, j));
        }
        let m = j + (k - j) / 2 / evlen * evlen;
        let t = read_event_at(m, &mut file).await?;
        if t.0 != evlen as u32 {
            Err(Error::with_msg(format!(
                "inconsistent event lengths: {} vs {}",
                t.0, evlen
            )))?;
        }
        nreads += 1;
        let x = t.1.ns();
        if x < range.beg {
            j = m;
        } else {
            k = m;
        }
    }
}
1
crates/disk/src/merge.rs
Normal file
@@ -0,0 +1 @@
pub mod mergedblobsfromremotes;
110
crates/disk/src/merge/mergedblobsfromremotes.rs
Normal file
@@ -0,0 +1,110 @@
use err::Error;
use futures_util::pin_mut;
use futures_util::Stream;
use futures_util::StreamExt;
use items_0::streamitem::Sitemty;
use items_2::eventfull::EventFull;
use items_2::merger::Merger;
use netpod::log::*;
use netpod::Cluster;
use netpod::SfChFetchInfo;
use query::api4::events::EventsSubQuery;
use std::future::Future;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
use streams::tcprawclient::x_processed_event_blobs_stream_from_node;

type T001<T> = Pin<Box<dyn Stream<Item = Sitemty<T>> + Send>>;
type T002<T> = Pin<Box<dyn Future<Output = Result<T001<T>, Error>> + Send>>;

pub struct MergedBlobsFromRemotes {
    tcp_establish_futs: Vec<T002<EventFull>>,
    nodein: Vec<Option<T001<EventFull>>>,
    merged: Option<T001<EventFull>>,
    completed: bool,
    errored: bool,
}

impl MergedBlobsFromRemotes {
    pub fn new(subq: EventsSubQuery, cluster: Cluster) -> Self {
        debug!("MergedBlobsFromRemotes subq {:?}", subq);
        let mut tcp_establish_futs = Vec::new();
        for node in &cluster.nodes {
            let f = x_processed_event_blobs_stream_from_node(subq.clone(), node.clone());
            let f: T002<EventFull> = Box::pin(f);
            tcp_establish_futs.push(f);
        }
        let n = tcp_establish_futs.len();
        Self {
            tcp_establish_futs,
            nodein: (0..n).map(|_| None).collect(),
            merged: None,
            completed: false,
            errored: false,
        }
    }
}

impl Stream for MergedBlobsFromRemotes {
    type Item = Sitemty<EventFull>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        // Two phases: first drive every per-node connect future to completion,
        // then hand all node streams to the merger and poll that.
        'outer: loop {
            break if self.completed {
                panic!("poll_next on completed");
            } else if self.errored {
                self.completed = true;
                return Ready(None);
            } else if let Some(fut) = &mut self.merged {
                match fut.poll_next_unpin(cx) {
                    Ready(Some(Ok(k))) => Ready(Some(Ok(k))),
                    Ready(Some(Err(e))) => {
                        self.errored = true;
                        Ready(Some(Err(e)))
                    }
                    Ready(None) => {
                        self.completed = true;
                        Ready(None)
                    }
                    Pending => Pending,
                }
            } else {
                let mut pend = false;
                let mut c1 = 0;
                for i1 in 0..self.tcp_establish_futs.len() {
                    if self.nodein[i1].is_none() {
                        let f = &mut self.tcp_establish_futs[i1];
                        pin_mut!(f);
                        match f.poll(cx) {
                            Ready(Ok(k)) => {
                                self.nodein[i1] = Some(k);
                            }
                            Ready(Err(e)) => {
                                self.errored = true;
                                return Ready(Some(Err(e)));
                            }
                            Pending => {
                                pend = true;
                            }
                        }
                    } else {
                        c1 += 1;
                    }
                }
                if pend {
                    Pending
                } else {
                    if c1 == self.tcp_establish_futs.len() {
                        let inps = self.nodein.iter_mut().map(|k| k.take().unwrap()).collect();
                        // TODO set out_max_len dynamically
                        let s1 = Merger::new(inps, 128);
                        self.merged = Some(Box::pin(s1));
                    }
                    continue 'outer;
                }
            };
        }
    }
}
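A consumption sketch for the merged stream (it assumes an EventsSubQuery and a Cluster built elsewhere; only data items are counted):

use items_0::streamitem::{RangeCompletableItem, StreamItem};

// Sketch: drive the merged stream to completion and count data batches.
async fn drain_merged(subq: EventsSubQuery, cluster: Cluster) -> Result<usize, Error> {
    let mut stream = MergedBlobsFromRemotes::new(subq, cluster);
    let mut n = 0;
    while let Some(item) = stream.next().await {
        if let StreamItem::DataItem(RangeCompletableItem::Data(_)) = item? {
            n += 1;
        }
    }
    Ok(n)
}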
133
crates/disk/src/paths.rs
Normal file
@@ -0,0 +1,133 @@
use crate::SfDbChConf;
use err::Error;
use futures_util::StreamExt;
use netpod::timeunits::MS;
use netpod::Node;
use netpod::SfChFetchInfo;
use netpod::TsNano;
use std::path::PathBuf;

// TODO remove/replace this
pub fn datapath(timebin: u64, config: &SfDbChConf, split: u32, node: &Node) -> PathBuf {
    node.sf_databuffer
        .as_ref()
        .unwrap()
        .data_base_path
        .join(format!(
            "{}_{}",
            node.sf_databuffer.as_ref().unwrap().ksprefix,
            config.keyspace
        ))
        .join("byTime")
        .join(config.channel.name())
        .join(format!("{:019}", timebin))
        .join(format!("{:010}", split))
        .join(format!("{:019}_00000_Data", config.time_bin_size.ns() / MS))
}

/**
Return potential datafile paths for the given timebin.

They are "potential" paths because the files are not opened here and may vanish
before they are; the timebin itself may also not exist.
*/
pub async fn datapaths_for_timebin(
    timebin: u64,
    fetch_info: &SfChFetchInfo,
    node: &Node,
) -> Result<Vec<PathBuf>, Error> {
    let sfc = node.sf_databuffer.as_ref().unwrap();
    let timebin_path = sfc
        .data_base_path
        .join(format!("{}_{}", sfc.ksprefix, fetch_info.ks()))
        .join("byTime")
        .join(fetch_info.name())
        .join(format!("{:019}", timebin));
    let rd = tokio::fs::read_dir(timebin_path).await?;
    let mut rd = tokio_stream::wrappers::ReadDirStream::new(rd);
    let mut splits = vec![];
    while let Some(e) = rd.next().await {
        let e = e?;
        let dn = e
            .file_name()
            .into_string()
            .map_err(|s| Error::with_msg(format!("Bad OS path {:?} path: {:?}", s, e.path())))?;
        if dn.len() != 10 {
            return Err(Error::with_msg(format!("bad split dirname path: {:?}", e.path())));
        }
        // Split directories are named by exactly ten digits.
        let vv = dn.chars().filter(|c| c.is_digit(10)).count();
        if vv == 10 {
            let split: u64 = dn.parse()?;
            match &sfc.splits {
                Some(sps) => {
                    if sps.contains(&split) {
                        splits.push(split);
                    }
                }
                None => {
                    splits.push(split);
                }
            }
        }
    }
    let mut ret = vec![];
    for split in splits {
        let path = sfc
            .data_base_path
            .join(format!("{}_{}", sfc.ksprefix, fetch_info.ks()))
            .join("byTime")
            .join(fetch_info.name())
            .join(format!("{:019}", timebin))
            .join(format!("{:010}", split))
            .join(format!("{:019}_00000_Data", fetch_info.bs().ns() / MS));
        ret.push(path);
    }
    Ok(ret)
}

pub fn channel_timebins_dir_path(fetch_info: &SfChFetchInfo, node: &Node) -> Result<PathBuf, Error> {
    let sfc = node.sf_databuffer.as_ref().unwrap();
    let ret = sfc
        .data_base_path
        .join(format!("{}_{}", sfc.ksprefix, fetch_info.ks()))
        .join("byTime")
        .join(fetch_info.name());
    Ok(ret)
}

pub fn data_dir_path(ts: TsNano, fetch_info: &SfChFetchInfo, split: u32, node: &Node) -> Result<PathBuf, Error> {
    let ret = channel_timebins_dir_path(fetch_info, node)?
        .join(format!("{:019}", ts.ns() / fetch_info.bs().ns()))
        .join(format!("{:010}", split));
    Ok(ret)
}

pub fn data_path(ts: TsNano, fetch_info: &SfChFetchInfo, split: u32, node: &Node) -> Result<PathBuf, Error> {
    let fname = format!("{:019}_{:05}_Data", fetch_info.bs().ns() / MS, 0);
    let ret = data_dir_path(ts, fetch_info, split, node)?.join(fname);
    Ok(ret)
}

pub fn index_path(ts: TsNano, fetch_info: &SfChFetchInfo, split: u32, node: &Node) -> Result<PathBuf, Error> {
    let fname = format!("{:019}_{:05}_Data_Index", fetch_info.bs().ns() / MS, 0);
    let ret = data_dir_path(ts, fetch_info, split, node)?.join(fname);
    Ok(ret)
}

pub fn data_dir_path_tb(ks: u32, channel_name: &str, tb: u32, split: u32, node: &Node) -> Result<PathBuf, Error> {
    let sfc = node.sf_databuffer.as_ref().unwrap();
    let ret = sfc
        .data_base_path
        .join(format!("{}_{}", sfc.ksprefix, ks))
        .join("byTime")
        .join(channel_name)
        .join(format!("{:019}", tb))
        .join(format!("{:010}", split));
    Ok(ret)
}

pub fn data_path_tb(ks: u32, channel_name: &str, tb: u32, tbs: u32, split: u32, node: &Node) -> Result<PathBuf, Error> {
    let fname = format!("{:019}_{:05}_Data", tbs, 0);
    let ret = data_dir_path_tb(ks, channel_name, tb, split, node)?.join(fname);
    Ok(ret)
}
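For orientation, the on-disk layout these helpers address looks like this (field names taken from the code above, widths from the format strings):

{data_base_path}/{ksprefix}_{keyspace}/byTime/{channel}/{timebin:019}/{split:010}/{binsize_ms:019}_00000_Data
{data_base_path}/{ksprefix}_{keyspace}/byTime/{channel}/{timebin:019}/{split:010}/{binsize_ms:019}_00000_Data_Index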
2
crates/disk/src/raw.rs
Normal file
@@ -0,0 +1,2 @@
pub mod conn;
pub mod generated;
262
crates/disk/src/raw/conn.rs
Normal file
@@ -0,0 +1,262 @@
use crate::eventblobs::EventChunkerMultifile;
use crate::eventchunker::EventChunkerConf;
use crate::raw::generated::EventBlobsGeneratorI32Test00;
use crate::raw::generated::EventBlobsGeneratorI32Test01;
use err::Error;
use futures_util::stream;
use futures_util::Stream;
use futures_util::StreamExt;
use items_0::streamitem::RangeCompletableItem;
use items_0::streamitem::Sitemty;
use items_0::streamitem::StreamItem;
use items_2::channelevents::ChannelEvents;
use items_2::eventfull::EventFull;
use netpod::log::*;
use netpod::range::evrange::NanoRange;
use netpod::AggKind;
use netpod::ByteSize;
use netpod::DiskIoTune;
use netpod::NodeConfigCached;
use netpod::SfChFetchInfo;
use query::api4::events::EventsSubQuery;
use std::pin::Pin;

const TEST_BACKEND: &str = "testbackend-00";

fn make_num_pipeline_stream_evs(
    fetch_info: SfChFetchInfo,
    agg_kind: AggKind,
    event_blobs: EventChunkerMultifile,
) -> Pin<Box<dyn Stream<Item = Sitemty<ChannelEvents>> + Send>> {
    let scalar_type = fetch_info.scalar_type().clone();
    let shape = fetch_info.shape().clone();
    let event_stream = match crate::decode::EventsDynStream::new(scalar_type, shape, agg_kind, event_blobs) {
        Ok(k) => k,
        Err(e) => {
            return Box::pin(stream::iter([Err(e)]));
        }
    };
    // Re-wrap decoded events as ChannelEvents; log and stats items pass through unchanged.
    let stream = event_stream.map(|item| match item {
        Ok(item) => match item {
            StreamItem::DataItem(item) => match item {
                RangeCompletableItem::RangeComplete => Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete)),
                RangeCompletableItem::Data(item) => Ok(StreamItem::DataItem(RangeCompletableItem::Data(
                    ChannelEvents::Events(item),
                ))),
            },
            StreamItem::Log(k) => Ok(StreamItem::Log(k)),
            StreamItem::Stats(k) => Ok(StreamItem::Stats(k)),
        },
        Err(e) => Err(e),
    });
    Box::pin(stream)
}

pub async fn make_event_pipe(
    evq: EventsSubQuery,
    fetch_info: SfChFetchInfo,
    ncc: &NodeConfigCached,
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<ChannelEvents>> + Send>>, Error> {
    // sf-databuffer type backends identify channels by their (backend, name) only.
    let range = evq.range().clone();
    let one_before = evq.transform().need_one_before_range();
    info!(
        "make_event_pipe need_expand {need_expand} {evq:?}",
        need_expand = one_before
    );
    let event_chunker_conf = EventChunkerConf::new(ByteSize::from_kb(1024));
    // TODO should not need this for correctness.
    // Should limit based on return size and latency.
    let out_max_len = if ncc.node_config.cluster.is_central_storage {
        128
    } else {
        128
    };
    let do_decompress = true;
    let event_blobs = EventChunkerMultifile::new(
        (&range).try_into()?,
        fetch_info.clone(),
        ncc.node.clone(),
        ncc.ix,
        DiskIoTune::default(),
        event_chunker_conf,
        one_before,
        do_decompress,
        out_max_len,
    );
    error!("TODO replace AggKind in the called code");
    let pipe = make_num_pipeline_stream_evs(fetch_info, AggKind::TimeWeightedScalar, event_blobs);
    Ok(pipe)
}

pub fn make_local_event_blobs_stream(
    range: NanoRange,
    fetch_info: SfChFetchInfo,
    expand: bool,
    do_decompress: bool,
    event_chunker_conf: EventChunkerConf,
    disk_io_tune: DiskIoTune,
    node_config: &NodeConfigCached,
) -> Result<EventChunkerMultifile, Error> {
    info!(
        "make_local_event_blobs_stream {fetch_info:?} do_decompress {do_decompress} disk_io_tune {disk_io_tune:?}"
    );
    if do_decompress {
        warn!("Possible issue: decompress central storage event blob stream");
    }
    // TODO should not need this for correctness.
    // Should limit based on return size and latency.
    let out_max_len = if node_config.node_config.cluster.is_central_storage {
        128
    } else {
        128
    };
    let event_blobs = EventChunkerMultifile::new(
        range,
        fetch_info.clone(),
        node_config.node.clone(),
        node_config.ix,
        disk_io_tune,
        event_chunker_conf,
        expand,
        do_decompress,
        out_max_len,
    );
    Ok(event_blobs)
}

pub fn make_remote_event_blobs_stream(
    range: NanoRange,
    fetch_info: SfChFetchInfo,
    expand: bool,
    do_decompress: bool,
    event_chunker_conf: EventChunkerConf,
    disk_io_tune: DiskIoTune,
    node_config: &NodeConfigCached,
) -> Result<impl Stream<Item = Sitemty<EventFull>>, Error> {
    debug!("make_remote_event_blobs_stream");
    // TODO should not need this for correctness.
    // Should limit based on return size and latency.
    let out_max_len = if node_config.node_config.cluster.is_central_storage {
        128
    } else {
        128
    };
    let event_blobs = EventChunkerMultifile::new(
        range,
        fetch_info.clone(),
        node_config.node.clone(),
        node_config.ix,
        disk_io_tune,
        event_chunker_conf,
        expand,
        do_decompress,
        out_max_len,
    );
    Ok(event_blobs)
}

pub async fn make_event_blobs_pipe_real(
    subq: &EventsSubQuery,
    fetch_info: &SfChFetchInfo,
    node_config: &NodeConfigCached,
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<EventFull>> + Send>>, Error> {
    if false {
        match dbconn::channel_exists(subq.name(), &node_config).await {
            Ok(_) => (),
            Err(e) => return Err(e)?,
        }
    }
    let expand = subq.transform().need_one_before_range();
    let range = subq.range();
    let event_chunker_conf = EventChunkerConf::new(ByteSize::from_kb(1024));
    // TODO should depend on host config
    let do_local = node_config.node_config.cluster.is_central_storage;
    let pipe = if do_local {
        let event_blobs = make_local_event_blobs_stream(
            range.try_into()?,
            fetch_info.clone(),
            expand,
            false,
            event_chunker_conf,
            DiskIoTune::default(),
            node_config,
        )?;
        Box::pin(event_blobs) as _
    } else {
        let event_blobs = make_remote_event_blobs_stream(
            range.try_into()?,
            fetch_info.clone(),
            expand,
            true,
            event_chunker_conf,
            DiskIoTune::default(),
            node_config,
        )?;
        /*
        type ItemType = Sitemty<EventFull>;
        let s = event_blobs.map(|item: ItemType| Box::new(item) as Box<dyn Framable + Send>);
        //let s = tracing_futures::Instrumented::instrument(s, tracing::info_span!("make_event_blobs_pipe"));
        let pipe: Pin<Box<dyn Stream<Item = Box<dyn Framable + Send>> + Send>>;
        pipe = Box::pin(s);
        pipe
        */
        Box::pin(event_blobs) as _
    };
    Ok(pipe)
}

pub async fn make_event_blobs_pipe_test(
    subq: &EventsSubQuery,
    node_config: &NodeConfigCached,
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<EventFull>> + Send>>, Error> {
    warn!("GENERATE INMEM TEST DATA");
    let node_count = node_config.node_config.cluster.nodes.len() as u64;
    let node_ix = node_config.ix as u64;
    let chn = subq.name();
    let range = subq.range().clone();
    if chn == "test-gen-i32-dim0-v00" {
        Ok(Box::pin(EventBlobsGeneratorI32Test00::new(node_ix, node_count, range)))
    } else if chn == "test-gen-i32-dim0-v01" {
        Ok(Box::pin(EventBlobsGeneratorI32Test01::new(node_ix, node_count, range)))
    } else {
        // Names of the form "inmem-d0-i32" select a generator as well.
        let na: Vec<_> = chn.split('-').collect();
        if na.len() == 3 && na[0] == "inmem" && na[1] == "d0" && na[2] == "i32" {
            Ok(Box::pin(EventBlobsGeneratorI32Test00::new(node_ix, node_count, range)))
        } else {
            Err(Error::with_msg_no_trace(format!(
                "can not understand test channel name: {chn:?}"
            )))
        }
    }
}

pub async fn make_event_blobs_pipe(
    subq: &EventsSubQuery,
    fetch_info: &SfChFetchInfo,
    node_config: &NodeConfigCached,
) -> Result<Pin<Box<dyn Stream<Item = Sitemty<EventFull>> + Send>>, Error> {
    debug!("make_event_blobs_pipe {subq:?}");
    if subq.backend() == TEST_BACKEND {
        make_event_blobs_pipe_test(subq, node_config).await
    } else {
        make_event_blobs_pipe_real(subq, fetch_info, node_config).await
    }
}
225
crates/disk/src/raw/generated.rs
Normal file
@@ -0,0 +1,225 @@
use futures_util::Future;
use futures_util::FutureExt;
use futures_util::Stream;
use items_0::container::ByteEstimate;
use items_0::streamitem::sitem_data;
use items_0::streamitem::RangeCompletableItem;
use items_0::streamitem::Sitemty;
use items_0::streamitem::StreamItem;
use items_0::Empty;
use items_2::eventfull::EventFull;
use netpod::range::evrange::SeriesRange;
use netpod::timeunits::MS;
use netpod::ScalarType;
use netpod::Shape;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
use std::time::Duration;

pub trait TypedGenerator {
    type RustScalar;
}

pub struct EventBlobsGeneratorI32Test00 {
    ts: u64,
    dts: u64,
    tsend: u64,
    #[allow(unused)]
    c1: u64,
    scalar_type: ScalarType,
    be: bool,
    shape: Shape,
    timeout: Option<Pin<Box<dyn Future<Output = ()> + Send>>>,
    done: bool,
    done_range_final: bool,
}

impl TypedGenerator for EventBlobsGeneratorI32Test00 {
    type RustScalar = i32;
}

impl EventBlobsGeneratorI32Test00 {
    pub fn new(node_ix: u64, node_count: u64, range: SeriesRange) -> Self {
        let range = match range {
            SeriesRange::TimeRange(k) => k,
            SeriesRange::PulseRange(_) => todo!(),
        };
        // Node i emits every node_count-th event, offset by its own index.
        let dt = MS * 1000;
        let ts = (range.beg / dt + node_ix) * dt;
        let dts = dt * node_count as u64;
        let tsend = range.end;
        Self {
            ts,
            dts,
            tsend,
            c1: 0,
            scalar_type: ScalarType::I32,
            be: true,
            shape: Shape::Scalar,
            timeout: None,
            done: false,
            done_range_final: false,
        }
    }

    fn make_batch(&mut self) -> Sitemty<EventFull> {
        // TODO should not repeat self type name
        type T = <EventBlobsGeneratorI32Test00 as TypedGenerator>::RustScalar;
        let mut item = EventFull::empty();
        let mut ts = self.ts;
        loop {
            if ts >= self.tsend || item.byte_estimate() > 200 {
                break;
            }
            let pulse = ts;
            let value = (ts / (MS * 100) % 1000) as T;
            item.add_event(
                ts,
                pulse,
                Some(value.to_be_bytes().to_vec()),
                None,
                self.scalar_type.clone(),
                self.be,
                self.shape.clone(),
                None,
            );
            ts += self.dts;
        }
        self.ts = ts;
        sitem_data(item)
    }
}

impl Stream for EventBlobsGeneratorI32Test00 {
    type Item = Sitemty<EventFull>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        loop {
            break if self.done_range_final {
                Ready(None)
            } else if self.ts >= self.tsend {
                self.done = true;
                self.done_range_final = true;
                Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
            } else if false {
                // To use the generator without throttling, use this scope
                Ready(Some(self.make_batch()))
            } else if let Some(fut) = self.timeout.as_mut() {
                match fut.poll_unpin(cx) {
                    Ready(()) => {
                        self.timeout = None;
                        Ready(Some(self.make_batch()))
                    }
                    Pending => Pending,
                }
            } else {
                // Throttle batch production with a short sleep between batches.
                self.timeout = Some(Box::pin(tokio::time::sleep(Duration::from_millis(2))));
                continue;
            };
        }
    }
}

pub struct EventBlobsGeneratorI32Test01 {
    ts: u64,
    dts: u64,
    tsend: u64,
    #[allow(unused)]
    c1: u64,
    scalar_type: ScalarType,
    be: bool,
    shape: Shape,
    timeout: Option<Pin<Box<dyn Future<Output = ()> + Send>>>,
    done: bool,
    done_range_final: bool,
}

impl TypedGenerator for EventBlobsGeneratorI32Test01 {
    type RustScalar = i32;
}

impl EventBlobsGeneratorI32Test01 {
    pub fn new(node_ix: u64, node_count: u64, range: SeriesRange) -> Self {
        let range = match range {
            SeriesRange::TimeRange(k) => k,
            SeriesRange::PulseRange(_) => todo!(),
        };
        let dt = MS * 500;
        let ts = (range.beg / dt + node_ix) * dt;
        let dts = dt * node_count as u64;
        let tsend = range.end;
        Self {
            ts,
            dts,
            tsend,
            c1: 0,
            scalar_type: ScalarType::I32,
            be: true,
            shape: Shape::Scalar,
            timeout: None,
            done: false,
            done_range_final: false,
        }
    }

    fn make_batch(&mut self) -> Sitemty<EventFull> {
        type T = i32;
        let mut item = EventFull::empty();
        let mut ts = self.ts;
        loop {
            // Compare against the advancing local ts, not self.ts, so the
            // batch actually stops at the range end.
            if ts >= self.tsend || item.byte_estimate() > 400 {
                break;
            }
            let pulse = ts;
            let value = (ts / self.dts) as T;
            item.add_event(
                ts,
                pulse,
                Some(value.to_be_bytes().to_vec()),
                None,
                self.scalar_type.clone(),
                self.be,
                self.shape.clone(),
                None,
            );
            ts += self.dts;
        }
        self.ts = ts;
        sitem_data(item)
    }
}

impl Stream for EventBlobsGeneratorI32Test01 {
    type Item = Sitemty<EventFull>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        loop {
            break if self.done_range_final {
                Ready(None)
            } else if self.ts >= self.tsend {
                self.done = true;
                self.done_range_final = true;
                Ready(Some(Ok(StreamItem::DataItem(RangeCompletableItem::RangeComplete))))
            } else if false {
                // To use the generator without throttling, use this scope
                Ready(Some(self.make_batch()))
            } else if let Some(fut) = self.timeout.as_mut() {
                match fut.poll_unpin(cx) {
                    Ready(()) => {
                        self.timeout = None;
                        Ready(Some(self.make_batch()))
                    }
                    Pending => Pending,
                }
            } else {
                self.timeout = Some(Box::pin(tokio::time::sleep(Duration::from_millis(2))));
                continue;
            };
        }
    }
}
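A sketch of driving one of the generators directly, as node 0 of a single-node setup (the range values are illustrative):

use futures_util::StreamExt;
use netpod::range::evrange::NanoRange;

// Drain the throttled test generator and count the emitted data batches.
async fn generator_demo() {
    let range = SeriesRange::TimeRange(NanoRange { beg: 0, end: MS * 10_000 });
    let mut gen = EventBlobsGeneratorI32Test00::new(0, 1, range);
    let mut batches = 0;
    while let Some(item) = gen.next().await {
        if let Ok(StreamItem::DataItem(RangeCompletableItem::Data(_))) = item {
            batches += 1;
        }
    }
    println!("got {batches} batches");
}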
180
crates/disk/src/read3.rs
Normal file
@@ -0,0 +1,180 @@
use bytes::BytesMut;
use err::Error;
use netpod::log::*;
use std::os::unix::prelude::RawFd;
use std::sync::atomic::{AtomicPtr, AtomicUsize, Ordering};
use std::sync::Once;
use std::time::{Duration, Instant};
use tokio::sync::{mpsc, oneshot};

const DO_TRACE: bool = false;

static READ3: AtomicPtr<Read3> = AtomicPtr::new(std::ptr::null_mut());

pub struct ReadTask {
    fd: RawFd,
    pos: u64,
    count: u64,
    rescell: oneshot::Sender<Result<ReadResult, Error>>,
}

pub struct ReadResult {
    pub buf: BytesMut,
    pub eof: bool,
}

pub struct Read3 {
    jobs_tx: mpsc::Sender<ReadTask>,
    rtx: crossbeam::channel::Sender<mpsc::Receiver<ReadTask>>,
    threads_max: AtomicUsize,
    can_not_publish: AtomicUsize,
}

impl Read3 {
    pub fn get() -> &'static Self {
        static INIT: Once = Once::new();
        INIT.call_once(|| {
            let (jtx, jrx) = mpsc::channel(512);
            let (rtx, rrx) = crossbeam::channel::bounded(32);
            let read3 = Read3 {
                jobs_tx: jtx,
                rtx,
                threads_max: AtomicUsize::new(32),
                can_not_publish: AtomicUsize::new(0),
            };
            let b = Box::new(read3);
            let ptr = Box::into_raw(b);
            READ3.store(ptr, Ordering::Release);
            let ptr = READ3.load(Ordering::Acquire);
            let h = unsafe { &*ptr };
            // The single job receiver is passed from worker to worker through rtx/rrx.
            if let Err(_) = h.rtx.send(jrx) {
                error!("Read3 INIT: can not enqueue main job reader");
            }
            for wid in 0..128 {
                let rrx = rrx.clone();
                tokio::task::spawn_blocking(move || h.read_worker(wid, rrx));
            }
        });
        let ptr = READ3.load(Ordering::Acquire);
        unsafe { &*ptr }
    }

    pub fn threads_max(&self) -> usize {
        self.threads_max.load(Ordering::Acquire)
    }

    pub fn set_threads_max(&self, max: usize) {
        self.threads_max.store(max, Ordering::Release);
    }

    pub async fn read(&self, fd: RawFd, pos: u64, count: u64) -> Result<ReadResult, Error> {
        let (tx, rx) = oneshot::channel();
        let rt = ReadTask {
            fd,
            pos,
            count,
            rescell: tx,
        };
        match self.jobs_tx.send(rt).await {
            Ok(_) => match rx.await {
                Ok(res) => res,
                Err(e) => Err(Error::with_msg(format!("can not receive read task result: {e}"))),
            },
            Err(e) => Err(Error::with_msg(format!("can not send read job task: {e}"))),
        }
    }

    fn read_worker(&self, wid: u32, rrx: crossbeam::channel::Receiver<mpsc::Receiver<ReadTask>>) {
        'outer: loop {
            // Workers above the configured maximum park themselves.
            while wid as usize >= self.threads_max.load(Ordering::Acquire) {
                std::thread::sleep(Duration::from_millis(4000));
            }
            match rrx.recv() {
                Ok(mut jrx) => match jrx.blocking_recv() {
                    Some(rt) => match self.rtx.send(jrx) {
                        Ok(_) => self.read_worker_job(wid, rt),
                        Err(e) => {
                            error!("can not return the job receiver: wid {wid} {e}");
                            break 'outer;
                        }
                    },
                    None => {
                        let _ = self.rtx.send(jrx);
                        break 'outer;
                    }
                },
                Err(e) => {
                    error!("read_worker sees: wid {wid} {e}");
                    break 'outer;
                }
            }
        }
    }

    fn read_worker_job(&self, wid: u32, rt: ReadTask) {
        let ts1 = Instant::now();
        let mut prc = 0;
        let fd = rt.fd;
        let mut rpos = rt.pos;
        let mut buf = BytesMut::with_capacity(rt.count as usize);
        let mut writable = rt.count as usize;
        let rr = loop {
            if DO_TRACE {
                trace!("do pread fd {fd} count {writable} offset {rpos} wid {wid}");
            }
            // Write behind the already-filled prefix so a partial read appends
            // instead of overwriting the start of the buffer.
            let ec = unsafe { libc::pread(fd, buf.as_mut_ptr().add(buf.len()) as _, writable, rpos as i64) };
            prc += 1;
            if ec == -1 {
                let errno = unsafe { *libc::__errno_location() };
                if errno == libc::EINVAL {
                    // Treated as end-of-data.
                    debug!("pread EOF fd {fd} count {writable} offset {rpos} wid {wid}");
                    let rr = ReadResult { buf, eof: true };
                    break Ok(rr);
                } else {
                    warn!("pread ERROR errno {errno} fd {fd} count {writable} offset {rpos} wid {wid}");
                    // TODO use a more structured error
                    let e = Error::with_msg_no_trace(format!(
                        "pread ERROR errno {errno} fd {fd} count {writable} offset {rpos} wid {wid}"
                    ));
                    break Err(e);
                }
            } else if ec == 0 {
                debug!("pread EOF fd {fd} count {writable} offset {rpos} wid {wid} prc {prc}");
                let rr = ReadResult { buf, eof: true };
                break Ok(rr);
            } else if ec > 0 {
                if ec as usize > writable {
                    error!("pread TOOLARGE ec {ec} fd {fd} count {writable} offset {rpos} wid {wid} prc {prc}");
                    return;
                } else {
                    rpos += ec as u64;
                    writable -= ec as usize;
                    unsafe { buf.set_len(buf.len() + (ec as usize)) };
                    if writable == 0 {
                        let ts2 = Instant::now();
                        let dur = ts2.duration_since(ts1);
                        let dms = 1e3 * dur.as_secs_f32();
                        if DO_TRACE {
                            trace!("pread DONE ec {ec} fd {fd} wid {wid} prc {prc} dms {dms:.2}");
                        }
                        let rr = ReadResult { buf, eof: false };
                        break Ok(rr);
                    }
                }
            } else {
                error!(
                    "pread UNEXPECTED ec {} fd {} count {} offset {rpos} wid {wid}",
                    ec, rt.fd, writable
                );
                return;
            }
        };
        match rt.rescell.send(rr) {
            Ok(_) => {}
            Err(_) => {
                self.can_not_publish.fetch_add(1, Ordering::AcqRel);
                warn!("can not publish the read result wid {wid}");
            }
        }
    }
}
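A usage sketch for the positioned-read pool (assumes a tokio runtime; the path is illustrative and the file must stay open for the duration of the call):

use std::os::unix::io::AsRawFd;

// Read 4 KiB at offset 0 through the shared pread worker pool.
async fn read3_demo() -> Result<(), Error> {
    let file = std::fs::File::open("/tmp/some-datafile")?;
    let rr = Read3::get().read(file.as_raw_fd(), 0, 4096).await?;
    println!("got {} bytes, eof {}", rr.buf.len(), rr.eof);
    Ok(())
}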
227
crates/disk/src/read4.rs
Normal file
@@ -0,0 +1,227 @@
use bytes::BytesMut;
use err::Error;
use netpod::log::*;
use std::os::unix::prelude::RawFd;
use std::sync::atomic::{AtomicPtr, AtomicUsize, Ordering};
use std::sync::Once;
use std::time::{Duration, Instant};
use tokio::sync::mpsc;

static READ4: AtomicPtr<Read4> = AtomicPtr::new(std::ptr::null_mut());

const DO_TRACE: bool = false;

pub struct ReadTask {
    fd: RawFd,
    buflen: u64,
    read_queue_len: usize,
    results: mpsc::Sender<Result<ReadResult, Error>>,
}

pub struct ReadResult {
    pub buf: BytesMut,
    pub eof: bool,
}

pub struct Read4 {
    jobs_tx: mpsc::Sender<ReadTask>,
    rtx: crossbeam::channel::Sender<mpsc::Receiver<ReadTask>>,
    threads_max: AtomicUsize,
    can_not_publish: AtomicUsize,
}

impl Read4 {
    pub fn get() -> &'static Self {
        static INIT: Once = Once::new();
        INIT.call_once(|| {
            let (jtx, jrx) = mpsc::channel(512);
            let (rtx, rrx) = crossbeam::channel::bounded(32);
            let read4 = Read4 {
                jobs_tx: jtx,
                rtx,
                threads_max: AtomicUsize::new(32),
                can_not_publish: AtomicUsize::new(0),
            };
            let b = Box::new(read4);
            let ptr = Box::into_raw(b);
            READ4.store(ptr, Ordering::Release);
            let ptr = READ4.load(Ordering::Acquire);
            let h = unsafe { &*ptr };
            if let Err(_) = h.rtx.send(jrx) {
                error!("Read4 INIT: can not enqueue main job reader");
            }
            for wid in 0..16 {
                let rrx = rrx.clone();
                tokio::task::spawn_blocking(move || h.read_worker(wid, rrx));
            }
        });
        let ptr = READ4.load(Ordering::Acquire);
        unsafe { &*ptr }
    }

    pub fn threads_max(&self) -> usize {
        self.threads_max.load(Ordering::Acquire)
    }

    pub fn set_threads_max(&self, max: usize) {
        self.threads_max.store(max, Ordering::Release);
    }

    pub async fn read(
        &self,
        fd: RawFd,
        buflen: u64,
        read_queue_len: usize,
    ) -> Result<mpsc::Receiver<Result<ReadResult, Error>>, Error> {
        let (tx, rx) = mpsc::channel(32);
        let rt = ReadTask {
            fd,
            buflen,
            read_queue_len,
            results: tx,
        };
        match self.jobs_tx.send(rt).await {
            Ok(_) => Ok(rx),
            Err(e) => Err(Error::with_msg(format!("can not send read job task: {e}"))),
        }
    }

    fn read_worker(&self, wid: u32, rrx: crossbeam::channel::Receiver<mpsc::Receiver<ReadTask>>) {
        loop {
            while wid as usize >= self.threads_max.load(Ordering::Acquire) {
                std::thread::sleep(Duration::from_millis(4000));
            }
            match rrx.recv() {
                Ok(mut jrx) => match jrx.blocking_recv() {
                    Some(rt) => match self.rtx.send(jrx) {
                        Ok(_) => self.read_worker_job(wid, rt),
                        Err(e) => {
                            error!("can not return the job receiver: wid {wid} {e}");
                            return;
                        }
                    },
                    None => {
                        let _ = self.rtx.send(jrx);
                        return;
                    }
                },
                Err(e) => {
                    error!("read_worker sees: wid {wid} {e}");
                    return;
                }
            }
        }
    }

    fn read_worker_job(&self, wid: u32, rt: ReadTask) {
        let fd = rt.fd;
        // Start from the file's current position.
        let ec = unsafe { libc::lseek(fd, 0, libc::SEEK_CUR) };
        if ec == -1 {
            let errno = unsafe { *libc::__errno_location() };
            let msg = format!("seek error wid {wid} fd {fd} errno {errno}");
            error!("{}", msg);
            let e = Error::with_msg_no_trace(msg);
            match rt.results.blocking_send(Err(e)) {
                Ok(_) => {}
                Err(_) => {
                    self.can_not_publish.fetch_add(1, Ordering::AcqRel);
                    error!("Can not publish error");
                }
            }
            return;
        }
        let mut rpos = ec as u64;
        let mut apos = rpos / rt.buflen * rt.buflen;
        let mut prc = 0;
        loop {
            let ts1 = Instant::now();
            // Keep the kernel readahead queue filled ahead of the read position.
            while apos < rpos + rt.read_queue_len as u64 * rt.buflen {
                if DO_TRACE {
                    trace!("READAHEAD wid {wid} fd {fd} apos {apos}");
                }
                let n = unsafe { libc::readahead(fd, apos as _, rt.buflen as _) };
                if n == -1 {
                    let errno = unsafe { *libc::__errno_location() };
                    let msg = format!("READAHEAD ERROR wid {wid} errno {errno} fd {fd} apos {apos}");
                    warn!("{}", msg);
                    // TODO use a more structured error
                    let e = Error::with_msg_no_trace(msg);
                    match rt.results.blocking_send(Err(e)) {
                        Ok(_) => {}
                        Err(_) => {
                            self.can_not_publish.fetch_add(1, Ordering::AcqRel);
                            warn!("can not publish the read result wid {wid}");
                        }
                    }
                }
                // readahead is advisory; advance even after a failure so this
                // loop is guaranteed to terminate.
                apos += rt.buflen;
            }
            if DO_TRACE {
                trace!("READ wid {wid} fd {fd} rpos {rpos}");
            }
            let mut buf = BytesMut::with_capacity(rt.buflen as usize);
            let bufptr = buf.as_mut_ptr() as _;
            let buflen = buf.capacity() as _;
            let ec = unsafe { libc::read(fd, bufptr, buflen) };
            prc += 1;
            if ec == -1 {
                let errno = unsafe { *libc::__errno_location() };
                let msg = format!("READ ERROR wid {wid} errno {errno} fd {fd} offset {rpos}");
                warn!("{}", msg);
                // TODO use a more structured error
                let e = Error::with_msg_no_trace(msg);
                match rt.results.blocking_send(Err(e)) {
                    Ok(_) => {}
                    Err(_) => {
                        self.can_not_publish.fetch_add(1, Ordering::AcqRel);
                        warn!("can not publish the read result wid {wid}");
                    }
                }
                // Stop after reporting a failed read.
                return;
            } else if ec == 0 {
                debug!("READ EOF wid {wid} fd {fd} offset {rpos} prc {prc}");
                let rr = ReadResult { buf, eof: true };
                match rt.results.blocking_send(Ok(rr)) {
                    Ok(_) => {}
                    Err(_) => {
                        self.can_not_publish.fetch_add(1, Ordering::AcqRel);
                        warn!("can not publish the read result wid {wid}");
                    }
                }
                return;
            } else if ec > 0 {
                if ec as usize > buf.capacity() {
                    error!("READ TOOLARGE wid {wid} ec {ec} fd {fd} offset {rpos} prc {prc}");
                    return;
                } else {
                    rpos += ec as u64;
                    unsafe { buf.set_len(buf.len() + (ec as usize)) };
                    let ts2 = Instant::now();
                    let dur = ts2.duration_since(ts1);
                    let dms = 1e3 * dur.as_secs_f32();
                    if DO_TRACE {
                        trace!("READ DONE wid {wid} ec {ec} fd {fd} prc {prc} dms {dms:.2}");
                    }
                    let rr = ReadResult { buf, eof: false };
                    match rt.results.blocking_send(Ok(rr)) {
                        Ok(_) => {}
                        Err(_) => {
                            self.can_not_publish.fetch_add(1, Ordering::AcqRel);
                            warn!("can not publish the read result wid {wid}");
                            return;
                        }
                    }
                }
            } else {
                error!("READ UNEXPECTED wid {wid} ec {ec} fd {fd} offset {rpos}");
                return;
            }
        }
    }
}
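Sequential usage differs from Read3: one call hands back a channel of buffers that a worker fills with readahead (buffer size and queue depth here are illustrative):

use std::os::unix::io::AsRawFd;

// Stream a whole file through the sequential readahead pool.
async fn read4_demo() -> Result<u64, Error> {
    let file = std::fs::File::open("/tmp/some-datafile")?;
    let mut rx = Read4::get().read(file.as_raw_fd(), 1024 * 1024, 4).await?;
    let mut total = 0;
    while let Some(res) = rx.recv().await {
        let rr = res?;
        total += rr.buf.len() as u64;
        if rr.eof {
            break;
        }
    }
    Ok(total)
}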
50
crates/disk/src/streamlog.rs
Normal file
@@ -0,0 +1,50 @@
use items_0::streamitem::LogItem;
use netpod::log::*;
use std::collections::VecDeque;

pub struct Streamlog {
    items: VecDeque<LogItem>,
    node_ix: u32,
}

impl Streamlog {
    pub fn new(node_ix: u32) -> Self {
        Self {
            items: VecDeque::new(),
            node_ix,
        }
    }

    pub fn append(&mut self, level: Level, msg: String) {
        let item = LogItem {
            node_ix: self.node_ix,
            level,
            msg,
        };
        self.items.push_back(item);
    }

    pub fn pop(&mut self) -> Option<LogItem> {
        // Note: returns the most recently appended item first.
        self.items.pop_back()
    }

    pub fn emit(item: &LogItem) {
        match item.level {
            Level::ERROR => {
                error!("StreamLog Node {} {}", item.node_ix, item.msg);
            }
            Level::WARN => {
                warn!("StreamLog Node {} {}", item.node_ix, item.msg);
            }
            Level::INFO => {
                info!("StreamLog Node {} {}", item.node_ix, item.msg);
            }
            Level::DEBUG => {
                debug!("StreamLog Node {} {}", item.node_ix, item.msg);
            }
            Level::TRACE => {
                trace!("StreamLog Node {} {}", item.node_ix, item.msg);
            }
        }
    }
}
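Usage is two-phase: worker code buffers items, and whoever drains the stream emits them into the regular log. A minimal sketch:

// Sketch: buffer a message on node 3, then emit everything buffered so far.
fn streamlog_demo() {
    let mut slog = Streamlog::new(3);
    slog.append(Level::WARN, "index file missing".to_string());
    while let Some(item) = slog.pop() {
        Streamlog::emit(&item);
    }
}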