Mark closed pulse map files

This commit is contained in:
Dominik Werder
2021-09-17 20:38:20 +02:00
parent ad7f25b4c6
commit e1e930f453
14 changed files with 263 additions and 161 deletions

View File

@@ -13,7 +13,6 @@ pub fn make_test_node(id: u32) -> Node {
port_raw: 8800 + id as u16 + 100,
data_base_path: format!("../tmpdata/node{:02}", id).into(),
cache_base_path: format!("../tmpdata/node{:02}", id).into(),
split: id,
ksprefix: "ks".into(),
backend: "testbackend".into(),
archiver_appliance: None,

View File

@@ -375,21 +375,21 @@ impl EventChunker {
let ts2 = Instant::now();
let dt = ts2.duration_since(ts1);
self.decomp_dt_histo.ingest(dt.as_secs() as u32 + dt.subsec_micros());
decomp
Some(decomp)
}
Err(e) => {
return Err(Error::with_msg(format!("decompression failed {:?}", e)))?;
}
}
} else {
BytesMut::new()
None
}
};
ret.add_event(
ts,
pulse,
buf.as_ref()[(p1 as usize)..(p1 as usize + k1 as usize)].to_vec(),
Some(decomp),
decomp,
ScalarType::from_dtype_index(type_index)?,
is_big_endian,
shape_this,
@@ -401,6 +401,7 @@ impl EventChunker {
Err(Error::with_msg(msg))?;
}
let vlen = len - p1 as u32 - 4;
// TODO in this case, decomp and comp are the same, so keeping both is not needed.
let decomp = BytesMut::from(&buf[p1 as usize..(p1 as u32 + vlen) as usize]);
ret.add_event(
ts,

View File

@@ -120,7 +120,6 @@ pub async fn gen_test_data() -> Result<(), Error> {
listen: "0.0.0.0".into(),
port: 7780 + i1 as u16,
port_raw: 7780 + i1 as u16 + 100,
split: i1,
data_base_path: data_base_path.join(format!("node{:02}", i1)),
cache_base_path: data_base_path.join(format!("node{:02}", i1)),
ksprefix: ksprefix.clone(),
@@ -129,8 +128,8 @@ pub async fn gen_test_data() -> Result<(), Error> {
};
ensemble.nodes.push(node);
}
for node in &ensemble.nodes {
gen_node(node, &ensemble).await?;
for (split, node) in ensemble.nodes.iter().enumerate() {
gen_node(split as u32, node, &ensemble).await?;
}
Ok(())
}
@@ -146,14 +145,14 @@ pub struct ChannelGenProps {
gen_var: GenVar,
}
/// Generates the test data of every channel in `ensemble.channels` for one node.
///
/// Awaits `gen_channel` once per channel, in order, and stops at the first error,
/// which is propagated to the caller via `?`. Returns `Ok(())` when all channels succeed.
// NOTE(review): this span comes from a diff rendering whose +/- markers were lost. The two
// signature lines and the two `gen_channel` calls are the two variants of the same line;
// presumably the first of each pair is the removed one and the second (taking and forwarding
// `split`) is the added one — confirm against the repository.
async fn gen_node(node: &Node, ensemble: &Ensemble) -> Result<(), Error> {
async fn gen_node(split: u32, node: &Node, ensemble: &Ensemble) -> Result<(), Error> {
for chn in &ensemble.channels {
gen_channel(chn, node, ensemble).await?
gen_channel(chn, split, node, ensemble).await?
}
Ok(())
}
async fn gen_channel(chn: &ChannelGenProps, node: &Node, ensemble: &Ensemble) -> Result<(), Error> {
async fn gen_channel(chn: &ChannelGenProps, split: u32, node: &Node, ensemble: &Ensemble) -> Result<(), Error> {
let config_path = node.data_base_path.join("config").join(&chn.config.channel.name);
let channel_path = node
.data_base_path
@@ -175,6 +174,7 @@ async fn gen_channel(chn: &ChannelGenProps, node: &Node, ensemble: &Ensemble) ->
chn.time_spacing,
&channel_path,
&chn.config,
split,
node,
ensemble,
&chn.gen_var,
@@ -321,14 +321,13 @@ async fn gen_timebin(
ts_spacing: u64,
channel_path: &Path,
config: &ChannelConfig,
split: u32,
node: &Node,
ensemble: &Ensemble,
gen_var: &GenVar,
) -> Result<GenTimebinRes, Error> {
let tb = ts.ns / config.time_bin_size.ns;
let path = channel_path
.join(format!("{:019}", tb))
.join(format!("{:010}", node.split));
let path = channel_path.join(format!("{:019}", tb)).join(format!("{:010}", split));
tokio::fs::create_dir_all(&path).await?;
let data_path = path.join(format!("{:019}_{:05}_Data", config.time_bin_size.ns / MS, 0));
let index_path = path.join(format!("{:019}_{:05}_Data_Index", config.time_bin_size.ns / MS, 0));
@@ -363,20 +362,22 @@ async fn gen_timebin(
};
while ts.ns < tsmax.ns {
match gen_var {
// TODO
// Splits and nodes are not in 1-to-1 correspondence.
GenVar::Default => {
if evix % ensemble.nodes.len() as u64 == node.split as u64 {
if evix % ensemble.nodes.len() as u64 == split as u64 {
gen_event(&mut file, index_file.as_mut(), evix, ts, pulse, config, gen_var).await?;
}
}
GenVar::ConstRegular => {
if evix % ensemble.nodes.len() as u64 == node.split as u64 {
if evix % ensemble.nodes.len() as u64 == split as u64 {
gen_event(&mut file, index_file.as_mut(), evix, ts, pulse, config, gen_var).await?;
}
}
GenVar::TimeWeight => {
let m = evix % 20;
if m == 0 || m == 1 {
if evix % ensemble.nodes.len() as u64 == node.split as u64 {
if evix % ensemble.nodes.len() as u64 == split as u64 {
gen_event(&mut file, index_file.as_mut(), evix, ts, pulse, config, gen_var).await?;
}
}

View File

@@ -4,6 +4,7 @@ use err::Error;
use futures_core::Stream;
use futures_util::future::FusedFuture;
use futures_util::StreamExt;
use netpod::histo::HistoLog2;
use netpod::{log::*, FileIoBufferSize};
use netpod::{ChannelConfig, Node, Shape};
use std::future::Future;
@@ -36,8 +37,9 @@ pub mod paths;
pub mod raw;
pub mod streamlog;
// TODO transform this into a self-test or remove.
pub async fn read_test_1(query: &netpod::AggQuerySingleChannel, node: Node) -> Result<netpod::BodyStream, Error> {
let path = paths::datapath(query.timebin as u64, &query.channel_config, &node);
let path = paths::datapath(query.timebin as u64, &query.channel_config, 0, &node);
debug!("try path: {:?}", path);
let fin = OpenOptions::new().read(true).open(path).await?;
let meta = fin.metadata().await;
@@ -270,7 +272,7 @@ pub struct NeedMinBuffer {
inp: Pin<Box<dyn Stream<Item = Result<FileChunkRead, Error>> + Send>>,
need_min: u32,
left: Option<FileChunkRead>,
buf_len_histo: [u32; 16],
buf_len_histo: HistoLog2,
errored: bool,
completed: bool,
}
@@ -281,7 +283,7 @@ impl NeedMinBuffer {
inp: inp,
need_min: 1,
left: None,
buf_len_histo: Default::default(),
buf_len_histo: HistoLog2::new(8),
errored: false,
completed: false,
}
@@ -300,7 +302,7 @@ impl NeedMinBuffer {
// TODO remove this again
// Diagnostic `Drop` impl: when a `NeedMinBuffer` is dropped, the `Debug` representation of
// its `buf_len_histo` field is written out through the `info!` macro.
// NOTE(review): the two `info!` lines are the two variants of one statement in this diff
// view (presumably old message first, new message second) — only one exists in the real file.
impl Drop for NeedMinBuffer {
fn drop(&mut self) {
info!("NeedMinBuffer histo: {:?}", self.buf_len_histo);
info!("NeedMinBuffer Drop Stats:\nbuf_len_histo: {:?}", self.buf_len_histo);
}
}
@@ -320,23 +322,7 @@ impl Stream for NeedMinBuffer {
let mut again = false;
let z = match self.inp.poll_next_unpin(cx) {
Ready(Some(Ok(fcr))) => {
const SUB: usize = 8;
let mut u = fcr.buf.len();
let mut po = 0;
while u != 0 && po < 15 {
u = u >> 1;
po += 1;
}
let po = if po >= self.buf_len_histo.len() + SUB {
self.buf_len_histo.len() - 1
} else {
if po > SUB {
po - SUB
} else {
0
}
};
self.buf_len_histo[po] += 1;
self.buf_len_histo.ingest(fcr.buf.len() as u32);
//info!("NeedMinBuffer got buf len {}", fcr.buf.len());
match self.left.take() {
Some(mut lfcr) => {

View File

@@ -5,14 +5,14 @@ use netpod::timeunits::MS;
use netpod::{ChannelConfig, Nanos, Node};
use std::path::PathBuf;
/// Builds the path of a channel's data file below `node.data_base_path`.
///
/// The components joined, in order, are:
/// `<ksprefix>_<keyspace>` / `byTime` / `<channel name>` / `<timebin, zero-padded to 19>` /
/// `<split, zero-padded to 10>` / `<time_bin_size.ns / MS, zero-padded to 19>_00000_Data`.
///
/// Only constructs the `PathBuf`; nothing here touches the file system.
// NOTE(review): this span comes from a diff rendering whose +/- markers were lost. The two
// TODO comments, the two signatures and the two split-directory `.join(...)` lines are
// variants of the same line; presumably the `node.split` form is the removed one and the
// explicit `split: u32` parameter is the added one — confirm against the repository.
// TODO remove this
pub fn datapath(timebin: u64, config: &netpod::ChannelConfig, node: &Node) -> PathBuf {
// TODO remove/replace this
pub fn datapath(timebin: u64, config: &netpod::ChannelConfig, split: u32, node: &Node) -> PathBuf {
node.data_base_path
.join(format!("{}_{}", node.ksprefix, config.keyspace))
.join("byTime")
.join(config.channel.name.clone())
.join(format!("{:019}", timebin))
.join(format!("{:010}", node.split))
.join(format!("{:010}", split))
.join(format!("{:019}_00000_Data", config.time_bin_size.ns / MS))
}
@@ -77,22 +77,22 @@ pub fn channel_timebins_dir_path(channel_config: &ChannelConfig, node: &Node) ->
Ok(ret)
}
/// Returns the directory that holds the data files for the time bin containing `ts`.
///
/// Starts from `channel_timebins_dir_path(channel_config, node)` and appends two components:
/// the time-bin index `ts.ns / channel_config.time_bin_size.ns` zero-padded to 19 digits,
/// and the split number zero-padded to 10 digits.
///
/// # Errors
/// Propagates any error returned by `channel_timebins_dir_path`.
// NOTE(review): this span comes from a diff rendering whose +/- markers were lost. The two
// signatures and the two split `.join(...)` lines are variants of the same line; presumably
// the `node.split` form was removed in favour of the explicit `split` parameter — confirm.
pub fn data_dir_path(ts: Nanos, channel_config: &ChannelConfig, node: &Node) -> Result<PathBuf, Error> {
pub fn data_dir_path(ts: Nanos, channel_config: &ChannelConfig, split: u32, node: &Node) -> Result<PathBuf, Error> {
let ret = channel_timebins_dir_path(channel_config, node)?
.join(format!("{:019}", ts.ns / channel_config.time_bin_size.ns))
.join(format!("{:010}", node.split));
.join(format!("{:010}", split));
Ok(ret)
}
/// Returns the path of the `_Data` file for the time bin containing `ts`.
///
/// The file name is `<time_bin_size.ns / MS, zero-padded to 19>_<0, zero-padded to 5>_Data`
/// and is joined onto the directory returned by `data_dir_path`.
///
/// # Errors
/// Propagates any error returned by `data_dir_path`.
// NOTE(review): this span comes from a diff rendering whose +/- markers were lost. The two
// signatures and the two `data_dir_path` calls are variants of the same line; presumably the
// form passing `split` is the added one — confirm against the repository.
pub fn data_path(ts: Nanos, channel_config: &ChannelConfig, node: &Node) -> Result<PathBuf, Error> {
pub fn data_path(ts: Nanos, channel_config: &ChannelConfig, split: u32, node: &Node) -> Result<PathBuf, Error> {
let fname = format!("{:019}_{:05}_Data", channel_config.time_bin_size.ns / MS, 0);
let ret = data_dir_path(ts, channel_config, node)?.join(fname);
let ret = data_dir_path(ts, channel_config, split, node)?.join(fname);
Ok(ret)
}
/// Returns the path of the `_Data_Index` file for the time bin containing `ts`.
///
/// The file name is `<time_bin_size.ns / MS, zero-padded to 19>_<0, zero-padded to 5>_Data_Index`
/// and is joined onto the directory returned by `data_dir_path`.
///
/// # Errors
/// Propagates any error returned by `data_dir_path`.
// NOTE(review): this span comes from a diff rendering whose +/- markers were lost. The two
// signatures and the two `data_dir_path` calls are variants of the same line; presumably the
// form passing `split` is the added one — confirm against the repository.
pub fn index_path(ts: Nanos, channel_config: &ChannelConfig, node: &Node) -> Result<PathBuf, Error> {
pub fn index_path(ts: Nanos, channel_config: &ChannelConfig, split: u32, node: &Node) -> Result<PathBuf, Error> {
let fname = format!("{:019}_{:05}_Data_Index", channel_config.time_bin_size.ns / MS, 0);
let ret = data_dir_path(ts, channel_config, node)?.join(fname);
let ret = data_dir_path(ts, channel_config, split, node)?.join(fname);
Ok(ret)
}