This commit is contained in:
Dominik Werder
2021-04-16 14:38:46 +02:00
parent f2e4ac3a35
commit 1150bb3c55
12 changed files with 462 additions and 59 deletions

View File

@@ -736,6 +736,7 @@ async fn agg_x_dim_0_inner() {
},
keyspace: 2,
time_bin_size: DAY,
array: false,
shape: Shape::Scalar,
scalar_type: ScalarType::F64,
big_endian: true,
@@ -791,6 +792,7 @@ async fn agg_x_dim_1_inner() {
},
keyspace: 3,
time_bin_size: DAY,
array: true,
shape: Shape::Wave(1024),
scalar_type: ScalarType::F64,
big_endian: true,
@@ -840,6 +842,7 @@ async fn merge_0_inner() {
},
keyspace: 3,
time_bin_size: DAY,
array: true,
shape: Shape::Wave(17),
scalar_type: ScalarType::F64,
big_endian: true,

View File

@@ -54,7 +54,7 @@ pub fn binned_bytes_for_http(node_config: Arc<NodeConfig>, query: &Query) -> Res
// TODO
// Translate the Query TimeRange + AggKind into an iterator over the pre-binned patches.
let grid = PreBinnedPatchRange::covering_range(query.range.clone(), query.count, 0);
let grid = PreBinnedPatchRange::covering_range(query.range.clone(), query.count);
match grid {
Some(spec) => {
info!("GOT PreBinnedPatchGridSpec: {:?}", spec);
@@ -215,13 +215,13 @@ impl PreBinnedValueStream {
beg: self.patch_coord.patch_beg(),
end: self.patch_coord.patch_end(),
};
match PreBinnedPatchRange::covering_range(range, 2, 0) {
match PreBinnedPatchRange::covering_range(range, self.patch_coord.bin_count() + 1) {
Some(range) => {
let h = range.grid_spec.bin_t_len();
info!("FOUND NEXT GRAN g {} h {} ratio {} mod {} {:?}", g, h, g/h, g%h, range);
assert!(g / h > 1);
assert!(g / h < 20);
assert!(g % h == 0);
info!("FOUND NEXT GRAN g {} h {} ratio {} mod {} {:?}", g, h, g/h, g%h, range);
let bin_size = range.grid_spec.bin_t_len();
let channel = self.channel.clone();
let agg_kind = self.agg_kind.clone();
@@ -239,6 +239,12 @@ impl PreBinnedValueStream {
self.fut2 = Some(Box::pin(s));
}
None => {
// TODO now try to read raw data.
// TODO Request the whole pre bin patch so that I have the option to save it as cache file if complete.
// TODO The merging and other compute will be done by this node.
// TODO This node needs as input the splitted data streams.
// TODO Add a separate tcp server which can provide the parsed, unpacked, event-local-processed, reserialized streams.
error!("TODO NO BETTER GRAN FOUND FOR g {}", g);
todo!();
}

233
disk/src/channelconfig.rs Normal file
View File

@@ -0,0 +1,233 @@
#[allow(unused_imports)]
use nom::{IResult, bytes::complete::{tag, take, take_while_m_n}, combinator::map_res, sequence::tuple};
use nom::number::complete::{be_i8, be_u8, be_i16, be_i32, be_i64};
use crate::{Error, BadError};
use num_derive::{FromPrimitive, ToPrimitive};
use num_traits::{ToPrimitive};
use serde_derive::{Serialize, Deserialize};
/// Scalar value type of a channel, as encoded in the config file.
///
/// Discriminants are the on-disk type codes (stored as an i8, 0..=13) which
/// `parseEntry` converts via the derived `FromPrimitive`.
#[derive(Debug, FromPrimitive, ToPrimitive, Serialize, Deserialize)]
pub enum DType {
    Bool = 0,
    Bool8 = 1,
    Int8 = 2,
    Uint8 = 3,
    Int16 = 4,
    Uint16 = 5,
    Character = 6,
    Int32 = 7,
    Uint32 = 8,
    Int64 = 9,
    Uint64 = 10,
    Float32 = 11,
    Float64 = 12,
    String = 13,
}
impl DType {
    /// The numeric on-disk type code as i16.
    /// The unwrap cannot fail: every discriminant (0..=13) fits in an i16.
    pub fn to_i16(&self) -> i16 { ToPrimitive::to_i16(self).unwrap() }
}
/// Compression scheme of stored event data.
///
/// Only bitshuffle+LZ4 (code 0) is known; `parseEntry` rejects other codes.
#[derive(Debug, FromPrimitive, ToPrimitive, Serialize, Deserialize)]
pub enum CompressionMethod {
    BitshuffleLZ4 = 0,
}
impl CompressionMethod {
    /// The numeric on-disk method code as i16.
    /// The unwrap cannot fail: the only discriminant (0) fits in an i16.
    pub fn to_i16(&self) -> i16 { ToPrimitive::to_i16(self).unwrap() }
}
/// One timestamped configuration entry of a databuffer channel.
///
/// Produced by `parseEntry`. Field names mirror the on-disk / upstream
/// naming, hence the camelCase.
#[derive(Debug, Serialize, Deserialize)]
pub struct ConfigEntry {
    /// Timestamp from which this entry applies — presumably ns since epoch,
    /// judging by the ranges asserted in the `open_file` test; TODO confirm.
    pub ts: i64,
    /// Pulse id recorded together with `ts`.
    pub pulse: i64,
    /// Keyspace number of the channel's data files.
    pub ks: i32,
    /// Time bin size of the channel's data files.
    pub bs: i64,
    pub splitCount: i32,
    pub status: i32,
    pub bb: i8,
    pub modulo: i32,
    pub offset: i32,
    /*
    Precision:
    0 'default' whatever that is
    -7 f32
    -16 f64
    */
    pub precision: i16,
    pub dtype: DType,
    pub isCompressed: bool,
    pub isShaped: bool,
    pub isArray: bool,
    pub isBigEndian: bool,
    /// Present only when `isCompressed` is set.
    pub compressionMethod: Option<CompressionMethod>,
    /// Present only when `isShaped` is set; one extent per dimension.
    pub shape: Option<Vec<u32>>,
    // The optional short strings below are encoded with length -1 when absent.
    pub sourceName: Option<String>,
    unit: Option<String>,
    description: Option<String>,
    optionalFields: Option<String>,
    valueConverter: Option<String>,
}
/// Full parsed contents of a channel configuration file.
#[derive(Debug, Serialize, Deserialize)]
pub struct Config {
    /// File format version (the first i16 of the file).
    pub formatVersion: i16,
    /// Channel name from the file header.
    pub channelName: String,
    /// Configuration entries in file order.
    pub entries: Vec<ConfigEntry>,
}
fn parseShortString(inp: &[u8]) -> Result<(&[u8], Option<String>), Error> {
let (inp, len1) = be_i32(inp)?;
if len1 == -1 {
return Ok((inp, None));
}
if len1 < 4 {
return BadError(format!("bad string len {}", len1));
}
if len1 > 500 {
return BadError(format!("large string len {}", len1));
}
let (inp, snb) = take((len1 - 4) as usize)(inp)?;
let s1 = String::from_utf8(snb.to_vec())?;
Ok((inp, Some(s1)))
}
/*
Parse a single configuration entry.
*/
pub fn parseEntry(inp: &[u8]) -> Result<(&[u8], Option<ConfigEntry>), Error> {
let (inp, len1) = be_i32(inp)?;
if len1 < 0 || len1 > 4000 {
return BadError(format!("ConfigEntry bad len1 {}", len1));
}
if inp.len() == 0 {
return Ok((inp, None));
}
if inp.len() < len1 as usize - 4 {
return BadError(format!("incomplete input"));
}
let inpE = &inp[(len1-8) as usize ..];
let (inp, ts) = be_i64(inp)?;
let (inp, pulse) = be_i64(inp)?;
let (inp, ks) = be_i32(inp)?;
let (inp, bs) = be_i64(inp)?;
let (inp, splitCount) = be_i32(inp)?;
let (inp, status) = be_i32(inp)?;
let (inp, bb) = be_i8(inp)?;
let (inp, modulo) = be_i32(inp)?;
let (inp, offset) = be_i32(inp)?;
let (inp, precision) = be_i16(inp)?;
let (inp, dtlen) = be_i32(inp)?;
if dtlen > 100 {
return BadError(format!("unexpected data type len {}", dtlen));
}
let (inp, dtmask) = be_u8(inp)?;
let isCompressed = dtmask & 0x80 != 0;
let isArray = dtmask & 0x40 != 0;
let isBigEndian = dtmask & 0x20 != 0;
let isShaped = dtmask & 0x10 != 0;
let (inp, dtype) = be_i8(inp)?;
if dtype > 13 {
return BadError(format!("unexpected data type {}", dtype));
}
let dtype = match num_traits::FromPrimitive::from_i8(dtype) {
Some(k) => k,
None => return BadError(format!("Can not convert {} to DType", dtype))
};
let (inp, compressionMethod) = match isCompressed {
false => (inp, None),
true => {
let (inp, cm) = be_u8(inp)?;
match num_traits::FromPrimitive::from_u8(cm) {
Some(k) => (inp, Some(k)),
None => return BadError("unknown compression"),
}
}
};
let (inp, shape) = match isShaped {
false => (inp, None),
true => {
let (mut inp, dim) = be_u8(inp)?;
if dim > 4 { return BadError(format!("unexpected number of dimensions: {}", dim)); }
let mut shape = vec![];
for _ in 0..dim {
let t1 = be_i32(inp)?;
inp = t1.0;
shape.push(t1.1 as u32);
}
(inp, Some(shape))
}
};
let (inp, sourceName) = parseShortString(inp)?;
let (inp, unit) = parseShortString(inp)?;
let (inp, description) = parseShortString(inp)?;
let (inp, optionalFields) = parseShortString(inp)?;
let (inp, valueConverter) = parseShortString(inp)?;
assert_eq!(inp.len(), inpE.len());
let (inpE, len2) = be_i32(inpE)?;
if len1 != len2 {
return BadError(format!("mismatch len1 {} len2 {}", len1, len2));
}
Ok((inpE, Some(ConfigEntry {
ts, pulse, ks, bs, splitCount, status, bb, modulo, offset, precision, dtype,
isCompressed, isArray, isShaped, isBigEndian, compressionMethod, shape,
sourceName, unit, description, optionalFields, valueConverter,
})))
}
/// Parse the full configuration file.
///
/// Layout: `formatVersion: i16`, a length-framed channel name (the i32
/// length counts the name plus both surrounding length fields), then config
/// entries until the input is exhausted.
pub fn parseConfig(inp: &[u8]) -> Result<Config, Error> {
    let (inp, ver) = be_i16(inp)?;
    let (inp, len1) = be_i32(inp)?;
    if len1 <= 8 || len1 > 500 {
        return BadError(format!("no channel name. len1 {}", len1));
    }
    let (inp, chn) = take((len1 - 8) as usize)(inp)?;
    let (inp, len2) = be_i32(inp)?;
    if len1 != len2 {
        return BadError(format!("Mismatch len1 {} len2 {}", len1, len2));
    }
    let mut entries = Vec::new();
    let mut rest = inp;
    while !rest.is_empty() {
        let (next, entry) = parseEntry(rest)?;
        // `None` marks an end-of-entries terminator; `extend` on the Option
        // appends nothing in that case.
        entries.extend(entry);
        rest = next;
    }
    Ok(Config {
        formatVersion: ver,
        channelName: String::from_utf8(chn.to_vec())?,
        entries,
    })
}
/// Test fixture: load a real databuffer config file from the local `ks/`
/// test-data tree. Panics if the file is missing.
#[cfg(test)]
fn read_data() -> Vec<u8> {
    let path = "ks/config/S10CB01-RLOD100-PUP10:SIG-AMPLT/latest/00000_Config";
    std::fs::read(path).unwrap()
}
/// Exercises `parseConfig` on a minimal hand-built buffer: version 0, a
/// name frame of length 11 ("abc" + 8 bytes of framing), then a lone 4-byte
/// value that `parseEntry` treats as the end-of-entries marker.
#[test] fn parse_dummy() {
    let config = parseConfig(&[0, 0, 0, 0, 0, 11, 0x61, 0x62, 0x63, 0, 0, 0, 11,
        0, 0, 0, 1,
    ]).unwrap();
    assert_eq!(0, config.formatVersion);
    assert_eq!("abc", config.channelName);
}
/// Parses a real config file from disk (requires the `ks/` test data tree)
/// and sanity-checks the parsed entries.
#[test] fn open_file() {
    // Fixed: this called the non-existent `readData()`; the fixture helper
    // in this file is named `read_data`.
    let config = parseConfig(&read_data()).unwrap();
    assert_eq!(0, config.formatVersion);
    assert_eq!(9, config.entries.len());
    for e in &config.entries {
        assert!(e.ts >= 631152000000000000);
        assert!(e.ts <= 1591106812800073974);
        assert!(e.shape.is_some());
    }
}

View File

@@ -15,13 +15,16 @@ use bitshuffle::bitshuffle_compress;
use netpod::ScalarType;
use std::sync::Arc;
use netpod::{Node, Channel, ChannelConfig, Shape, timeunits::*};
use crate::ChannelConfigExt;
#[test]
fn test_gen_test_data() {
taskrun::run(async {
let res = taskrun::run(async {
gen_test_data().await?;
Ok(())
}).unwrap();
});
info!("{:?}", res);
res.unwrap();
}
pub async fn gen_test_data() -> Result<(), Error> {
@@ -40,6 +43,7 @@ pub async fn gen_test_data() -> Result<(), Error> {
},
keyspace: 3,
time_bin_size: DAY,
array: true,
scalar_type: ScalarType::F64,
shape: Shape::Wave(17),
big_endian: true,
@@ -87,12 +91,12 @@ async fn gen_channel(chn: &ChannelGenProps, node: &Node, ensemble: &Ensemble) ->
let config_path = node.data_base_path
.join("config")
.join(&chn.config.channel.name);
tokio::fs::create_dir_all(&config_path).await?;
let channel_path = node.data_base_path
.join(format!("{}_{}", node.ksprefix, chn.config.keyspace))
.join("byTime")
.join(&chn.config.channel.name);
tokio::fs::create_dir_all(&channel_path).await?;
gen_config(&config_path, &chn.config, node, ensemble).await.map_err(|k| Error::with_msg(format!("can not generate config {:?}", k)))?;
let mut evix = 0;
let mut ts = 0;
while ts < DAY {
@@ -103,6 +107,68 @@ async fn gen_channel(chn: &ChannelGenProps, node: &Node, ensemble: &Ensemble) ->
Ok(())
}
/// Write a databuffer channel config file for `config`.
///
/// Produces `<config_path>/latest/00000_Config` in the format read by
/// `channelconfig::parseConfig`: a version i16, a length-framed channel
/// name, then one length-framed config entry.
async fn gen_config(config_path: &Path, config: &ChannelConfig, node: &Node, ensemble: &Ensemble) -> Result<(), Error> {
    let path = config_path.join("latest");
    tokio::fs::create_dir_all(&path).await?;
    let path = path.join("00000_Config");
    info!("try to open {:?}", path);
    let mut file = OpenOptions::new().write(true).create(true).truncate(true).open(path).await?;
    let mut buf = BytesMut::with_capacity(1024 * 1);
    let ver = 0;
    buf.put_i16(ver);
    let cnenc = config.channel.name.as_bytes();
    // The name frame length counts the name plus both i32 length fields.
    let len1 = cnenc.len() + 8;
    buf.put_i32(len1 as i32);
    buf.put(cnenc);
    buf.put_i32(len1 as i32);
    let ts = 0;
    let pulse = 0;
    let sc = 0;
    let status = 0;
    let bb = 0;
    let modulo = 0;
    let offset = 0;
    let precision = 0;
    // p1 marks the start of the entry; the placeholder length written next
    // is patched once the entry size is known.
    let p1 = buf.len();
    buf.put_i32(0x20202020);
    buf.put_i64(ts);
    buf.put_i64(pulse);
    buf.put_i32(config.keyspace as i32);
    buf.put_i64(config.time_bin_size as i64);
    buf.put_i32(sc);
    buf.put_i32(status);
    buf.put_i8(bb);
    buf.put_i32(modulo);
    buf.put_i32(offset);
    buf.put_i16(precision);
    {
        // Data-type section. Its length field does not include itself and
        // there seems to be no copy of it afterwards.
        // p3 marks the placeholder position; previously the patch was
        // applied AFTER the placeholder, clobbering the flag/type bytes and
        // leaving 0x20202020 as the length (which the parser rejects).
        let p3 = buf.len();
        buf.put_i32(0x20202020);
        buf.put_u8(config.dtflags());
        buf.put_u8(config.scalar_type.index());
        if config.compression {
            let method = 0;
            buf.put_i8(method);
        }
        match config.shape {
            Shape::Scalar => {}
            Shape::Wave(k) => {
                // The reader (channelconfig::parseEntry) expects a
                // dimension-count byte before the per-dimension extents.
                buf.put_u8(1);
                buf.put_i32(k as i32);
            }
        }
        let len = buf.len() - p3 - 4;
        buf.as_mut()[p3..].as_mut().put_i32(len as i32);
    }
    // Optional short strings (sourceName, unit, description, optionalFields,
    // valueConverter); length -1 encodes an absent string, matching
    // channelconfig::parseShortString.
    for _ in 0..5 {
        buf.put_i32(-1);
    }
    let p2 = buf.len();
    let len = p2 - p1 + 4;
    buf.put_i32(len as i32);
    buf.as_mut()[p1..].as_mut().put_i32(len as i32);
    // The future returned by the write must be awaited, otherwise nothing is
    // written at all; write_all also guards against short writes.
    file.write_all(&buf).await?;
    Ok(())
}
struct GenTimebinRes {
evix: u64,
ts: u64,

View File

@@ -14,11 +14,14 @@ use std::path::PathBuf;
use bitshuffle::bitshuffle_decompress;
use netpod::{ScalarType, Shape, Node, ChannelConfig};
use std::sync::Arc;
use crate::dtflags::{COMPRESSION, BIG_ENDIAN, ARRAY, SHAPE};
pub mod agg;
pub mod gen;
pub mod merge;
pub mod cache;
pub mod raw;
pub mod channelconfig;
pub async fn read_test_1(query: &netpod::AggQuerySingleChannel, node: Arc<Node>) -> Result<netpod::BodyStream, Error> {
@@ -949,3 +952,24 @@ pub mod dtflags {
pub const BIG_ENDIAN: u8 = 0x20;
pub const SHAPE: u8 = 0x10;
}
/// Extension for `ChannelConfig`: derive the on-disk data-type flag byte.
trait ChannelConfigExt {
    /// Returns the dtype flag bits (COMPRESSION/ARRAY/BIG_ENDIAN/SHAPE).
    fn dtflags(&self) -> u8;
}
/// Builds the flag byte from the config's compression, shape, endianness
/// and array properties (see the `dtflags` constants module).
impl ChannelConfigExt for ChannelConfig {
    fn dtflags(&self) -> u8 {
        let compressed = if self.compression { COMPRESSION } else { 0 };
        let shaped = match self.shape {
            Shape::Scalar => 0,
            Shape::Wave(_) => SHAPE,
        };
        let endian = if self.big_endian { BIG_ENDIAN } else { 0 };
        let array = if self.array { ARRAY } else { 0 };
        compressed | shaped | endian | array
    }
}

46
disk/src/raw.rs Normal file
View File

@@ -0,0 +1,46 @@
/*
Provide ser/de of value data to a good net exchange format.
*/
// Draft of an end-to-end test for the locally unpacked event stream.
// NOTE(review): `query` and `node` are bound to `todo!()`, so this function
// panics immediately when called and everything after the first binding is
// unreachable; the commented-out literal sketches the intended query.
async fn local_unpacked_test() {
    // TODO what kind of query format? What information do I need here?
    // Don't need exact details of channel because I need to parse the databuffer config anyway.
    /*let query = netpod::AggQuerySingleChannel {
        channel_config: ChannelConfig {
            channel: Channel {
                backend: "ks".into(),
                name: "wave1".into(),
            },
            keyspace: 3,
            time_bin_size: DAY,
            shape: Shape::Wave(17),
            scalar_type: ScalarType::F64,
            big_endian: true,
            compression: true,
        },
        timebin: 0,
        tb_file_count: 1,
        buffer_size: 1024 * 8,
    };*/
    let query = todo!();
    let node = todo!();
    // TODO generate channel configs for my test data.
    // TODO open and parse the channel config.
    // TODO find the matching config entry. (bonus: fuse consecutive compatible entries)
    use crate::agg::{IntoDim1F32Stream};
    let stream = crate::EventBlobsComplete::new(&query, query.channel_config.clone(), node)
        .into_dim_1_f32_stream();
}