Open data via index file

This commit is contained in:
Dominik Werder
2021-04-28 11:35:06 +02:00
parent 09ea4175dc
commit 0204c37017
23 changed files with 966 additions and 709 deletions

154
disk/src/agg/binnedt.rs Normal file
View File

@@ -0,0 +1,154 @@
use crate::agg::{AggregatableTdim, AggregatorTdim};
use err::Error;
use futures_core::Stream;
use futures_util::StreamExt;
use netpod::log::*;
use netpod::BinSpecDimT;
use std::pin::Pin;
use std::task::{Context, Poll};
pub trait IntoBinnedT {
type StreamOut: Stream;
fn into_binned_t(self, spec: BinSpecDimT) -> Self::StreamOut;
}
impl<T, I> IntoBinnedT for T
where
I: AggregatableTdim + Unpin,
T: Stream<Item = Result<I, Error>> + Unpin,
I::Aggregator: Unpin,
{
type StreamOut = IntoBinnedTDefaultStream<T, I>;
fn into_binned_t(self, spec: BinSpecDimT) -> Self::StreamOut {
IntoBinnedTDefaultStream::new(self, spec)
}
}
pub struct IntoBinnedTDefaultStream<S, I>
where
I: AggregatableTdim,
S: Stream<Item = Result<I, Error>>,
{
inp: S,
aggtor: Option<I::Aggregator>,
spec: BinSpecDimT,
curbin: u32,
left: Option<Poll<Option<Result<I, Error>>>>,
errored: bool,
completed: bool,
inp_completed: bool,
}
impl<S, I> IntoBinnedTDefaultStream<S, I>
where
I: AggregatableTdim,
S: Stream<Item = Result<I, Error>>,
{
pub fn new(inp: S, spec: BinSpecDimT) -> Self {
let range = spec.get_range(0);
Self {
inp,
aggtor: Some(I::aggregator_new_static(range.beg, range.end)),
spec,
curbin: 0,
left: None,
errored: false,
completed: false,
inp_completed: false,
}
}
}
impl<T, I> Stream for IntoBinnedTDefaultStream<T, I>
where
I: AggregatableTdim + Unpin,
T: Stream<Item = Result<I, Error>> + Unpin,
I::Aggregator: Unpin,
{
type Item = Result<<I::Aggregator as AggregatorTdim>::OutputValue, Error>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
use Poll::*;
if self.completed {
panic!("MergedFromRemotes ✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗✗ poll_next on completed");
}
if self.errored {
self.completed = true;
return Ready(None);
}
'outer: loop {
let cur = if let Some(k) = self.left.take() {
trace!("IntoBinnedTDefaultStream USE LEFTOVER");
k
} else if self.inp_completed {
Ready(None)
} else {
let inp_poll_span = span!(Level::TRACE, "into_t_inp_poll");
inp_poll_span.in_scope(|| self.inp.poll_next_unpin(cx))
};
break match cur {
Ready(Some(Ok(k))) => {
let ag = self.aggtor.as_mut().unwrap();
if ag.ends_before(&k) {
//info!("ENDS BEFORE");
continue 'outer;
} else if ag.starts_after(&k) {
//info!("STARTS AFTER");
self.left = Some(Ready(Some(Ok(k))));
self.curbin += 1;
let range = self.spec.get_range(self.curbin);
let ret = self
.aggtor
.replace(I::aggregator_new_static(range.beg, range.end))
.unwrap()
.result();
Ready(Some(Ok(ret)))
} else {
//info!("INGEST");
ag.ingest(&k);
// if this input contains also data after the current bin, then I need to keep
// it for the next round.
if ag.ends_after(&k) {
//info!("ENDS AFTER");
self.left = Some(Ready(Some(Ok(k))));
self.curbin += 1;
let range = self.spec.get_range(self.curbin);
let ret = self
.aggtor
.replace(I::aggregator_new_static(range.beg, range.end))
.unwrap()
.result();
Ready(Some(Ok(ret)))
} else {
//info!("ENDS WITHIN");
continue 'outer;
}
}
}
Ready(Some(Err(e))) => {
self.errored = true;
Ready(Some(Err(e)))
}
Ready(None) => {
self.inp_completed = true;
if self.curbin as u64 >= self.spec.count {
warn!("IntoBinnedTDefaultStream curbin out of spec, END");
self.completed = true;
Ready(None)
} else {
self.curbin += 1;
let range = self.spec.get_range(self.curbin);
match self.aggtor.replace(I::aggregator_new_static(range.beg, range.end)) {
Some(ag) => Ready(Some(Ok(ag.result()))),
None => {
panic!();
}
}
}
}
Pending => Pending,
};
}
}
}

50
disk/src/agg/binnedx.rs Normal file
View File

@@ -0,0 +1,50 @@
use crate::agg::AggregatableXdim1Bin;
use err::Error;
use futures_core::Stream;
use futures_util::StreamExt;
use std::pin::Pin;
use std::task::{Context, Poll};
pub trait IntoBinnedXBins1<I: AggregatableXdim1Bin> {
type StreamOut;
fn into_binned_x_bins_1(self) -> Self::StreamOut
where
Self: Stream<Item = Result<I, Error>>;
}
impl<T, I: AggregatableXdim1Bin> IntoBinnedXBins1<I> for T
where
T: Stream<Item = Result<I, Error>> + Unpin,
{
type StreamOut = IntoBinnedXBins1DefaultStream<T, I>;
fn into_binned_x_bins_1(self) -> Self::StreamOut {
IntoBinnedXBins1DefaultStream { inp: self }
}
}
pub struct IntoBinnedXBins1DefaultStream<S, I>
where
S: Stream<Item = Result<I, Error>> + Unpin,
I: AggregatableXdim1Bin,
{
inp: S,
}
impl<S, I> Stream for IntoBinnedXBins1DefaultStream<S, I>
where
S: Stream<Item = Result<I, Error>> + Unpin,
I: AggregatableXdim1Bin,
{
type Item = Result<I::Output, Error>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
use Poll::*;
match self.inp.poll_next_unpin(cx) {
Ready(Some(Ok(k))) => Ready(Some(Ok(k.into_agg()))),
Ready(Some(Err(e))) => Ready(Some(Err(e))),
Ready(None) => Ready(None),
Pending => Pending,
}
}
}

247
disk/src/agg/eventbatch.rs Normal file
View File

@@ -0,0 +1,247 @@
use crate::agg::scalarbinbatch::MinMaxAvgScalarBinBatch;
use crate::agg::{AggregatableTdim, AggregatableXdim1Bin, AggregatorTdim, MinMaxAvgScalarBinSingle};
use bytes::{BufMut, Bytes, BytesMut};
use netpod::log::*;
use netpod::timeunits::SEC;
use serde::{Deserialize, Serialize};
use std::mem::size_of;
#[derive(Serialize, Deserialize)]
pub struct MinMaxAvgScalarEventBatch {
pub tss: Vec<u64>,
pub mins: Vec<f32>,
pub maxs: Vec<f32>,
pub avgs: Vec<f32>,
}
impl MinMaxAvgScalarEventBatch {
pub fn empty() -> Self {
Self {
tss: vec![],
mins: vec![],
maxs: vec![],
avgs: vec![],
}
}
#[allow(dead_code)]
pub fn old_from_full_frame(buf: &Bytes) -> Self {
info!("construct MinMaxAvgScalarEventBatch from full frame len {}", buf.len());
assert!(buf.len() >= 4);
let mut g = MinMaxAvgScalarEventBatch::empty();
let n1;
unsafe {
let ptr = (&buf[0] as *const u8) as *const [u8; 4];
n1 = u32::from_le_bytes(*ptr);
trace!("--- +++ --- +++ --- +++ n1: {}", n1);
}
if n1 == 0 {
g
} else {
let n2 = n1 as usize;
g.tss.reserve(n2);
g.mins.reserve(n2);
g.maxs.reserve(n2);
g.avgs.reserve(n2);
unsafe {
// TODO Can I unsafely create ptrs and just assign them?
// TODO What are cases where I really need transmute?
g.tss.set_len(n2);
g.mins.set_len(n2);
g.maxs.set_len(n2);
g.avgs.set_len(n2);
let ptr0 = &buf[4] as *const u8;
{
let ptr1 = ptr0 as *const u64;
for i1 in 0..n2 {
g.tss[i1] = *ptr1.add(i1);
}
}
{
let ptr1 = ptr0.add((8) * n2) as *const f32;
for i1 in 0..n2 {
g.mins[i1] = *ptr1.add(i1);
}
}
{
let ptr1 = ptr0.add((8 + 4) * n2) as *const f32;
for i1 in 0..n2 {
g.maxs[i1] = *ptr1;
}
}
{
let ptr1 = ptr0.add((8 + 4 + 4) * n2) as *const f32;
for i1 in 0..n2 {
g.avgs[i1] = *ptr1;
}
}
}
info!("CONTENT {:?}", g);
g
}
}
}
impl std::fmt::Debug for MinMaxAvgScalarEventBatch {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
fmt,
"MinMaxAvgScalarEventBatch count {} tss {:?} mins {:?} maxs {:?} avgs {:?}",
self.tss.len(),
self.tss,
self.mins,
self.maxs,
self.avgs,
)
}
}
impl AggregatableXdim1Bin for MinMaxAvgScalarEventBatch {
type Output = MinMaxAvgScalarEventBatch;
fn into_agg(self) -> Self::Output {
self
}
}
impl AggregatableTdim for MinMaxAvgScalarEventBatch {
type Output = MinMaxAvgScalarBinBatch;
type Aggregator = MinMaxAvgScalarEventBatchAggregator;
fn aggregator_new_static(ts1: u64, ts2: u64) -> Self::Aggregator {
MinMaxAvgScalarEventBatchAggregator::new(ts1, ts2)
}
}
impl MinMaxAvgScalarEventBatch {
#[allow(dead_code)]
fn old_serialized(&self) -> Bytes {
let n1 = self.tss.len();
let mut g = BytesMut::with_capacity(4 + n1 * (8 + 3 * 4));
g.put_u32_le(n1 as u32);
if n1 > 0 {
let ptr = &self.tss[0] as *const u64 as *const u8;
let a = unsafe { std::slice::from_raw_parts(ptr, size_of::<u64>() * n1) };
g.put(a);
let ptr = &self.mins[0] as *const f32 as *const u8;
let a = unsafe { std::slice::from_raw_parts(ptr, size_of::<f32>() * n1) };
g.put(a);
let ptr = &self.maxs[0] as *const f32 as *const u8;
let a = unsafe { std::slice::from_raw_parts(ptr, size_of::<f32>() * n1) };
g.put(a);
let ptr = &self.avgs[0] as *const f32 as *const u8;
let a = unsafe { std::slice::from_raw_parts(ptr, size_of::<f32>() * n1) };
g.put(a);
}
info!("impl Frameable for MinMaxAvgScalarEventBatch g.len() {}", g.len());
g.freeze()
}
}
pub struct MinMaxAvgScalarEventBatchAggregator {
ts1: u64,
ts2: u64,
count: u64,
min: f32,
max: f32,
sum: f32,
}
impl MinMaxAvgScalarEventBatchAggregator {
pub fn new(ts1: u64, ts2: u64) -> Self {
Self {
ts1,
ts2,
min: f32::MAX,
max: f32::MIN,
sum: 0f32,
count: 0,
}
}
}
impl AggregatorTdim for MinMaxAvgScalarEventBatchAggregator {
type InputValue = MinMaxAvgScalarEventBatch;
type OutputValue = MinMaxAvgScalarBinSingle;
fn ends_before(&self, inp: &Self::InputValue) -> bool {
match inp.tss.last() {
Some(ts) => *ts < self.ts1,
None => true,
}
}
fn ends_after(&self, inp: &Self::InputValue) -> bool {
match inp.tss.last() {
Some(ts) => *ts >= self.ts2,
_ => panic!(),
}
}
fn starts_after(&self, inp: &Self::InputValue) -> bool {
match inp.tss.first() {
Some(ts) => *ts >= self.ts2,
_ => panic!(),
}
}
fn ingest(&mut self, v: &Self::InputValue) {
trace!(
"ingest {} {} {} {:?} {:?}",
self.ends_before(v),
self.ends_after(v),
self.starts_after(v),
v.tss.first().map(|k| k / SEC),
v.tss.last().map(|k| k / SEC),
);
for i1 in 0..v.tss.len() {
let ts = v.tss[i1];
if ts < self.ts1 {
trace!(
"EventBatchAgg {} {} {} {} IS BEFORE",
v.tss[i1],
v.mins[i1],
v.maxs[i1],
v.avgs[i1]
);
continue;
} else if ts >= self.ts2 {
trace!(
"EventBatchAgg {} {} {} {} IS AFTER",
v.tss[i1],
v.mins[i1],
v.maxs[i1],
v.avgs[i1]
);
continue;
} else {
trace!(
"EventBatchAgg {} {} {} {}",
v.tss[i1],
v.mins[i1],
v.maxs[i1],
v.avgs[i1]
);
self.min = self.min.min(v.mins[i1]);
self.max = self.max.max(v.maxs[i1]);
self.sum += v.avgs[i1];
self.count += 1;
}
}
}
fn result(self) -> Self::OutputValue {
let min = if self.min == f32::MAX { f32::NAN } else { self.min };
let max = if self.max == f32::MIN { f32::NAN } else { self.max };
let avg = if self.count == 0 {
f32::NAN
} else {
self.sum / self.count as f32
};
MinMaxAvgScalarBinSingle {
ts1: self.ts1,
ts2: self.ts2,
count: self.count,
min,
max,
avg,
}
}
}

View File

@@ -0,0 +1,226 @@
use crate::agg::{AggregatableTdim, AggregatableXdim1Bin, AggregatorTdim, Fits, FitsInside, MinMaxAvgScalarBinSingle};
use bytes::{BufMut, Bytes, BytesMut};
use netpod::log::*;
use netpod::timeunits::SEC;
use netpod::NanoRange;
use serde::{Deserialize, Serialize};
use std::mem::size_of;
#[allow(dead_code)]
#[derive(Serialize, Deserialize)]
pub struct MinMaxAvgScalarBinBatch {
ts1s: Vec<u64>,
ts2s: Vec<u64>,
counts: Vec<u64>,
mins: Vec<f32>,
maxs: Vec<f32>,
avgs: Vec<f32>,
}
impl MinMaxAvgScalarBinBatch {
pub fn empty() -> Self {
Self {
ts1s: vec![],
ts2s: vec![],
counts: vec![],
mins: vec![],
maxs: vec![],
avgs: vec![],
}
}
pub fn len(&self) -> usize {
self.ts1s.len()
}
pub fn push_single(&mut self, g: &MinMaxAvgScalarBinSingle) {
self.ts1s.push(g.ts1);
self.ts2s.push(g.ts2);
self.counts.push(g.count);
self.mins.push(g.min);
self.maxs.push(g.max);
self.avgs.push(g.avg);
}
pub fn from_full_frame(buf: &Bytes) -> Self {
info!("MinMaxAvgScalarBinBatch construct from full frame len {}", buf.len());
assert!(buf.len() >= 4);
let mut g = MinMaxAvgScalarBinBatch::empty();
let n1;
unsafe {
let ptr = (&buf[0] as *const u8) as *const [u8; 4];
n1 = u32::from_le_bytes(*ptr);
trace!(
"MinMaxAvgScalarBinBatch construct --- +++ --- +++ --- +++ n1: {}",
n1
);
}
if n1 == 0 {
g
} else {
let n2 = n1 as usize;
g.ts1s.reserve(n2);
g.ts2s.reserve(n2);
g.counts.reserve(n2);
g.mins.reserve(n2);
g.maxs.reserve(n2);
g.avgs.reserve(n2);
unsafe {
// TODO Can I unsafely create ptrs and just assign them?
// TODO What are cases where I really need transmute?
g.ts1s.set_len(n2);
g.ts2s.set_len(n2);
g.counts.set_len(n2);
g.mins.set_len(n2);
g.maxs.set_len(n2);
g.avgs.set_len(n2);
let ptr0 = &buf[4] as *const u8;
{
let ptr1 = ptr0.add(0) as *const u64;
for i1 in 0..n2 {
g.ts1s[i1] = *ptr1.add(i1);
}
}
{
let ptr1 = ptr0.add((8) * n2) as *const u64;
for i1 in 0..n2 {
g.ts2s[i1] = *ptr1.add(i1);
}
}
{
let ptr1 = ptr0.add((8 + 8) * n2) as *const u64;
for i1 in 0..n2 {
g.counts[i1] = *ptr1.add(i1);
}
}
{
let ptr1 = ptr0.add((8 + 8 + 8) * n2) as *const f32;
for i1 in 0..n2 {
g.mins[i1] = *ptr1.add(i1);
}
}
{
let ptr1 = ptr0.add((8 + 8 + 8 + 4) * n2) as *const f32;
for i1 in 0..n2 {
g.maxs[i1] = *ptr1;
}
}
{
let ptr1 = ptr0.add((8 + 8 + 8 + 4 + 4) * n2) as *const f32;
for i1 in 0..n2 {
g.avgs[i1] = *ptr1;
}
}
}
info!("CONTENT {:?}", g);
g
}
}
}
impl FitsInside for MinMaxAvgScalarBinBatch {
fn fits_inside(&self, range: NanoRange) -> Fits {
if self.ts1s.is_empty() {
Fits::Empty
} else {
let t1 = *self.ts1s.first().unwrap();
let t2 = *self.ts2s.last().unwrap();
if t2 <= range.beg {
Fits::Lower
} else if t1 >= range.end {
Fits::Greater
} else if t1 < range.beg && t2 > range.end {
Fits::PartlyLowerAndGreater
} else if t1 < range.beg {
Fits::PartlyLower
} else if t2 > range.end {
Fits::PartlyGreater
} else {
Fits::Inside
}
}
}
}
impl MinMaxAvgScalarBinBatch {
#[allow(dead_code)]
fn old_serialized(&self) -> Bytes {
let n1 = self.ts1s.len();
let mut g = BytesMut::with_capacity(4 + n1 * (3 * 8 + 3 * 4));
g.put_u32_le(n1 as u32);
if n1 > 0 {
let ptr = &self.ts1s[0] as *const u64 as *const u8;
let a = unsafe { std::slice::from_raw_parts(ptr, size_of::<u64>() * n1) };
g.put(a);
let ptr = &self.ts2s[0] as *const u64 as *const u8;
let a = unsafe { std::slice::from_raw_parts(ptr, size_of::<u64>() * n1) };
g.put(a);
let ptr = &self.counts[0] as *const u64 as *const u8;
let a = unsafe { std::slice::from_raw_parts(ptr, size_of::<u64>() * n1) };
g.put(a);
let ptr = &self.mins[0] as *const f32 as *const u8;
let a = unsafe { std::slice::from_raw_parts(ptr, size_of::<f32>() * n1) };
g.put(a);
let ptr = &self.maxs[0] as *const f32 as *const u8;
let a = unsafe { std::slice::from_raw_parts(ptr, size_of::<f32>() * n1) };
g.put(a);
let ptr = &self.avgs[0] as *const f32 as *const u8;
let a = unsafe { std::slice::from_raw_parts(ptr, size_of::<f32>() * n1) };
g.put(a);
}
g.freeze()
}
}
impl std::fmt::Debug for MinMaxAvgScalarBinBatch {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
fmt,
"MinMaxAvgScalarBinBatch count {} ts1s {:?} ts2s {:?} counts {:?} avgs {:?}",
self.ts1s.len(),
self.ts1s.iter().map(|k| k / SEC).collect::<Vec<_>>(),
self.ts2s.iter().map(|k| k / SEC).collect::<Vec<_>>(),
self.counts,
self.avgs
)
}
}
impl AggregatableXdim1Bin for MinMaxAvgScalarBinBatch {
type Output = MinMaxAvgScalarBinBatch;
fn into_agg(self) -> Self::Output {
todo!()
}
}
impl AggregatableTdim for MinMaxAvgScalarBinBatch {
type Output = MinMaxAvgScalarBinSingle;
type Aggregator = MinMaxAvgScalarBinBatchAggregator;
fn aggregator_new_static(_ts1: u64, _ts2: u64) -> Self::Aggregator {
todo!()
}
}
pub struct MinMaxAvgScalarBinBatchAggregator {}
impl AggregatorTdim for MinMaxAvgScalarBinBatchAggregator {
type InputValue = MinMaxAvgScalarBinBatch;
type OutputValue = MinMaxAvgScalarBinSingle;
fn ends_before(&self, _inp: &Self::InputValue) -> bool {
todo!()
}
fn ends_after(&self, _inp: &Self::InputValue) -> bool {
todo!()
}
fn starts_after(&self, _inp: &Self::InputValue) -> bool {
todo!()
}
fn ingest(&mut self, _v: &Self::InputValue) {
todo!()
}
fn result(self) -> Self::OutputValue {
todo!()
}
}