Try to defend better against unexpected shapes where possible

This commit is contained in:
Dominik Werder
2023-08-30 16:39:00 +02:00
parent ab9a4d69ec
commit 05a31fbad1
21 changed files with 452 additions and 191 deletions

View File

@@ -155,15 +155,23 @@ impl AsAnyMut for ChannelEvents {
}
mod serde_channel_events {
use super::{ChannelEvents, Events};
use super::ChannelEvents;
use super::Events;
use crate::channelevents::ConnStatusEvent;
use crate::eventsdim0::EventsDim0;
use crate::eventsdim1::EventsDim1;
use crate::eventsxbindim0::EventsXbinDim0;
use items_0::subfr::SubFrId;
use serde::de::{self, EnumAccess, VariantAccess, Visitor};
use netpod::log::*;
use serde::de;
use serde::de::EnumAccess;
use serde::de::VariantAccess;
use serde::de::Visitor;
use serde::ser::SerializeSeq;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde::Deserialize;
use serde::Deserializer;
use serde::Serialize;
use serde::Serializer;
use std::fmt;
struct EvRef<'a>(&'a dyn Events);
@@ -202,10 +210,10 @@ mod serde_channel_events {
where
A: de::SeqAccess<'de>,
{
let e0: &str = seq.next_element()?.ok_or_else(|| de::Error::missing_field("[0] cty"))?;
let e1: u32 = seq.next_element()?.ok_or_else(|| de::Error::missing_field("[1] nty"))?;
if e0 == EventsDim0::<u8>::serde_id() {
match e1 {
let cty: &str = seq.next_element()?.ok_or_else(|| de::Error::missing_field("[0] cty"))?;
let nty: u32 = seq.next_element()?.ok_or_else(|| de::Error::missing_field("[1] nty"))?;
if cty == EventsDim0::<u8>::serde_id() {
match nty {
u8::SUB => {
let obj: EventsDim0<u8> =
seq.next_element()?.ok_or_else(|| de::Error::missing_field("[2] obj"))?;
@@ -261,10 +269,18 @@ mod serde_channel_events {
seq.next_element()?.ok_or_else(|| de::Error::missing_field("[2] obj"))?;
Ok(EvBox(Box::new(obj)))
}
_ => Err(de::Error::custom(&format!("unknown nty {e1}"))),
_ => {
error!("TODO serde cty {cty} nty {nty}");
Err(de::Error::custom(&format!("unknown nty {nty}")))
}
}
} else if e0 == EventsDim1::<u8>::serde_id() {
match e1 {
} else if cty == EventsDim1::<u8>::serde_id() {
match nty {
i64::SUB => {
let obj: EventsDim1<i64> =
seq.next_element()?.ok_or_else(|| de::Error::missing_field("[2] obj"))?;
Ok(EvBox(Box::new(obj)))
}
f32::SUB => {
let obj: EventsDim1<f32> =
seq.next_element()?.ok_or_else(|| de::Error::missing_field("[2] obj"))?;
@@ -280,10 +296,13 @@ mod serde_channel_events {
seq.next_element()?.ok_or_else(|| de::Error::missing_field("[2] obj"))?;
Ok(EvBox(Box::new(obj)))
}
_ => Err(de::Error::custom(&format!("unknown nty {e1}"))),
_ => {
error!("TODO serde cty {cty} nty {nty}");
Err(de::Error::custom(&format!("unknown nty {nty}")))
}
}
} else if e0 == EventsXbinDim0::<u8>::serde_id() {
match e1 {
} else if cty == EventsXbinDim0::<u8>::serde_id() {
match nty {
f32::SUB => {
let obj: EventsXbinDim0<f32> =
seq.next_element()?.ok_or_else(|| de::Error::missing_field("[2] obj"))?;
@@ -299,10 +318,14 @@ mod serde_channel_events {
seq.next_element()?.ok_or_else(|| de::Error::missing_field("[2] obj"))?;
Ok(EvBox(Box::new(obj)))
}
_ => Err(de::Error::custom(&format!("unknown nty {e1}"))),
_ => {
error!("TODO serde cty {cty} nty {nty}");
Err(de::Error::custom(&format!("unknown nty {nty}")))
}
}
} else {
Err(de::Error::custom(&format!("unknown cty {e0}")))
error!("TODO serde cty {cty} nty {nty}");
Err(de::Error::custom(&format!("unknown cty {cty}")))
}
}
}

View File

@@ -126,6 +126,16 @@ impl EventFull {
*u = shape.clone();
}
}
pub fn pop_back(&mut self) {
self.tss.pop_back();
self.pulses.pop_back();
self.blobs.pop_back();
self.scalar_types.pop_back();
self.be.pop_back();
self.shapes.pop_back();
self.comps.pop_back();
}
}
impl FrameTypeInnerStatic for EventFull {
@@ -230,9 +240,11 @@ pub enum DecompError {
BadCompresionBlockSize,
UnusedBytes,
BitshuffleError,
ShapeMakesNoSense,
}
fn decompress(databuf: &[u8], type_size: u32, ele_count_2: u64, ele_count_exp: u64) -> Result<Vec<u8>, DecompError> {
fn decompress(databuf: &[u8], type_size: u32) -> Result<Vec<u8>, DecompError> {
// TODO collect decompression stats
let ts1 = Instant::now();
if databuf.len() < 12 {
return Err(DecompError::TooLittleInput);
@@ -276,22 +288,93 @@ fn decompress(databuf: &[u8], type_size: u32, ele_count_2: u64, ele_count_exp: u
}
impl EventFull {
/// Tries to infer the actual shape of the event from what's on disk and what we expect.
/// The event data on disk almost always indicates "scalar" even for waveforms.
/// If the data is compressed via bslz4 then we can infer the number of elements
/// but we still don't know whether that's an image or a waveform.
/// Therefore, the function accepts the expected shape to at least make an assumption
/// about whether this is an image or a waveform.
pub fn shape_derived(&self, i: usize, shape_exp: &Shape) -> Result<Shape, DecompError> {
match shape_exp {
Shape::Scalar => match &self.comps[i] {
Some(_) => Err(DecompError::ShapeMakesNoSense),
None => Ok(Shape::Scalar),
},
Shape::Wave(_) => match &self.shapes[i] {
Shape::Scalar => match &self.comps[i] {
Some(comp) => match comp {
CompressionMethod::BitshuffleLZ4 => {
let type_size = self.scalar_types[i].bytes() as u32;
match self.blobs[i][0..8].try_into() {
Ok(a) => {
let value_bytes = u64::from_be_bytes(a);
let value_bytes = value_bytes as u32;
if value_bytes % type_size != 0 {
Err(DecompError::ShapeMakesNoSense)
} else {
let n = value_bytes / type_size;
// Here we still can't know whether the disk contains a waveform or image
// so we assume that the user input is correct:
Ok(Shape::Wave(n))
}
}
Err(_) => Err(DecompError::ShapeMakesNoSense),
}
}
},
None => Err(DecompError::ShapeMakesNoSense),
},
Shape::Wave(s) => Ok(Shape::Wave(s.clone())),
Shape::Image(_, _) => Err(DecompError::ShapeMakesNoSense),
},
Shape::Image(a, b) => match &self.shapes[i] {
Shape::Scalar => match &self.comps[i] {
Some(comp) => match comp {
CompressionMethod::BitshuffleLZ4 => {
let type_size = self.scalar_types[i].bytes() as u32;
match self.blobs[i][0..8].try_into() {
Ok(vb) => {
let value_bytes = u64::from_be_bytes(vb);
let value_bytes = value_bytes as u32;
if value_bytes % type_size != 0 {
Err(DecompError::ShapeMakesNoSense)
} else {
let n = value_bytes / type_size;
// Here we still can't know whether the disk contains a waveform or image
// so we assume that the user input is correct.
// NOTE
// We only know the number of pixels from the compressed blob but we can't
// know the actual shape.
// Can only rely on user input.
Ok(Shape::Image(*a, *b))
}
}
Err(_) => Err(DecompError::ShapeMakesNoSense),
}
}
},
None => Err(DecompError::ShapeMakesNoSense),
},
Shape::Wave(_) => Err(DecompError::ShapeMakesNoSense),
Shape::Image(a, b) => Ok(Shape::Image(*a, *b)),
},
}
}
pub fn data_raw(&self, i: usize) -> &[u8] {
&self.blobs[i]
}
pub fn data_decompressed(
&self,
i: usize,
_scalar_type: &ScalarType,
shape: &Shape,
) -> Result<Cow<[u8]>, DecompError> {
pub fn data_decompressed(&self, i: usize) -> Result<Cow<[u8]>, DecompError> {
if let Some(comp) = &self.comps[i] {
match comp {
CompressionMethod::BitshuffleLZ4 => {
// NOTE the event data on databuffer disk seems to contain the correct scalar type
// but the shape of the event record always seems to be "scalar", even for waveforms,
// so we must derive the shape of the compressed data from the length of the
// uncompressed byte blob and the byte size of the scalar type.
let type_size = self.scalar_types[i].bytes() as u32;
let ele_count = self.shapes[i].ele_count();
let data = decompress(&self.blobs[i], type_size, ele_count, shape.ele_count())?;
let data = decompress(&self.blobs[i], type_size)?;
Ok(Cow::Owned(data))
}
}

View File

@@ -228,15 +228,19 @@ where
#[derive(Debug)]
pub struct EventsDim0Collector<STY> {
vals: Option<EventsDim0<STY>>,
vals: EventsDim0<STY>,
range_final: bool,
timed_out: bool,
}
impl<STY> EventsDim0Collector<STY> {
pub fn self_name() -> &'static str {
any::type_name::<Self>()
}
pub fn new() -> Self {
Self {
vals: None,
vals: EventsDim0::empty(),
range_final: false,
timed_out: false,
}
@@ -245,7 +249,7 @@ impl<STY> EventsDim0Collector<STY> {
impl<STY> WithLen for EventsDim0Collector<STY> {
fn len(&self) -> usize {
self.vals.as_ref().map_or(0, |x| x.tss.len())
self.vals.tss.len()
}
}
@@ -367,13 +371,9 @@ impl<STY: ScalarOps> CollectorType for EventsDim0Collector<STY> {
type Output = EventsDim0CollectorOutput<STY>;
fn ingest(&mut self, src: &mut Self::Input) {
if self.vals.is_none() {
self.vals = Some(EventsDim0::empty());
}
let vals = self.vals.as_mut().unwrap();
vals.tss.append(&mut src.tss);
vals.pulses.append(&mut src.pulses);
vals.values.append(&mut src.values);
self.vals.tss.append(&mut src.tss);
self.vals.pulses.append(&mut src.pulses);
self.vals.values.append(&mut src.values);
}
fn set_range_complete(&mut self) {
@@ -389,17 +389,12 @@ impl<STY: ScalarOps> CollectorType for EventsDim0Collector<STY> {
range: Option<SeriesRange>,
_binrange: Option<BinnedRangeEnum>,
) -> Result<Self::Output, Error> {
let self_name = any::type_name::<Self>();
// If we timed out, we want to hint the client from where to continue.
// This is tricky: currently, client can not request a left-exclusive range.
// We currently give the timestamp of the last event plus a small delta.
// The amount of the delta must take into account what kind of timestamp precision the client
// can parse and handle.
let vals = if let Some(x) = &mut self.vals {
x
} else {
return Err(Error::with_msg_no_trace(format!("{self_name} no vals")));
};
let vals = &mut self.vals;
let continue_at = if self.timed_out {
if let Some(ts) = vals.tss.back() {
Some(IsoDateTime::from_u64(*ts + MS))

View File

@@ -29,6 +29,7 @@ use items_0::WithLen;
use netpod::is_false;
use netpod::log::*;
use netpod::range::evrange::SeriesRange;
use netpod::timeunits::MS;
use netpod::timeunits::SEC;
use netpod::BinnedRangeEnum;
use serde::Deserialize;
@@ -184,13 +185,17 @@ where
}
#[derive(Debug)]
pub struct EventsDim1Collector<NTY> {
vals: EventsDim1<NTY>,
pub struct EventsDim1Collector<STY> {
vals: EventsDim1<STY>,
range_final: bool,
timed_out: bool,
}
impl<NTY> EventsDim1Collector<NTY> {
impl<STY> EventsDim1Collector<STY> {
pub fn self_name() -> &'static str {
any::type_name::<Self>()
}
pub fn new() -> Self {
Self {
vals: EventsDim1::empty(),
@@ -200,14 +205,14 @@ impl<NTY> EventsDim1Collector<NTY> {
}
}
impl<NTY> WithLen for EventsDim1Collector<NTY> {
impl<STY> WithLen for EventsDim1Collector<STY> {
fn len(&self) -> usize {
self.vals.tss.len()
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct EventsDim1CollectorOutput<NTY> {
pub struct EventsDim1CollectorOutput<STY> {
#[serde(rename = "tsAnchor")]
ts_anchor_sec: u64,
#[serde(rename = "tsMs")]
@@ -219,7 +224,7 @@ pub struct EventsDim1CollectorOutput<NTY> {
#[serde(rename = "pulseOff")]
pulse_off: VecDeque<u64>,
#[serde(rename = "values")]
values: VecDeque<Vec<NTY>>,
values: VecDeque<Vec<STY>>,
#[serde(rename = "rangeFinal", default, skip_serializing_if = "is_false")]
range_final: bool,
#[serde(rename = "timedOut", default, skip_serializing_if = "is_false")]
@@ -228,7 +233,7 @@ pub struct EventsDim1CollectorOutput<NTY> {
continue_at: Option<IsoDateTime>,
}
impl<NTY: ScalarOps> EventsDim1CollectorOutput<NTY> {
impl<STY: ScalarOps> EventsDim1CollectorOutput<STY> {
pub fn ts_anchor_sec(&self) -> u64 {
self.ts_anchor_sec
}
@@ -253,13 +258,40 @@ impl<NTY: ScalarOps> EventsDim1CollectorOutput<NTY> {
.collect()
}
pub fn range_complete(&self) -> bool {
pub fn range_final(&self) -> bool {
self.range_final
}
pub fn timed_out(&self) -> bool {
self.timed_out
}
pub fn is_valid(&self) -> bool {
if self.ts_off_ms.len() != self.ts_off_ns.len() {
false
} else if self.ts_off_ms.len() != self.pulse_off.len() {
false
} else if self.ts_off_ms.len() != self.values.len() {
false
} else {
true
}
}
pub fn info_str(&self) -> String {
use fmt::Write;
let mut out = String::new();
write!(
out,
"ts_off_ms {} ts_off_ns {} pulse_off {} values {}",
self.ts_off_ms.len(),
self.ts_off_ns.len(),
self.pulse_off.len(),
self.values.len(),
)
.unwrap();
out
}
}
impl<NTY> AsAnyRef for EventsDim1CollectorOutput<NTY>
@@ -295,9 +327,9 @@ impl<NTY: ScalarOps> ToJsonResult for EventsDim1CollectorOutput<NTY> {
impl<NTY: ScalarOps> Collected for EventsDim1CollectorOutput<NTY> {}
impl<NTY: ScalarOps> CollectorType for EventsDim1Collector<NTY> {
type Input = EventsDim1<NTY>;
type Output = EventsDim1CollectorOutput<NTY>;
impl<STY: ScalarOps> CollectorType for EventsDim1Collector<STY> {
type Input = EventsDim1<STY>;
type Output = EventsDim1CollectorOutput<STY>;
fn ingest(&mut self, src: &mut Self::Input) {
self.vals.tss.append(&mut src.tss);
@@ -313,6 +345,7 @@ impl<NTY: ScalarOps> CollectorType for EventsDim1Collector<NTY> {
self.timed_out = true;
}
// TODO unify with dim0 case
fn result(
&mut self,
range: Option<SeriesRange>,
@@ -323,12 +356,19 @@ impl<NTY: ScalarOps> CollectorType for EventsDim1Collector<NTY> {
// We currently give the timestamp of the last event plus a small delta.
// The amount of the delta must take into account what kind of timestamp precision the client
// can parse and handle.
/*let continue_at = if self.timed_out {
if let Some(ts) = self.vals.tss.back() {
Some(IsoDateTime::from_u64(*ts + netpod::timeunits::MS))
let vals = &mut self.vals;
let continue_at = if self.timed_out {
if let Some(ts) = vals.tss.back() {
Some(IsoDateTime::from_u64(*ts + MS))
} else {
if let Some(range) = &range {
Some(IsoDateTime::from_u64(range.beg + netpod::timeunits::SEC))
match range {
SeriesRange::TimeRange(x) => Some(IsoDateTime::from_u64(x.beg + SEC)),
SeriesRange::PulseRange(x) => {
error!("TODO emit create continueAt for pulse range");
None
}
}
} else {
warn!("can not determine continue-at parameters");
None
@@ -337,28 +377,40 @@ impl<NTY: ScalarOps> CollectorType for EventsDim1Collector<NTY> {
} else {
None
};
let tss_sl = self.vals.tss.make_contiguous();
let pulses_sl = self.vals.pulses.make_contiguous();
let tss_sl = vals.tss.make_contiguous();
let pulses_sl = vals.pulses.make_contiguous();
let (ts_anchor_sec, ts_off_ms, ts_off_ns) = crate::ts_offs_from_abs(tss_sl);
let (pulse_anchor, pulse_off) = crate::pulse_offs_from_abs(pulses_sl);
let values = mem::replace(&mut vals.values, VecDeque::new());
if ts_off_ms.len() != ts_off_ns.len() {
return Err(Error::with_msg_no_trace("collected len mismatch"));
}
if ts_off_ms.len() != pulse_off.len() {
return Err(Error::with_msg_no_trace("collected len mismatch"));
}
if ts_off_ms.len() != values.len() {
return Err(Error::with_msg_no_trace("collected len mismatch"));
}
let ret = Self::Output {
ts_anchor_sec,
ts_off_ms,
ts_off_ns,
pulse_anchor,
pulse_off: pulse_off,
values: mem::replace(&mut self.vals.values, VecDeque::new()),
pulse_off,
values,
range_final: self.range_final,
timed_out: self.timed_out,
continue_at,
};
Ok(ret)*/
todo!()
if !ret.is_valid() {
error!("invalid:\n{}", ret.info_str());
}
Ok(ret)
}
}
impl<NTY: ScalarOps> CollectableType for EventsDim1<NTY> {
type Collector = EventsDim1Collector<NTY>;
impl<STY: ScalarOps> CollectableType for EventsDim1<STY> {
type Collector = EventsDim1Collector<STY>;
fn new_collector() -> Self::Collector {
Self::Collector::new()

View File

@@ -64,7 +64,7 @@ where
ser
}
pub fn bincode_to_vec<S>(item: S) -> Result<Vec<u8>, Error>
fn bincode_to_vec<S>(item: S) -> Result<Vec<u8>, Error>
where
S: Serialize,
{
@@ -74,7 +74,7 @@ where
Ok(out)
}
pub fn bincode_from_slice<T>(buf: &[u8]) -> Result<T, Error>
fn bincode_from_slice<T>(buf: &[u8]) -> Result<T, Error>
where
T: for<'de> serde::Deserialize<'de>,
{
@@ -87,14 +87,14 @@ where
<T as serde::Deserialize>::deserialize(&mut de).map_err(|e| format!("{e}").into())
}
pub fn msgpack_to_vec<T>(item: T) -> Result<Vec<u8>, Error>
fn msgpack_to_vec<T>(item: T) -> Result<Vec<u8>, Error>
where
T: Serialize,
{
rmp_serde::to_vec_named(&item).map_err(|e| format!("{e}").into())
}
pub fn msgpack_erased_to_vec<T>(item: T) -> Result<Vec<u8>, Error>
fn msgpack_erased_to_vec<T>(item: T) -> Result<Vec<u8>, Error>
where
T: erased_serde::Serialize,
{
@@ -106,21 +106,21 @@ where
Ok(out)
}
pub fn msgpack_from_slice<T>(buf: &[u8]) -> Result<T, Error>
fn msgpack_from_slice<T>(buf: &[u8]) -> Result<T, Error>
where
T: for<'de> serde::Deserialize<'de>,
{
rmp_serde::from_slice(buf).map_err(|e| format!("{e}").into())
}
pub fn postcard_to_vec<T>(item: T) -> Result<Vec<u8>, Error>
fn postcard_to_vec<T>(item: T) -> Result<Vec<u8>, Error>
where
T: Serialize,
{
postcard::to_stdvec(&item).map_err(|e| format!("{e}").into())
}
pub fn postcard_erased_to_vec<T>(item: T) -> Result<Vec<u8>, Error>
fn postcard_erased_to_vec<T>(item: T) -> Result<Vec<u8>, Error>
where
T: erased_serde::Serialize,
{
@@ -146,24 +146,37 @@ pub fn encode_to_vec<T>(item: T) -> Result<Vec<u8>, Error>
where
T: Serialize,
{
// msgpack_to_vec(item)
postcard_to_vec(item)
if false {
msgpack_to_vec(item)
} else if false {
bincode_to_vec(item)
} else {
postcard_to_vec(item)
}
}
pub fn encode_erased_to_vec<T>(item: T) -> Result<Vec<u8>, Error>
where
T: erased_serde::Serialize,
{
// msgpack_erased_to_vec(item)
postcard_erased_to_vec(item)
if false {
msgpack_erased_to_vec(item)
} else {
postcard_erased_to_vec(item)
}
}
pub fn decode_from_slice<T>(buf: &[u8]) -> Result<T, Error>
where
T: for<'de> serde::Deserialize<'de>,
{
// msgpack_from_slice(buf)
postcard_from_slice(buf)
if false {
msgpack_from_slice(buf)
} else if false {
bincode_from_slice(buf)
} else {
postcard_from_slice(buf)
}
}
pub fn make_frame_2<T>(item: T, fty: u32) -> Result<BytesMut, Error>
@@ -321,11 +334,7 @@ where
let k: err::Error = match decode_from_slice(frame.buf()) {
Ok(item) => item,
Err(e) => {
error!(
"ERROR deserialize len {} ERROR_FRAME_TYPE_ID {}",
frame.buf().len(),
e
);
error!("deserialize len {} ERROR_FRAME_TYPE_ID {}", frame.buf().len(), e);
let n = frame.buf().len().min(256);
let s = String::from_utf8_lossy(&frame.buf()[..n]);
error!("frame.buf as string: {:?}", s);
@@ -337,7 +346,7 @@ where
let k: LogItem = match decode_from_slice(frame.buf()) {
Ok(item) => item,
Err(e) => {
error!("ERROR deserialize len {} LOG_FRAME_TYPE_ID {}", frame.buf().len(), e);
error!("deserialize len {} LOG_FRAME_TYPE_ID {}", frame.buf().len(), e);
let n = frame.buf().len().min(128);
let s = String::from_utf8_lossy(&frame.buf()[..n]);
error!("frame.buf as string: {:?}", s);
@@ -349,11 +358,7 @@ where
let k: StatsItem = match decode_from_slice(frame.buf()) {
Ok(item) => item,
Err(e) => {
error!(
"ERROR deserialize len {} STATS_FRAME_TYPE_ID {}",
frame.buf().len(),
e
);
error!("deserialize len {} STATS_FRAME_TYPE_ID {}", frame.buf().len(), e);
let n = frame.buf().len().min(128);
let s = String::from_utf8_lossy(&frame.buf()[..n]);
error!("frame.buf as string: {:?}", s);
@@ -368,7 +373,7 @@ where
let tyid = T::FRAME_TYPE_ID;
if frame.tyid() != tyid {
Err(Error::with_msg(format!(
"type id mismatch expect {:x} found {:x} {:?}",
"type id mismatch expect {:04x} found {:04x} {:?}",
tyid,
frame.tyid(),
frame
@@ -377,11 +382,16 @@ where
match decode_from_slice(frame.buf()) {
Ok(item) => Ok(item),
Err(e) => {
error!("decode_frame T = {}", any::type_name::<T>());
error!("ERROR deserialize len {} tyid {:x}", frame.buf().len(), frame.tyid());
error!(
"decode_from_slice error len {} tyid {:04x} T {}",
frame.buf().len(),
frame.tyid(),
any::type_name::<T>()
);
let n = frame.buf().len().min(64);
let s = String::from_utf8_lossy(&frame.buf()[..n]);
error!("frame.buf as string: {:?}", s);
error!("decode_from_slice bad frame.buf as bytes: {:?}", &frame.buf()[..n]);
error!("decode_from_slice bad frame.buf as string: {:?}", s);
Err(e)?
}
}