WIP typechecks

@@ -51,10 +51,13 @@ async fn position_file(
     match OpenOptions::new().read(true).open(&index_path).await {
         Ok(mut index_file) => {
             let meta = index_file.metadata().await?;
-            if meta.len() > 1024 * 1024 * 120 {
+            if meta.len() > 1024 * 1024 * 500 {
                 let msg = format!("too large index file {} bytes for {:?}", meta.len(), index_path);
                 error!("{}", msg);
                 return Err(Error::with_msg(msg));
+            } else if meta.len() > 1024 * 1024 * 200 {
+                let msg = format!("very large index file {} bytes for {:?}", meta.len(), index_path);
+                warn!("{}", msg);
             } else if meta.len() > 1024 * 1024 * 80 {
                 let msg = format!("very large index file {} bytes for {:?}", meta.len(), index_path);
                 warn!("{}", msg);
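
Note: the single 120 MiB cutoff becomes a three-tier check: only the 500 MiB tier is fatal, the 200 MiB and 80 MiB tiers just warn. A minimal standalone sketch of the same classification; the enum and function are illustrative, not part of this crate:

    // Classify an index-file length against the tiered limits above.
    // Thresholds mirror the hunk; all names here are hypothetical.
    enum IndexFileSize {
        TooLarge(u64),
        VeryLarge(u64),
        Large(u64),
        Acceptable(u64),
    }

    fn classify_index_len(len: u64) -> IndexFileSize {
        const MIB: u64 = 1024 * 1024;
        if len > 500 * MIB {
            IndexFileSize::TooLarge(len)
        } else if len > 200 * MIB {
            IndexFileSize::VeryLarge(len)
        } else if len > 80 * MIB {
            IndexFileSize::Large(len)
        } else {
            IndexFileSize::Acceptable(len)
        }
    }
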
@@ -184,12 +187,25 @@ async fn position_file(
 }
 
 pub struct OpenedFile {
+    pub pos: u64,
     pub path: PathBuf,
     pub file: Option<File>,
     pub positioned: bool,
     pub index: bool,
     pub nreads: u32,
-    pub pos: u64,
 }
 
+impl fmt::Debug for OpenedFile {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_struct("OpenedFile")
+            .field("pos", &self.pos)
+            .field("path", &self.path)
+            .field("file", &self.file.is_some())
+            .field("positioned", &self.positioned)
+            .field("index", &self.index)
+            .field("nreads", &self.nreads)
+            .finish()
+    }
+}
+
 #[derive(Debug)]
@@ -198,18 +214,6 @@ pub struct OpenedFileSet {
     pub files: Vec<OpenedFile>,
 }
 
-impl fmt::Debug for OpenedFile {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        f.debug_struct("OpenedFile")
-            .field("path", &self.path)
-            .field("file", &self.file)
-            .field("positioned", &self.positioned)
-            .field("index", &self.index)
-            .field("nreads", &self.nreads)
-            .finish()
-    }
-}
-
 pub fn open_files(
     range: &NanoRange,
     fetch_info: &SfChFetchInfo,
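
Note: the derived Debug is replaced by a manual impl (and moved next to the struct) so that the Option<File> field is reported as a presence flag via file.is_some() rather than formatting the handle, with pos printed first. The same pattern in a reduced, self-contained sketch:

    use std::fmt;

    // Reduced sketch: manual Debug that reports handle presence, not the handle.
    struct Opened {
        pos: u64,
        file: Option<std::fs::File>,
    }

    impl fmt::Debug for Opened {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            f.debug_struct("Opened")
                .field("pos", &self.pos)
                .field("file", &self.file.is_some())
                .finish()
        }
    }
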
@@ -299,7 +303,7 @@ pub fn open_expanded_files(
             Ok(_) => {}
             Err(e) => {
                 // To be expected
-                debug!("open_files channel send error {:?}", e);
+                debug!("open_expanded_files channel send error {:?}", e);
             }
         },
     }
@@ -345,18 +349,19 @@ async fn open_expanded_files_inner(
 ) -> Result<(), Error> {
     let fetch_info = fetch_info.clone();
     let timebins = get_timebins(&fetch_info, node.clone()).await?;
+    debug!("timebins {timebins:?}");
     if timebins.len() == 0 {
         return Ok(());
     }
     let mut p1 = None;
-    for (i1, tb) in timebins.iter().enumerate().rev() {
+    for (i, tb) in timebins.iter().enumerate().rev() {
         let ts_bin = TsNano::from_ns(tb * fetch_info.bs().ns());
         if ts_bin.ns() <= range.beg {
-            p1 = Some(i1);
+            p1 = Some(i);
             break;
         }
     }
-    let mut p1 = if let Some(i1) = p1 { i1 } else { 0 };
+    let mut p1 = if let Some(i) = p1 { i } else { 0 };
     if p1 >= timebins.len() {
         return Err(Error::with_msg(format!(
             "logic error p1 {} range {:?} fetch_info {:?}",
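
Note: the loop walks the timebins in reverse and records the index of the last bin whose start lies at or before the range begin, falling back to 0. The same search in one expression, with plain u64 nanoseconds standing in for the crate's TsNano:

    // Index of the last timebin starting at or before the range begin.
    fn last_bin_at_or_before(bin_starts_ns: &[u64], range_beg_ns: u64) -> usize {
        bin_starts_ns
            .iter()
            .rposition(|&ts| ts <= range_beg_ns)
            .unwrap_or(0)
    }
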
@@ -370,9 +375,11 @@ async fn open_expanded_files_inner(
         for path in paths::datapaths_for_timebin(tb, &fetch_info, &node).await? {
             let w = position_file(&path, range, true, false).await?;
             if w.found {
-                debug!("----- open_expanded_files_inner w.found for {:?}", path);
+                debug!("----- open_expanded_files_inner FOUND tb {:?} path {:?}", tb, path);
                 a.push(w.file);
                 found_pre = true;
+            } else {
+                debug!("----- open_expanded_files_inner UNFND tb {:?} path {:?}", tb, path);
             }
         }
         let h = OpenedFileSet { timebin: tb, files: a };

@@ -73,7 +73,6 @@ pub struct EventChunker {
     node_ix: usize,
     dbg_path: PathBuf,
     last_ts: u64,
-    expand: bool,
     item_len_emit_histo: HistoLog2,
     seen_before_range_count: usize,
     seen_after_range_count: usize,
@@ -144,7 +143,6 @@ impl EventChunker {
         stats_conf: EventChunkerConf,
         node_ix: usize,
         dbg_path: PathBuf,
-        expand: bool,
     ) -> Self {
         debug!("{}::{} node {}", Self::self_name(), "from_start", node_ix);
         let need_min_max = match fetch_info.shape() {
@@ -172,7 +170,6 @@ impl EventChunker {
             dbg_path,
             node_ix,
             last_ts: 0,
-            expand,
             item_len_emit_histo: HistoLog2::new(0),
             seen_before_range_count: 0,
             seen_after_range_count: 0,
@@ -188,7 +185,6 @@ impl EventChunker {
         }
     }
 
-    // TODO `expand` flag usage
     pub fn from_event_boundary(
         inp: Pin<Box<dyn Stream<Item = Result<FileChunkRead, Error>> + Send>>,
         fetch_info: SfChFetchInfo,
@@ -196,10 +192,9 @@ impl EventChunker {
         stats_conf: EventChunkerConf,
         node_ix: usize,
         dbg_path: PathBuf,
-        expand: bool,
     ) -> Self {
         debug!("{}::{} node {}", Self::self_name(), "from_event_boundary", node_ix);
-        let mut ret = Self::from_start(inp, fetch_info, range, stats_conf, node_ix, dbg_path, expand);
+        let mut ret = Self::from_start(inp, fetch_info, range, stats_conf, node_ix, dbg_path);
         ret.state = DataFileState::Event;
         ret.need_min = 4;
         ret.inp.set_need_min(4);
@@ -324,15 +319,15 @@ impl EventChunker {
                         discard = true;
                         self.discard_count_range += 1;
                         self.seen_after_range_count += 1;
-                        if !self.expand || self.seen_after_range_count >= 2 {
-                            self.seen_beyond_range = true;
+                        self.seen_beyond_range = true;
+                        if self.seen_after_range_count >= 2 {
                            self.data_emit_complete = true;
                            break;
                        }
                    }
                    if ts < self.range.beg {
-                        discard = true;
-                        self.discard_count_range += 1;
+                        // discard = true;
+                        // self.discard_count_range += 1;
                        self.seen_before_range_count += 1;
                        if self.seen_before_range_count < 20 {
                            let msg = format!(
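
Note: the rewrite decouples the beyond-range marker from the removed expand flag: seen_beyond_range is now set on the first event at or past the range end, and emission completes only once a second such event is seen. Meanwhile the before-range discard is commented out, so early events flow on to the downstream range filter. A minimal sketch of the termination rule (the struct is illustrative, field names mirror the ones above):

    // Mark beyond-range on the first late event, complete on the second.
    struct AfterRangeState {
        seen_after_range_count: usize,
        seen_beyond_range: bool,
        data_emit_complete: bool,
    }

    impl AfterRangeState {
        fn on_event_after_range(&mut self) {
            self.seen_after_range_count += 1;
            self.seen_beyond_range = true;
            if self.seen_after_range_count >= 2 {
                self.data_emit_complete = true;
            }
        }
    }
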
@@ -349,6 +344,8 @@ impl EventChunker {
                                self.dbg_path
                            );
                            warn!("{}", msg);
+                            let item = LogItem::from_node(self.node_ix, Level::INFO, msg);
+                            self.log_items.push_back(item);
                        }
                        if self.seen_before_range_count > 100 {
                            let msg = format!(
@@ -474,13 +471,17 @@ impl EventChunker {
                        shape_this,
                        comp_this,
                    );
-                    match ret.shape_derived(ret.len() - 1, self.fetch_info.shape()) {
+                    match ret.shape_derived(
+                        ret.len() - 1,
+                        self.fetch_info.scalar_type(),
+                        self.fetch_info.shape(),
+                    ) {
                        Ok(sh) => {
                            if sh.ne(self.fetch_info.shape()) {
                                self.discard_count_shape_derived += 1;
                                ret.pop_back();
                                let msg = format!(
-                                    "shape_derived mismatch {:?} {:?} {:?}",
+                                    "EventChunker shape_derived mismatch {:?} {:?} {:?}",
                                    self.fetch_info.scalar_type(),
                                    self.fetch_info.shape(),
                                    sh,
@@ -493,7 +494,7 @@ impl EventChunker {
                            self.discard_count_shape_derived_err += 1;
                            ret.pop_back();
                            let msg = format!(
-                                "shape_derived error {} {:?} {:?}",
+                                "EventChunker shape_derived error {} {:?} {:?}",
                                e,
                                self.fetch_info.scalar_type(),
                                self.fetch_info.shape(),

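Note: shape_derived now receives the scalar type alongside the expected shape, presumably because a raw payload length only determines an element count once the element width is known. A hypothetical sketch of that dependency; this is not the crate's actual signature:

    // Hypothetical: deriving an element count needs the element width.
    fn derive_elem_count(payload_len: usize, elem_width: usize) -> Result<usize, String> {
        if elem_width == 0 || payload_len % elem_width != 0 {
            Err(format!("payload {} not divisible by width {}", payload_len, elem_width))
        } else {
            Ok(payload_len / elem_width)
        }
    }
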
@@ -39,7 +39,7 @@ pub struct EventChunkerMultifile {
     range: NanoRange,
     files_count: u32,
     node_ix: usize,
-    expand: bool,
+    one_before: bool,
     max_ts: u64,
     out_max_len: usize,
     emit_count: usize,
@@ -64,12 +64,12 @@ impl EventChunkerMultifile {
         node_ix: usize,
         disk_io_tune: DiskIoTune,
         event_chunker_conf: EventChunkerConf,
-        expand: bool,
+        one_before: bool,
         out_max_len: usize,
         reqctx: ReqCtxArc,
     ) -> Self {
-        debug!("EventChunkerMultifile expand {expand}");
-        let file_chan = if expand {
+        debug!("EventChunkerMultifile one_before {one_before}");
+        let file_chan = if one_before {
             open_expanded_files(&range, &fetch_info, node)
         } else {
             open_files(&range, &fetch_info, reqctx.reqid(), node)
@@ -83,7 +83,7 @@ impl EventChunkerMultifile {
             range,
             files_count: 0,
             node_ix,
-            expand,
+            one_before,
             max_ts: 0,
             out_max_len,
             emit_count: 0,
@@ -129,6 +129,9 @@ impl Stream for EventChunkerMultifile {
                    if h.len() > 0 {
                        let min = h.tss.iter().fold(u64::MAX, |a, &x| a.min(x));
                        let max = h.tss.iter().fold(u64::MIN, |a, &x| a.max(x));
+                        if min < self.range.beg() {
+                            debug!("ITEM BEFORE RANGE (how many?)");
+                        }
                        if min <= self.max_ts {
                            let msg = format!("EventChunkerMultifile repeated or unordered ts {}", min);
                            error!("{}", msg);
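
Note: the new guard computes the batch minimum and flags items that reach before the range begin, in addition to the existing unordered-timestamp check. The folds in isolation:

    // Min and max of a timestamp slice without assuming sorted input.
    // An empty slice yields (u64::MAX, u64::MIN), hence the h.len() > 0 guard.
    fn min_max(tss: &[u64]) -> (u64, u64) {
        let min = tss.iter().fold(u64::MAX, |a, &x| a.min(x));
        let max = tss.iter().fold(u64::MIN, |a, &x| a.max(x));
        (min, max)
    }
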
@@ -180,13 +183,19 @@ impl Stream for EventChunkerMultifile {
                None => match self.file_chan.poll_next_unpin(cx) {
                    Ready(Some(k)) => match k {
                        Ok(ofs) => {
+                            let msg = format!("received files for timebin {:?}", ofs.timebin);
+                            let item = LogItem::from_node(self.node_ix, Level::INFO, msg);
+                            self.log_queue.push_back(item);
+                            for e in &ofs.files {
+                                let msg = format!("file {:?}", e);
+                                let item = LogItem::from_node(self.node_ix, Level::INFO, msg);
+                                self.log_queue.push_back(item);
+                            }
                            self.files_count += ofs.files.len() as u32;
                            if ofs.files.len() == 1 {
                                let mut ofs = ofs;
                                let file = ofs.files.pop().unwrap();
                                let path = file.path;
-                                let msg = format!("use opened files {:?}", ofs);
-                                let item = LogItem::from_node(self.node_ix, Level::DEBUG, msg);
                                match file.file {
                                    Some(file) => {
                                        let inp = Box::pin(crate::file_content_stream(
@@ -202,22 +211,19 @@ impl Stream for EventChunkerMultifile {
                                            self.event_chunker_conf.clone(),
                                            self.node_ix,
                                            path.clone(),
-                                            self.expand,
                                        );
-                                        let filtered = RangeFilter2::new(chunker, self.range.clone(), self.expand);
+                                        let filtered =
+                                            RangeFilter2::new(chunker, self.range.clone(), self.one_before);
                                        self.evs = Some(Box::pin(filtered));
                                    }
                                    None => {}
                                }
-                                Ready(Some(Ok(StreamItem::Log(item))))
+                                continue;
                            } else if ofs.files.len() == 0 {
                                let msg = format!("use opened files {:?} no files", ofs);
                                let item = LogItem::from_node(self.node_ix, Level::DEBUG, msg);
                                Ready(Some(Ok(StreamItem::Log(item))))
                            } else {
-                                // let paths: Vec<_> = ofs.files.iter().map(|x| &x.path).collect();
-                                let msg = format!("use opened files {:?} locally merged", ofs);
-                                let item = LogItem::from_node(self.node_ix, Level::DEBUG, msg);
                                let mut chunkers = Vec::new();
                                for of in ofs.files {
                                    if let Some(file) = of.file {
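
Note: the single-file arm no longer returns one stream item per log message; it pushes LogItems onto self.log_queue and continues the poll loop. A reduced sketch of that shape with simplified types, not the crate's Stream impl:

    use std::collections::VecDeque;

    // Queued log items are surfaced before data, so logging does not
    // consume a poll result per message.
    enum Item {
        Log(String),
        Data(Vec<u8>),
    }

    struct LoggingSource {
        log_queue: VecDeque<String>,
        data: VecDeque<Vec<u8>>,
    }

    impl LoggingSource {
        fn next_item(&mut self) -> Option<Item> {
            if let Some(msg) = self.log_queue.pop_front() {
                return Some(Item::Log(msg));
            }
            self.data.pop_front().map(Item::Data)
        }
    }
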
@@ -234,14 +240,15 @@ impl Stream for EventChunkerMultifile {
                                            self.event_chunker_conf.clone(),
                                            self.node_ix,
                                            of.path.clone(),
-                                            self.expand,
                                        );
                                        chunkers.push(Box::pin(chunker) as _);
                                    }
                                }
                                let merged = Merger::new(chunkers, Some(self.out_max_len as u32));
-                                let filtered = RangeFilter2::new(merged, self.range.clone(), self.expand);
+                                let filtered = RangeFilter2::new(merged, self.range.clone(), self.one_before);
                                self.evs = Some(Box::pin(filtered));
+                                let msg = format!("LOCALLY MERGED");
+                                let item = LogItem::from_node(self.node_ix, Level::DEBUG, msg);
                                Ready(Some(Ok(StreamItem::Log(item))))
                            }
                        }

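Note: when a timebin yields several files, their chunkers are merged locally before range filtering. The crate's Merger works on event chunks; a generic k-way merge over sorted timestamp sequences sketches only the idea:

    use std::cmp::Reverse;
    use std::collections::BinaryHeap;

    // Merge already-sorted inputs by repeatedly popping the smallest head.
    fn merge_sorted(inputs: Vec<Vec<u64>>) -> Vec<u64> {
        let mut heap = BinaryHeap::new();
        for (src, v) in inputs.iter().enumerate() {
            if let Some(&ts) = v.first() {
                heap.push(Reverse((ts, src, 0usize)));
            }
        }
        let mut out = Vec::new();
        while let Some(Reverse((ts, src, i))) = heap.pop() {
            out.push(ts);
            if let Some(&next) = inputs[src].get(i + 1) {
                heap.push(Reverse((next, src, i + 1)));
            }
        }
        out
    }
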
@@ -6,6 +6,7 @@ use items_0::streamitem::Sitemty;
 use items_0::streamitem::StreamItem;
 use items_0::WithLen;
 use items_2::eventfull::EventFull;
+use netpod::ScalarType;
 use netpod::Shape;
 use std::collections::VecDeque;
 use std::pin::Pin;
@@ -15,6 +16,7 @@ use tracing::Level;
 
 pub struct EventFullShapeFilter<INP> {
     inp: INP,
+    scalar_type_exp: ScalarType,
     shape_exp: Shape,
     node_ix: usize,
     log_items: VecDeque<LogItem>,
@@ -25,11 +27,14 @@ impl<INP> EventFullShapeFilter<INP> {
        let node_ix = self.node_ix;
        let p: Vec<_> = (0..item.len())
            .map(|i| {
-                let sh = item.shape_derived(i, &self.shape_exp);
+                let sh = item.shape_derived(i, &self.scalar_type_exp, &self.shape_exp);
                match sh {
                    Ok(sh) => {
                        if sh.ne(&self.shape_exp) {
-                            let msg = format!("shape_derived mismatch {:?} {:?}", sh, self.shape_exp);
+                            let msg = format!(
+                                "EventFullShapeFilter shape_derived mismatch {:?} {:?}",
+                                sh, self.shape_exp
+                            );
                            let item = LogItem::from_node(node_ix, Level::WARN, msg);
                            self.log_items.push_back(item);
                            false
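
Note: the filter derives a shape per event, logs mismatches with the filter's name for traceability, and maps each event to a keep/drop verdict. The verdict computation in isolation, with a numeric stand-in for the crate's Shape:

    // One bool per item: true when the derived shape matches the expected one.
    fn keep_mask(derived: &[Result<u32, String>], expected: u32) -> Vec<bool> {
        derived
            .iter()
            .map(|sh| matches!(sh, Ok(s) if *s == expected))
            .collect()
    }
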
@@ -38,7 +43,10 @@ impl<INP> EventFullShapeFilter<INP> {
                        }
                    }
                    Err(_) => {
-                        let msg = format!("shape_derived mismatch {:?} {:?}", sh, self.shape_exp);
+                        let msg = format!(
+                            "EventFullShapeFilter shape_derived mismatch {:?} {:?}",
+                            sh, self.shape_exp
+                        );
                        let item = LogItem::from_node(self.node_ix, Level::WARN, msg);
                        self.log_items.push_back(item);
                        false

@@ -62,11 +62,8 @@ pub async fn make_event_pipe(
 ) -> Result<Pin<Box<dyn Stream<Item = Sitemty<ChannelEvents>> + Send>>, Error> {
     // sf-databuffer type backends identify channels by their (backend, name) only.
     let range = evq.range().clone();
-    let one_before = evq.transform().need_one_before_range();
-    info!(
-        "make_event_pipe need_expand {need_expand} {evq:?}",
-        need_expand = one_before
-    );
+    let one_before = evq.need_one_before_range();
+    info!("make_event_pipe one_before {one_before} {evq:?}");
     let event_chunker_conf = EventChunkerConf::new(ByteSize::from_kb(1024));
     // TODO should not need this for correctness.
     // Should limit based on return size and latency.
@@ -94,7 +91,7 @@
 pub fn make_event_blobs_stream(
     range: NanoRange,
     fetch_info: SfChFetchInfo,
-    expand: bool,
+    one_before: bool,
     event_chunker_conf: EventChunkerConf,
     disk_io_tune: DiskIoTune,
     reqctx: ReqCtxArc,
@@ -115,7 +112,7 @@
         ncc.ix,
         disk_io_tune,
         event_chunker_conf,
-        expand,
+        one_before,
         out_max_len,
         reqctx,
     );
@@ -128,13 +125,13 @@ pub fn make_event_blobs_pipe_real(
     reqctx: ReqCtxArc,
     ncc: &NodeConfigCached,
 ) -> Result<Pin<Box<dyn Stream<Item = Sitemty<EventFull>> + Send>>, Error> {
-    let expand = subq.transform().need_one_before_range();
+    let one_before = subq.need_one_before_range();
     let range = subq.range();
     let event_chunker_conf = EventChunkerConf::new(ByteSize::from_kb(1024));
     let event_blobs = make_event_blobs_stream(
         range.try_into()?,
         fetch_info.clone(),
-        expand,
+        one_before,
         event_chunker_conf,
         subq.disk_io_tune(),
         reqctx,