Do not use files where no matching events have been found

This commit is contained in:
Dominik Werder
2021-05-14 15:03:40 +02:00
parent 8dc80f5dba
commit fd80616e24
7 changed files with 166 additions and 23 deletions

View File

@@ -78,6 +78,7 @@ async fn open_files_inner(
debug!("opening path {:?}", &path);
let mut file = OpenOptions::new().read(true).open(&path).await?;
debug!("opened file {:?} {:?}", &path, &file);
let mut use_file = false;
{
let index_path = paths::index_path(ts_bin, &channel_config, &node)?;
match OpenOptions::new().read(true).open(&index_path).await {
@@ -112,23 +113,32 @@ async fn open_files_inner(
Some(o) => {
debug!("FOUND ts IN INDEX: {:?}", o);
file.seek(SeekFrom::Start(o.1)).await?;
use_file = true;
}
None => {
debug!("NOT FOUND IN INDEX");
file.seek(SeekFrom::End(0)).await?;
use_file = false;
}
}
}
Err(e) => match e.kind() {
ErrorKind::NotFound => {
file = super::index::position_file(file, range.beg).await?;
let res = super::index::position_static_len_datafile(file, range.beg).await?;
file = res.0;
if res.1 {
use_file = true;
} else {
use_file = false;
}
}
_ => Err(e)?,
},
}
}
let ret = OpenedFile { file, path };
chtx.send(Ok(ret)).await?;
if use_file {
let ret = OpenedFile { file, path };
chtx.send(Ok(ret)).await?;
}
}
// TODO keep track of number of running
debug!("open_files_inner done");

View File

@@ -6,6 +6,8 @@ use futures_core::Stream;
use futures_util::StreamExt;
use netpod::{ChannelConfig, NanoRange, Node};
use std::pin::Pin;
use std::sync::atomic::AtomicU64;
use std::sync::Arc;
use std::task::{Context, Poll};
pub struct EventBlobsComplete {
@@ -17,6 +19,7 @@ pub struct EventBlobsComplete {
range: NanoRange,
errored: bool,
completed: bool,
max_ts: Arc<AtomicU64>,
}
impl EventBlobsComplete {
@@ -36,6 +39,7 @@ impl EventBlobsComplete {
range,
errored: false,
completed: false,
max_ts: Arc::new(AtomicU64::new(0)),
}
}
}
@@ -72,6 +76,7 @@ impl Stream for EventBlobsComplete {
self.range.clone(),
self.event_chunker_conf.clone(),
path,
self.max_ts.clone(),
);
self.evs = Some(chunker);
continue 'outer;

View File

@@ -9,6 +9,8 @@ use netpod::timeunits::SEC;
use netpod::{ByteSize, ChannelConfig, EventDataReadStats, NanoRange, ScalarType, Shape};
use std::path::PathBuf;
use std::pin::Pin;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::task::{Context, Poll};
pub struct EventChunker {
@@ -26,6 +28,7 @@ pub struct EventChunker {
final_stats_sent: bool,
parsed_bytes: u64,
path: PathBuf,
max_ts: Arc<AtomicU64>,
}
enum DataFileState {
@@ -56,6 +59,7 @@ impl EventChunker {
range: NanoRange,
stats_conf: EventChunkerConf,
path: PathBuf,
max_ts: Arc<AtomicU64>,
) -> Self {
let mut inp = NeedMinBuffer::new(inp);
inp.set_need_min(6);
@@ -74,6 +78,7 @@ impl EventChunker {
final_stats_sent: false,
parsed_bytes: 0,
path,
max_ts,
}
}
@@ -83,8 +88,9 @@ impl EventChunker {
range: NanoRange,
stats_conf: EventChunkerConf,
path: PathBuf,
max_ts: Arc<AtomicU64>,
) -> Self {
let mut ret = Self::from_start(inp, channel_config, range, stats_conf, path);
let mut ret = Self::from_start(inp, channel_config, range, stats_conf, path, max_ts);
ret.state = DataFileState::Event;
ret.need_min = 4;
ret.inp.set_need_min(4);
@@ -151,6 +157,19 @@ impl EventChunker {
let _ttl = sl.read_i64::<BE>().unwrap();
let ts = sl.read_i64::<BE>().unwrap() as u64;
let pulse = sl.read_i64::<BE>().unwrap() as u64;
let max_ts = self.max_ts.load(Ordering::SeqCst);
if ts < max_ts {
Err(Error::with_msg(format!(
"unordered event ts: {}.{} max_ts {}.{} config {:?} path {:?}",
ts / SEC,
ts % SEC,
max_ts / SEC,
max_ts % SEC,
self.channel_config.shape,
self.path
)))?;
}
self.max_ts.store(ts, Ordering::SeqCst);
if ts >= self.range.end {
self.seen_beyond_range = true;
self.data_emit_complete = true;

View File

@@ -140,19 +140,11 @@ pub async fn read_event_at(pos: u64, file: &mut File) -> Result<(u32, Nanos), Er
Ok(ev)
}
pub async fn position_file(mut file: File, beg: u64) -> Result<File, Error> {
// Read first chunk which should include channel name packet, and a first event.
// It can be that file is empty.
// It can be that there is a channel header but zero events.
pub async fn position_static_len_datafile(mut file: File, beg: u64) -> Result<(File, bool), Error> {
let flen = file.seek(SeekFrom::End(0)).await?;
file.seek(SeekFrom::Start(0)).await?;
let mut buf = vec![0; 1024];
let n1 = read(&mut buf, &mut file).await?;
if n1 < buf.len() {
// file has less content than our buffer
} else {
//
}
let _n1 = read(&mut buf, &mut file).await?;
let hres = parse_channel_header(&buf)?;
let headoff = 2 + hres.0 as u64;
let ev = parse_event(&buf[headoff as usize..])?;
@@ -160,22 +152,36 @@ pub async fn position_file(mut file: File, beg: u64) -> Result<File, Error> {
let mut j = headoff;
let mut k = ((flen - headoff) / evlen - 1) * evlen + headoff;
let x = ev.1.ns;
let y = read_event_at(k, &mut file).await?.1.ns;
let t = read_event_at(k, &mut file).await?;
if t.0 != evlen as u32 {
Err(Error::with_msg(format!(
"inconsistent event lengths: {} vs {}",
t.0, evlen
)))?;
}
let y = t.1.ns;
if x >= beg {
file.seek(SeekFrom::Start(j)).await?;
return Ok(file);
return Ok((file, true));
}
if y < beg {
file.seek(SeekFrom::Start(j)).await?;
return Ok(file);
return Ok((file, false));
}
loop {
if k - j < 2 * evlen {
file.seek(SeekFrom::Start(k)).await?;
return Ok(file);
return Ok((file, true));
}
let m = j + (k - j) / 2 / evlen * evlen;
let x = read_event_at(m, &mut file).await?.1.ns;
let t = read_event_at(m, &mut file).await?;
if t.0 != evlen as u32 {
Err(Error::with_msg(format!(
"inconsistent event lengths: {} vs {}",
t.0, evlen
)))?;
}
let x = t.1.ns;
if x < beg {
j = m;
} else {

View File

@@ -348,7 +348,8 @@ pub fn parsed1(
Ok(file) => {
let inp = Box::pin(file_content_stream(file.file, query.buffer_size as usize));
let range = err::todoval();
let mut chunker = eventchunker::EventChunker::from_event_boundary(inp, err::todoval(), range, stats_conf.clone(), file.path);
let max_ts = err::todoval();
let mut chunker = eventchunker::EventChunker::from_event_boundary(inp, err::todoval(), range, stats_conf.clone(), file.path, max_ts);
while let Some(evres) = chunker.next().await {
use eventchunker::EventChunkerItem;
match evres {