Fix when unexpected split shows up in local storage

This commit is contained in:
Dominik Werder
2021-09-09 16:28:56 +02:00
parent c455c01d24
commit a4e2326650
8 changed files with 589 additions and 279 deletions

View File

@@ -18,6 +18,12 @@ pub struct OpenedFile {
pub nreads: u32,
}
#[derive(Debug)]
pub struct OpenedFileSet {
pub timebin: u64,
pub files: Vec<OpenedFile>,
}
impl fmt::Debug for OpenedFile {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("OpenedFile")
@@ -34,7 +40,7 @@ pub fn open_files(
range: &NanoRange,
channel_config: &ChannelConfig,
node: Node,
) -> async_channel::Receiver<Result<OpenedFile, Error>> {
) -> async_channel::Receiver<Result<OpenedFileSet, Error>> {
let (chtx, chrx) = async_channel::bounded(2);
let range = range.clone();
let channel_config = channel_config.clone();
@@ -53,30 +59,16 @@ pub fn open_files(
}
async fn open_files_inner(
chtx: &async_channel::Sender<Result<OpenedFile, Error>>,
chtx: &async_channel::Sender<Result<OpenedFileSet, Error>>,
range: &NanoRange,
channel_config: &ChannelConfig,
node: Node,
) -> Result<(), Error> {
let channel_config = channel_config.clone();
let mut timebins = vec![];
{
let rd = tokio::fs::read_dir(paths::channel_timebins_dir_path(&channel_config, &node)?).await?;
let mut rd = tokio_stream::wrappers::ReadDirStream::new(rd);
while let Some(e) = rd.next().await {
let e = e?;
let dn = e
.file_name()
.into_string()
.map_err(|e| Error::with_msg(format!("Bad OS path {:?}", e)))?;
let vv = dn.chars().fold(0, |a, x| if x.is_digit(10) { a + 1 } else { a });
if vv == 19 {
timebins.push(dn.parse::<u64>()?);
}
}
let timebins = get_timebins(&channel_config, node.clone()).await?;
if timebins.len() == 0 {
return Ok(());
}
timebins.sort_unstable();
let timebins = timebins;
for &tb in &timebins {
let ts_bin = Nanos {
ns: tb * channel_config.time_bin_size.ns,
@@ -87,10 +79,37 @@ async fn open_files_inner(
if ts_bin.ns + channel_config.time_bin_size.ns <= range.beg {
continue;
}
let path = paths::datapath(tb, &channel_config, &node);
let mut file = OpenOptions::new().read(true).open(&path).await?;
let ret = {
let index_path = paths::index_path(ts_bin, &channel_config, &node)?;
let mut a = vec![];
for path in paths::datapaths_for_timebin(tb, &channel_config, &node).await? {
let w = position_file(&path, range, &channel_config, false).await?;
if w.found {
a.push(w.file);
}
}
let h = OpenedFileSet { timebin: tb, files: a };
info!(
"----- open_files_inner giving OpenedFileSet with {} files",
h.files.len()
);
chtx.send(Ok(h)).await?;
}
Ok(())
}
struct Positioned {
file: OpenedFile,
found: bool,
}
async fn position_file(
path: &PathBuf,
range: &NanoRange,
channel_config: &ChannelConfig,
expand: bool,
) -> Result<Positioned, Error> {
match OpenOptions::new().read(true).open(&path).await {
Ok(file) => {
let index_path = PathBuf::from(format!("{}_Index", path.to_str().unwrap()));
match OpenOptions::new().read(true).open(&index_path).await {
Ok(mut index_file) => {
let meta = index_file.metadata().await?;
@@ -118,63 +137,91 @@ async fn open_files_inner(
let mut buf = BytesMut::with_capacity(meta.len() as usize);
buf.resize(buf.capacity(), 0);
index_file.read_exact(&mut buf).await?;
match super::index::find_ge(range.beg, &buf[2..])? {
let gg = if expand {
super::index::find_largest_smaller_than(range.beg, &buf[2..])?
} else {
super::index::find_ge(range.beg, &buf[2..])?
};
match gg {
Some(o) => {
let mut file = file;
file.seek(SeekFrom::Start(o.1)).await?;
OpenedFile {
info!("position_file case A {:?}", path);
let g = OpenedFile {
file: Some(file),
path,
path: path.clone(),
positioned: true,
index: true,
nreads: 0,
}
};
return Ok(Positioned { file: g, found: true });
}
None => {
info!("position_file case B {:?}", path);
let g = OpenedFile {
file: None,
path: path.clone(),
positioned: false,
index: true,
nreads: 0,
};
return Ok(Positioned { file: g, found: false });
}
None => OpenedFile {
file: None,
path,
positioned: false,
index: true,
nreads: 0,
},
}
}
Err(e) => match e.kind() {
ErrorKind::NotFound => {
let ts1 = Instant::now();
let res = super::index::position_static_len_datafile(file, range.beg).await?;
let res = if expand {
super::index::position_static_len_datafile_at_largest_smaller_than(file, range.beg).await?
} else {
super::index::position_static_len_datafile(file, range.beg).await?
};
let ts2 = Instant::now();
if false {
// TODO collect for stats:
let dur = ts2.duration_since(ts1);
info!("position_static_len_datafile took ms {}", dur.as_millis());
}
file = res.0;
let file = res.0;
if res.1 {
OpenedFile {
info!("position_file case C {:?}", path);
let g = OpenedFile {
file: Some(file),
path,
path: path.clone(),
positioned: true,
index: false,
nreads: res.2,
}
};
return Ok(Positioned { file: g, found: true });
} else {
OpenedFile {
info!("position_file case D {:?}", path);
let g = OpenedFile {
file: None,
path,
path: path.clone(),
positioned: false,
index: false,
nreads: res.2,
}
};
return Ok(Positioned { file: g, found: false });
}
}
_ => Err(e)?,
},
}
};
chtx.send(Ok(ret)).await?;
}
Err(e) => {
warn!("can not open {:?} error {:?}", path, e);
let g = OpenedFile {
file: None,
path: path.clone(),
positioned: false,
index: true,
nreads: 0,
};
return Ok(Positioned { file: g, found: false });
}
}
// TODO keep track of number of running
Ok(())
}
/*
@@ -185,7 +232,7 @@ pub fn open_expanded_files(
range: &NanoRange,
channel_config: &ChannelConfig,
node: Node,
) -> async_channel::Receiver<Result<OpenedFile, Error>> {
) -> async_channel::Receiver<Result<OpenedFileSet, Error>> {
let (chtx, chrx) = async_channel::bounded(2);
let range = range.clone();
let channel_config = channel_config.clone();
@@ -203,31 +250,33 @@ pub fn open_expanded_files(
chrx
}
async fn get_timebins(channel_config: &ChannelConfig, node: Node) -> Result<Vec<u64>, Error> {
let mut timebins = vec![];
let rd = tokio::fs::read_dir(paths::channel_timebins_dir_path(&channel_config, &node)?).await?;
let mut rd = tokio_stream::wrappers::ReadDirStream::new(rd);
while let Some(e) = rd.next().await {
let e = e?;
let dn = e
.file_name()
.into_string()
.map_err(|e| Error::with_msg(format!("Bad OS path {:?}", e)))?;
let vv = dn.chars().fold(0, |a, x| if x.is_digit(10) { a + 1 } else { a });
if vv == 19 {
timebins.push(dn.parse::<u64>()?);
}
}
timebins.sort_unstable();
Ok(timebins)
}
async fn open_expanded_files_inner(
chtx: &async_channel::Sender<Result<OpenedFile, Error>>,
chtx: &async_channel::Sender<Result<OpenedFileSet, Error>>,
range: &NanoRange,
channel_config: &ChannelConfig,
node: Node,
) -> Result<(), Error> {
let channel_config = channel_config.clone();
let mut timebins = vec![];
{
let rd = tokio::fs::read_dir(paths::channel_timebins_dir_path(&channel_config, &node)?).await?;
let mut rd = tokio_stream::wrappers::ReadDirStream::new(rd);
while let Some(e) = rd.next().await {
let e = e?;
let dn = e
.file_name()
.into_string()
.map_err(|e| Error::with_msg(format!("Bad OS path {:?}", e)))?;
let vv = dn.chars().fold(0, |a, x| if x.is_digit(10) { a + 1 } else { a });
if vv == 19 {
timebins.push(dn.parse::<u64>()?);
}
}
}
timebins.sort_unstable();
let timebins = timebins;
let timebins = get_timebins(&channel_config, node.clone()).await?;
if timebins.len() == 0 {
return Ok(());
}
@@ -245,206 +294,50 @@ async fn open_expanded_files_inner(
let mut found_first = false;
loop {
let tb = timebins[p1];
let ts_bin = Nanos {
ns: tb * channel_config.time_bin_size.ns,
};
let path = paths::datapath(tb, &channel_config, &node);
let mut file = OpenOptions::new().read(true).open(&path).await?;
let ret = {
let index_path = paths::index_path(ts_bin, &channel_config, &node)?;
match OpenOptions::new().read(true).open(&index_path).await {
Ok(mut index_file) => {
let meta = index_file.metadata().await?;
if meta.len() > 1024 * 1024 * 20 {
return Err(Error::with_msg(format!(
"too large index file {} bytes for {}",
meta.len(),
channel_config.channel.name
)));
}
if meta.len() < 2 {
return Err(Error::with_msg(format!(
"bad meta len {} for {}",
meta.len(),
channel_config.channel.name
)));
}
if meta.len() % 16 != 2 {
return Err(Error::with_msg(format!(
"bad meta len {} for {}",
meta.len(),
channel_config.channel.name
)));
}
let mut buf = BytesMut::with_capacity(meta.len() as usize);
buf.resize(buf.capacity(), 0);
index_file.read_exact(&mut buf).await?;
match super::index::find_largest_smaller_than(range.beg, &buf[2..])? {
Some(o) => {
found_first = true;
file.seek(SeekFrom::Start(o.1)).await?;
OpenedFile {
file: Some(file),
path,
positioned: true,
index: true,
nreads: 0,
}
}
None => OpenedFile {
file: None,
path,
positioned: false,
index: true,
nreads: 0,
},
}
}
Err(e) => match e.kind() {
ErrorKind::NotFound => {
let ts1 = Instant::now();
let res =
super::index::position_static_len_datafile_at_largest_smaller_than(file, range.beg).await?;
let ts2 = Instant::now();
if false {
// TODO collect for stats:
let dur = ts2.duration_since(ts1);
info!("position_static_len_datafile took ms {}", dur.as_millis());
}
file = res.0;
if res.1 {
found_first = true;
OpenedFile {
file: Some(file),
path,
positioned: true,
index: false,
nreads: res.2,
}
} else {
OpenedFile {
file: None,
path,
positioned: false,
index: false,
nreads: res.2,
}
}
}
_ => Err(e)?,
},
let mut a = vec![];
for path in paths::datapaths_for_timebin(tb, &channel_config, &node).await? {
let w = position_file(&path, range, &channel_config, true).await?;
if w.found {
info!("----- open_expanded_files_inner w.found for {:?}", path);
a.push(w.file);
found_first = true;
}
};
}
let h = OpenedFileSet { timebin: tb, files: a };
info!(
"----- open_expanded_files_inner giving OpenedFileSet with {} files",
h.files.len()
);
chtx.send(Ok(h)).await?;
if found_first {
p1 += 1;
chtx.send(Ok(ret)).await?;
break;
} else if p1 == 0 {
break;
} else {
if p1 == 0 {
break;
} else {
p1 -= 1;
}
p1 -= 1;
}
}
if found_first {
// Append all following positioned files.
loop {
let tb = timebins[p1];
let ts_bin = Nanos {
ns: tb * channel_config.time_bin_size.ns,
};
let path = paths::datapath(tb, &channel_config, &node);
let mut file = OpenOptions::new().read(true).open(&path).await?;
let ret = {
let index_path = paths::index_path(ts_bin, &channel_config, &node)?;
match OpenOptions::new().read(true).open(&index_path).await {
Ok(mut index_file) => {
let meta = index_file.metadata().await?;
if meta.len() > 1024 * 1024 * 20 {
return Err(Error::with_msg(format!(
"too large index file {} bytes for {}",
meta.len(),
channel_config.channel.name
)));
}
if meta.len() < 2 {
return Err(Error::with_msg(format!(
"bad meta len {} for {}",
meta.len(),
channel_config.channel.name
)));
}
if meta.len() % 16 != 2 {
return Err(Error::with_msg(format!(
"bad meta len {} for {}",
meta.len(),
channel_config.channel.name
)));
}
let mut buf = BytesMut::with_capacity(meta.len() as usize);
buf.resize(buf.capacity(), 0);
index_file.read_exact(&mut buf).await?;
match super::index::find_ge(range.beg, &buf[2..])? {
Some(o) => {
file.seek(SeekFrom::Start(o.1)).await?;
OpenedFile {
file: Some(file),
path,
positioned: true,
index: true,
nreads: 0,
}
}
None => OpenedFile {
file: None,
path,
positioned: false,
index: true,
nreads: 0,
},
}
}
Err(e) => match e.kind() {
ErrorKind::NotFound => {
let ts1 = Instant::now();
let res = super::index::position_static_len_datafile(file, range.beg).await?;
let ts2 = Instant::now();
if false {
// TODO collect for stats:
let dur = ts2.duration_since(ts1);
info!("position_static_len_datafile took ms {}", dur.as_millis());
}
file = res.0;
if res.1 {
OpenedFile {
file: Some(file),
path,
positioned: true,
index: false,
nreads: res.2,
}
} else {
OpenedFile {
file: None,
path,
positioned: false,
index: false,
nreads: res.2,
}
}
}
_ => Err(e)?,
},
let mut a = vec![];
for path in paths::datapaths_for_timebin(tb, &channel_config, &node).await? {
let w = position_file(&path, range, &channel_config, false).await?;
if w.found {
a.push(w.file);
}
};
chtx.send(Ok(ret)).await?;
}
let h = OpenedFileSet { timebin: tb, files: a };
chtx.send(Ok(h)).await?;
p1 += 1;
if p1 >= timebins.len() {
break;
}
}
} else {
info!("Could not find some event before the requested range, fall back to standard file list.");
// Try to locate files according to non-expand-algorithm.
open_files_inner(chtx, range, &channel_config, node).await?;
}
@@ -481,7 +374,7 @@ fn expanded_file_list() {
match file {
Ok(k) => {
info!("opened file: {:?}", k);
paths.push(k.path.clone());
paths.push(k.files);
}
Err(e) => {
error!("error while trying to open {:?}", e);