Fix when unexpected split shows up in local storage
This commit is contained in:
@@ -18,6 +18,12 @@ pub struct OpenedFile {
|
||||
pub nreads: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct OpenedFileSet {
|
||||
pub timebin: u64,
|
||||
pub files: Vec<OpenedFile>,
|
||||
}
|
||||
|
||||
impl fmt::Debug for OpenedFile {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.debug_struct("OpenedFile")
|
||||
@@ -34,7 +40,7 @@ pub fn open_files(
|
||||
range: &NanoRange,
|
||||
channel_config: &ChannelConfig,
|
||||
node: Node,
|
||||
) -> async_channel::Receiver<Result<OpenedFile, Error>> {
|
||||
) -> async_channel::Receiver<Result<OpenedFileSet, Error>> {
|
||||
let (chtx, chrx) = async_channel::bounded(2);
|
||||
let range = range.clone();
|
||||
let channel_config = channel_config.clone();
|
||||
@@ -53,30 +59,16 @@ pub fn open_files(
|
||||
}
|
||||
|
||||
async fn open_files_inner(
|
||||
chtx: &async_channel::Sender<Result<OpenedFile, Error>>,
|
||||
chtx: &async_channel::Sender<Result<OpenedFileSet, Error>>,
|
||||
range: &NanoRange,
|
||||
channel_config: &ChannelConfig,
|
||||
node: Node,
|
||||
) -> Result<(), Error> {
|
||||
let channel_config = channel_config.clone();
|
||||
let mut timebins = vec![];
|
||||
{
|
||||
let rd = tokio::fs::read_dir(paths::channel_timebins_dir_path(&channel_config, &node)?).await?;
|
||||
let mut rd = tokio_stream::wrappers::ReadDirStream::new(rd);
|
||||
while let Some(e) = rd.next().await {
|
||||
let e = e?;
|
||||
let dn = e
|
||||
.file_name()
|
||||
.into_string()
|
||||
.map_err(|e| Error::with_msg(format!("Bad OS path {:?}", e)))?;
|
||||
let vv = dn.chars().fold(0, |a, x| if x.is_digit(10) { a + 1 } else { a });
|
||||
if vv == 19 {
|
||||
timebins.push(dn.parse::<u64>()?);
|
||||
}
|
||||
}
|
||||
let timebins = get_timebins(&channel_config, node.clone()).await?;
|
||||
if timebins.len() == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
timebins.sort_unstable();
|
||||
let timebins = timebins;
|
||||
for &tb in &timebins {
|
||||
let ts_bin = Nanos {
|
||||
ns: tb * channel_config.time_bin_size.ns,
|
||||
@@ -87,10 +79,37 @@ async fn open_files_inner(
|
||||
if ts_bin.ns + channel_config.time_bin_size.ns <= range.beg {
|
||||
continue;
|
||||
}
|
||||
let path = paths::datapath(tb, &channel_config, &node);
|
||||
let mut file = OpenOptions::new().read(true).open(&path).await?;
|
||||
let ret = {
|
||||
let index_path = paths::index_path(ts_bin, &channel_config, &node)?;
|
||||
let mut a = vec![];
|
||||
for path in paths::datapaths_for_timebin(tb, &channel_config, &node).await? {
|
||||
let w = position_file(&path, range, &channel_config, false).await?;
|
||||
if w.found {
|
||||
a.push(w.file);
|
||||
}
|
||||
}
|
||||
let h = OpenedFileSet { timebin: tb, files: a };
|
||||
info!(
|
||||
"----- open_files_inner giving OpenedFileSet with {} files",
|
||||
h.files.len()
|
||||
);
|
||||
chtx.send(Ok(h)).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
struct Positioned {
|
||||
file: OpenedFile,
|
||||
found: bool,
|
||||
}
|
||||
|
||||
async fn position_file(
|
||||
path: &PathBuf,
|
||||
range: &NanoRange,
|
||||
channel_config: &ChannelConfig,
|
||||
expand: bool,
|
||||
) -> Result<Positioned, Error> {
|
||||
match OpenOptions::new().read(true).open(&path).await {
|
||||
Ok(file) => {
|
||||
let index_path = PathBuf::from(format!("{}_Index", path.to_str().unwrap()));
|
||||
match OpenOptions::new().read(true).open(&index_path).await {
|
||||
Ok(mut index_file) => {
|
||||
let meta = index_file.metadata().await?;
|
||||
@@ -118,63 +137,91 @@ async fn open_files_inner(
|
||||
let mut buf = BytesMut::with_capacity(meta.len() as usize);
|
||||
buf.resize(buf.capacity(), 0);
|
||||
index_file.read_exact(&mut buf).await?;
|
||||
match super::index::find_ge(range.beg, &buf[2..])? {
|
||||
let gg = if expand {
|
||||
super::index::find_largest_smaller_than(range.beg, &buf[2..])?
|
||||
} else {
|
||||
super::index::find_ge(range.beg, &buf[2..])?
|
||||
};
|
||||
match gg {
|
||||
Some(o) => {
|
||||
let mut file = file;
|
||||
file.seek(SeekFrom::Start(o.1)).await?;
|
||||
OpenedFile {
|
||||
info!("position_file case A {:?}", path);
|
||||
let g = OpenedFile {
|
||||
file: Some(file),
|
||||
path,
|
||||
path: path.clone(),
|
||||
positioned: true,
|
||||
index: true,
|
||||
nreads: 0,
|
||||
}
|
||||
};
|
||||
return Ok(Positioned { file: g, found: true });
|
||||
}
|
||||
None => {
|
||||
info!("position_file case B {:?}", path);
|
||||
let g = OpenedFile {
|
||||
file: None,
|
||||
path: path.clone(),
|
||||
positioned: false,
|
||||
index: true,
|
||||
nreads: 0,
|
||||
};
|
||||
return Ok(Positioned { file: g, found: false });
|
||||
}
|
||||
None => OpenedFile {
|
||||
file: None,
|
||||
path,
|
||||
positioned: false,
|
||||
index: true,
|
||||
nreads: 0,
|
||||
},
|
||||
}
|
||||
}
|
||||
Err(e) => match e.kind() {
|
||||
ErrorKind::NotFound => {
|
||||
let ts1 = Instant::now();
|
||||
let res = super::index::position_static_len_datafile(file, range.beg).await?;
|
||||
let res = if expand {
|
||||
super::index::position_static_len_datafile_at_largest_smaller_than(file, range.beg).await?
|
||||
} else {
|
||||
super::index::position_static_len_datafile(file, range.beg).await?
|
||||
};
|
||||
let ts2 = Instant::now();
|
||||
if false {
|
||||
// TODO collect for stats:
|
||||
let dur = ts2.duration_since(ts1);
|
||||
info!("position_static_len_datafile took ms {}", dur.as_millis());
|
||||
}
|
||||
file = res.0;
|
||||
let file = res.0;
|
||||
if res.1 {
|
||||
OpenedFile {
|
||||
info!("position_file case C {:?}", path);
|
||||
let g = OpenedFile {
|
||||
file: Some(file),
|
||||
path,
|
||||
path: path.clone(),
|
||||
positioned: true,
|
||||
index: false,
|
||||
nreads: res.2,
|
||||
}
|
||||
};
|
||||
return Ok(Positioned { file: g, found: true });
|
||||
} else {
|
||||
OpenedFile {
|
||||
info!("position_file case D {:?}", path);
|
||||
let g = OpenedFile {
|
||||
file: None,
|
||||
path,
|
||||
path: path.clone(),
|
||||
positioned: false,
|
||||
index: false,
|
||||
nreads: res.2,
|
||||
}
|
||||
};
|
||||
return Ok(Positioned { file: g, found: false });
|
||||
}
|
||||
}
|
||||
_ => Err(e)?,
|
||||
},
|
||||
}
|
||||
};
|
||||
chtx.send(Ok(ret)).await?;
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("can not open {:?} error {:?}", path, e);
|
||||
let g = OpenedFile {
|
||||
file: None,
|
||||
path: path.clone(),
|
||||
positioned: false,
|
||||
index: true,
|
||||
nreads: 0,
|
||||
};
|
||||
return Ok(Positioned { file: g, found: false });
|
||||
}
|
||||
}
|
||||
// TODO keep track of number of running
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -185,7 +232,7 @@ pub fn open_expanded_files(
|
||||
range: &NanoRange,
|
||||
channel_config: &ChannelConfig,
|
||||
node: Node,
|
||||
) -> async_channel::Receiver<Result<OpenedFile, Error>> {
|
||||
) -> async_channel::Receiver<Result<OpenedFileSet, Error>> {
|
||||
let (chtx, chrx) = async_channel::bounded(2);
|
||||
let range = range.clone();
|
||||
let channel_config = channel_config.clone();
|
||||
@@ -203,31 +250,33 @@ pub fn open_expanded_files(
|
||||
chrx
|
||||
}
|
||||
|
||||
async fn get_timebins(channel_config: &ChannelConfig, node: Node) -> Result<Vec<u64>, Error> {
|
||||
let mut timebins = vec![];
|
||||
let rd = tokio::fs::read_dir(paths::channel_timebins_dir_path(&channel_config, &node)?).await?;
|
||||
let mut rd = tokio_stream::wrappers::ReadDirStream::new(rd);
|
||||
while let Some(e) = rd.next().await {
|
||||
let e = e?;
|
||||
let dn = e
|
||||
.file_name()
|
||||
.into_string()
|
||||
.map_err(|e| Error::with_msg(format!("Bad OS path {:?}", e)))?;
|
||||
let vv = dn.chars().fold(0, |a, x| if x.is_digit(10) { a + 1 } else { a });
|
||||
if vv == 19 {
|
||||
timebins.push(dn.parse::<u64>()?);
|
||||
}
|
||||
}
|
||||
timebins.sort_unstable();
|
||||
Ok(timebins)
|
||||
}
|
||||
|
||||
async fn open_expanded_files_inner(
|
||||
chtx: &async_channel::Sender<Result<OpenedFile, Error>>,
|
||||
chtx: &async_channel::Sender<Result<OpenedFileSet, Error>>,
|
||||
range: &NanoRange,
|
||||
channel_config: &ChannelConfig,
|
||||
node: Node,
|
||||
) -> Result<(), Error> {
|
||||
let channel_config = channel_config.clone();
|
||||
let mut timebins = vec![];
|
||||
{
|
||||
let rd = tokio::fs::read_dir(paths::channel_timebins_dir_path(&channel_config, &node)?).await?;
|
||||
let mut rd = tokio_stream::wrappers::ReadDirStream::new(rd);
|
||||
while let Some(e) = rd.next().await {
|
||||
let e = e?;
|
||||
let dn = e
|
||||
.file_name()
|
||||
.into_string()
|
||||
.map_err(|e| Error::with_msg(format!("Bad OS path {:?}", e)))?;
|
||||
let vv = dn.chars().fold(0, |a, x| if x.is_digit(10) { a + 1 } else { a });
|
||||
if vv == 19 {
|
||||
timebins.push(dn.parse::<u64>()?);
|
||||
}
|
||||
}
|
||||
}
|
||||
timebins.sort_unstable();
|
||||
let timebins = timebins;
|
||||
let timebins = get_timebins(&channel_config, node.clone()).await?;
|
||||
if timebins.len() == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
@@ -245,206 +294,50 @@ async fn open_expanded_files_inner(
|
||||
let mut found_first = false;
|
||||
loop {
|
||||
let tb = timebins[p1];
|
||||
let ts_bin = Nanos {
|
||||
ns: tb * channel_config.time_bin_size.ns,
|
||||
};
|
||||
let path = paths::datapath(tb, &channel_config, &node);
|
||||
let mut file = OpenOptions::new().read(true).open(&path).await?;
|
||||
let ret = {
|
||||
let index_path = paths::index_path(ts_bin, &channel_config, &node)?;
|
||||
match OpenOptions::new().read(true).open(&index_path).await {
|
||||
Ok(mut index_file) => {
|
||||
let meta = index_file.metadata().await?;
|
||||
if meta.len() > 1024 * 1024 * 20 {
|
||||
return Err(Error::with_msg(format!(
|
||||
"too large index file {} bytes for {}",
|
||||
meta.len(),
|
||||
channel_config.channel.name
|
||||
)));
|
||||
}
|
||||
if meta.len() < 2 {
|
||||
return Err(Error::with_msg(format!(
|
||||
"bad meta len {} for {}",
|
||||
meta.len(),
|
||||
channel_config.channel.name
|
||||
)));
|
||||
}
|
||||
if meta.len() % 16 != 2 {
|
||||
return Err(Error::with_msg(format!(
|
||||
"bad meta len {} for {}",
|
||||
meta.len(),
|
||||
channel_config.channel.name
|
||||
)));
|
||||
}
|
||||
let mut buf = BytesMut::with_capacity(meta.len() as usize);
|
||||
buf.resize(buf.capacity(), 0);
|
||||
index_file.read_exact(&mut buf).await?;
|
||||
match super::index::find_largest_smaller_than(range.beg, &buf[2..])? {
|
||||
Some(o) => {
|
||||
found_first = true;
|
||||
file.seek(SeekFrom::Start(o.1)).await?;
|
||||
OpenedFile {
|
||||
file: Some(file),
|
||||
path,
|
||||
positioned: true,
|
||||
index: true,
|
||||
nreads: 0,
|
||||
}
|
||||
}
|
||||
None => OpenedFile {
|
||||
file: None,
|
||||
path,
|
||||
positioned: false,
|
||||
index: true,
|
||||
nreads: 0,
|
||||
},
|
||||
}
|
||||
}
|
||||
Err(e) => match e.kind() {
|
||||
ErrorKind::NotFound => {
|
||||
let ts1 = Instant::now();
|
||||
let res =
|
||||
super::index::position_static_len_datafile_at_largest_smaller_than(file, range.beg).await?;
|
||||
let ts2 = Instant::now();
|
||||
if false {
|
||||
// TODO collect for stats:
|
||||
let dur = ts2.duration_since(ts1);
|
||||
info!("position_static_len_datafile took ms {}", dur.as_millis());
|
||||
}
|
||||
file = res.0;
|
||||
if res.1 {
|
||||
found_first = true;
|
||||
OpenedFile {
|
||||
file: Some(file),
|
||||
path,
|
||||
positioned: true,
|
||||
index: false,
|
||||
nreads: res.2,
|
||||
}
|
||||
} else {
|
||||
OpenedFile {
|
||||
file: None,
|
||||
path,
|
||||
positioned: false,
|
||||
index: false,
|
||||
nreads: res.2,
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => Err(e)?,
|
||||
},
|
||||
let mut a = vec![];
|
||||
for path in paths::datapaths_for_timebin(tb, &channel_config, &node).await? {
|
||||
let w = position_file(&path, range, &channel_config, true).await?;
|
||||
if w.found {
|
||||
info!("----- open_expanded_files_inner w.found for {:?}", path);
|
||||
a.push(w.file);
|
||||
found_first = true;
|
||||
}
|
||||
};
|
||||
}
|
||||
let h = OpenedFileSet { timebin: tb, files: a };
|
||||
info!(
|
||||
"----- open_expanded_files_inner giving OpenedFileSet with {} files",
|
||||
h.files.len()
|
||||
);
|
||||
chtx.send(Ok(h)).await?;
|
||||
if found_first {
|
||||
p1 += 1;
|
||||
chtx.send(Ok(ret)).await?;
|
||||
break;
|
||||
} else if p1 == 0 {
|
||||
break;
|
||||
} else {
|
||||
if p1 == 0 {
|
||||
break;
|
||||
} else {
|
||||
p1 -= 1;
|
||||
}
|
||||
p1 -= 1;
|
||||
}
|
||||
}
|
||||
if found_first {
|
||||
// Append all following positioned files.
|
||||
loop {
|
||||
let tb = timebins[p1];
|
||||
let ts_bin = Nanos {
|
||||
ns: tb * channel_config.time_bin_size.ns,
|
||||
};
|
||||
let path = paths::datapath(tb, &channel_config, &node);
|
||||
let mut file = OpenOptions::new().read(true).open(&path).await?;
|
||||
let ret = {
|
||||
let index_path = paths::index_path(ts_bin, &channel_config, &node)?;
|
||||
match OpenOptions::new().read(true).open(&index_path).await {
|
||||
Ok(mut index_file) => {
|
||||
let meta = index_file.metadata().await?;
|
||||
if meta.len() > 1024 * 1024 * 20 {
|
||||
return Err(Error::with_msg(format!(
|
||||
"too large index file {} bytes for {}",
|
||||
meta.len(),
|
||||
channel_config.channel.name
|
||||
)));
|
||||
}
|
||||
if meta.len() < 2 {
|
||||
return Err(Error::with_msg(format!(
|
||||
"bad meta len {} for {}",
|
||||
meta.len(),
|
||||
channel_config.channel.name
|
||||
)));
|
||||
}
|
||||
if meta.len() % 16 != 2 {
|
||||
return Err(Error::with_msg(format!(
|
||||
"bad meta len {} for {}",
|
||||
meta.len(),
|
||||
channel_config.channel.name
|
||||
)));
|
||||
}
|
||||
let mut buf = BytesMut::with_capacity(meta.len() as usize);
|
||||
buf.resize(buf.capacity(), 0);
|
||||
index_file.read_exact(&mut buf).await?;
|
||||
match super::index::find_ge(range.beg, &buf[2..])? {
|
||||
Some(o) => {
|
||||
file.seek(SeekFrom::Start(o.1)).await?;
|
||||
OpenedFile {
|
||||
file: Some(file),
|
||||
path,
|
||||
positioned: true,
|
||||
index: true,
|
||||
nreads: 0,
|
||||
}
|
||||
}
|
||||
None => OpenedFile {
|
||||
file: None,
|
||||
path,
|
||||
positioned: false,
|
||||
index: true,
|
||||
nreads: 0,
|
||||
},
|
||||
}
|
||||
}
|
||||
Err(e) => match e.kind() {
|
||||
ErrorKind::NotFound => {
|
||||
let ts1 = Instant::now();
|
||||
let res = super::index::position_static_len_datafile(file, range.beg).await?;
|
||||
let ts2 = Instant::now();
|
||||
if false {
|
||||
// TODO collect for stats:
|
||||
let dur = ts2.duration_since(ts1);
|
||||
info!("position_static_len_datafile took ms {}", dur.as_millis());
|
||||
}
|
||||
file = res.0;
|
||||
if res.1 {
|
||||
OpenedFile {
|
||||
file: Some(file),
|
||||
path,
|
||||
positioned: true,
|
||||
index: false,
|
||||
nreads: res.2,
|
||||
}
|
||||
} else {
|
||||
OpenedFile {
|
||||
file: None,
|
||||
path,
|
||||
positioned: false,
|
||||
index: false,
|
||||
nreads: res.2,
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => Err(e)?,
|
||||
},
|
||||
let mut a = vec![];
|
||||
for path in paths::datapaths_for_timebin(tb, &channel_config, &node).await? {
|
||||
let w = position_file(&path, range, &channel_config, false).await?;
|
||||
if w.found {
|
||||
a.push(w.file);
|
||||
}
|
||||
};
|
||||
chtx.send(Ok(ret)).await?;
|
||||
}
|
||||
let h = OpenedFileSet { timebin: tb, files: a };
|
||||
chtx.send(Ok(h)).await?;
|
||||
p1 += 1;
|
||||
if p1 >= timebins.len() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
info!("Could not find some event before the requested range, fall back to standard file list.");
|
||||
// Try to locate files according to non-expand-algorithm.
|
||||
open_files_inner(chtx, range, &channel_config, node).await?;
|
||||
}
|
||||
@@ -481,7 +374,7 @@ fn expanded_file_list() {
|
||||
match file {
|
||||
Ok(k) => {
|
||||
info!("opened file: {:?}", k);
|
||||
paths.push(k.path.clone());
|
||||
paths.push(k.files);
|
||||
}
|
||||
Err(e) => {
|
||||
error!("error while trying to open {:?}", e);
|
||||
|
||||
Reference in New Issue
Block a user