Refactor
This commit is contained in:
@@ -1,21 +1,18 @@
|
||||
use crate::archeng::backreadbuf::BackReadBuf;
|
||||
use crate::archeng::datablock::{read_data2, read_datafile_header2};
|
||||
use crate::archeng::indexfiles::{database_connect, unfold_stream, UnfoldExec};
|
||||
use crate::archeng::indextree::{
|
||||
read_datablockref2, DataheaderPos, Dataref, HeaderVersion, IndexFileBasics, RecordIter, RecordTarget,
|
||||
read_datablockref2, Dataref, HeaderVersion, IndexFileBasics, RecordIter, RecordTarget,
|
||||
};
|
||||
use commonio::ringbuf::RingBuf;
|
||||
use commonio::{open_read, StatsChannel};
|
||||
use err::Error;
|
||||
use futures_core::{Future, Stream};
|
||||
use items::WithLen;
|
||||
#[allow(unused)]
|
||||
use netpod::log::*;
|
||||
use netpod::{Channel, ChannelArchiver, NanoRange};
|
||||
use netpod::{Channel, Database, NanoRange};
|
||||
#[allow(unused)]
|
||||
use serde::Serialize;
|
||||
use serde_json::Value as JsVal;
|
||||
use std::collections::{BTreeMap, VecDeque};
|
||||
use std::collections::VecDeque;
|
||||
use std::path::PathBuf;
|
||||
use std::pin::Pin;
|
||||
use tokio::fs::File;
|
||||
@@ -41,37 +38,32 @@ enum Steps {
|
||||
}
|
||||
|
||||
struct BlockrefStream {
|
||||
conf: ChannelArchiver,
|
||||
dbconf: Database,
|
||||
channel: Channel,
|
||||
range: NanoRange,
|
||||
expand: bool,
|
||||
steps: Steps,
|
||||
paths: VecDeque<String>,
|
||||
file1: Option<BackReadBuf<File>>,
|
||||
file2: Option<RingBuf<File>>,
|
||||
last_dp: u64,
|
||||
last_dp2: u64,
|
||||
last_f2: String,
|
||||
last_dfhpos: DataheaderPos,
|
||||
dfnotfound: BTreeMap<String, bool>,
|
||||
data_bytes_read: u64,
|
||||
same_dfh_count: u64,
|
||||
}
|
||||
|
||||
impl BlockrefStream {
|
||||
fn new(channel: Channel, range: NanoRange, conf: ChannelArchiver) -> Self {
|
||||
fn new(channel: Channel, range: NanoRange, expand: bool, dbconf: Database) -> Self {
|
||||
debug!("new BlockrefStream {:?}", range);
|
||||
Self {
|
||||
conf,
|
||||
dbconf,
|
||||
channel,
|
||||
range,
|
||||
expand,
|
||||
steps: Steps::Start,
|
||||
paths: VecDeque::new(),
|
||||
file1: None,
|
||||
file2: None,
|
||||
last_dp: 0,
|
||||
last_dp2: 0,
|
||||
last_f2: String::new(),
|
||||
last_dfhpos: DataheaderPos(u64::MAX),
|
||||
dfnotfound: BTreeMap::new(),
|
||||
data_bytes_read: 0,
|
||||
same_dfh_count: 0,
|
||||
}
|
||||
@@ -88,7 +80,7 @@ impl BlockrefStream {
|
||||
)))
|
||||
}
|
||||
SelectIndexFile => {
|
||||
let dbc = database_connect(&self.conf.database).await?;
|
||||
let dbc = database_connect(&self.dbconf).await?;
|
||||
let sql = "select i.path from indexfiles i, channels c, channel_index_map m where c.name = $1 and m.channel = c.rowid and i.rowid = m.index";
|
||||
let rows = dbc.query(sql, &[&self.channel.name()]).await?;
|
||||
for row in rows {
|
||||
@@ -113,13 +105,16 @@ impl BlockrefStream {
|
||||
// For simplicity, simply read all storage classes linearly.
|
||||
if let Some(path) = self.paths.pop_front() {
|
||||
// TODO
|
||||
let mut file = open_read(path.clone().into(), stats).await?;
|
||||
let mut file = open_read(path.clone().into(), stats).await.map_err(|e| {
|
||||
error!("can not open {:?}", path);
|
||||
e
|
||||
})?;
|
||||
let basics = IndexFileBasics::from_file(&path, &mut file, stats).await?;
|
||||
let mut tree = basics
|
||||
.rtree_for_channel(self.channel.name(), stats)
|
||||
.await?
|
||||
.ok_or_else(|| Error::with_msg_no_trace("channel not in index files"))?;
|
||||
if let Some(iter) = tree.iter_range(self.range.clone(), stats).await? {
|
||||
if let Some(iter) = tree.iter_range(self.range.clone(), self.expand, stats).await? {
|
||||
debug!("SetupNextPath {:?}", path);
|
||||
self.steps = ReadBlocks(iter, basics.hver().duplicate(), path.clone().into());
|
||||
self.file1 = Some(BackReadBuf::new(file, 0, stats.clone()).await?);
|
||||
@@ -136,76 +131,12 @@ impl BlockrefStream {
|
||||
}
|
||||
}
|
||||
ReadBlocks(ref mut iter, ref hver, ref indexpath) => {
|
||||
// TODO stats
|
||||
let stats = &StatsChannel::dummy();
|
||||
// TODO I need to keep some datafile open.
|
||||
let item = if let Some(rec) = iter.next().await? {
|
||||
// TODO the iterator should actually return Dataref. We never expect child nodes here.
|
||||
if let RecordTarget::Dataref(dp) = rec.target {
|
||||
let f1 = self.file1.as_mut().unwrap();
|
||||
let dref = read_datablockref2(f1, dp.clone(), hver.as_ref()).await?;
|
||||
let dpath = indexpath.parent().unwrap().join(dref.file_name());
|
||||
// TODO Remember the index path, need it here for relative path.
|
||||
// TODO open datafile, relative path to index path.
|
||||
// TODO keep open when path does not change.
|
||||
let acc;
|
||||
let num_samples;
|
||||
if false {
|
||||
if let Some(_) = self.dfnotfound.get(dref.file_name()) {
|
||||
num_samples = 0;
|
||||
acc = 1;
|
||||
} else {
|
||||
if dref.file_name() == self.last_f2 {
|
||||
acc = 2;
|
||||
} else {
|
||||
match open_read(dpath.clone(), stats).await {
|
||||
Ok(f2) => {
|
||||
acc = 4;
|
||||
self.file2 = Some(
|
||||
RingBuf::new(f2, dref.data_header_pos().0, StatsChannel::dummy())
|
||||
.await?,
|
||||
);
|
||||
self.last_f2 = dref.file_name().into();
|
||||
}
|
||||
Err(_) => {
|
||||
acc = 3;
|
||||
self.file2 = None;
|
||||
}
|
||||
}
|
||||
};
|
||||
if let Some(f2) = self.file2.as_mut() {
|
||||
if dref.file_name() == self.last_f2 && dref.data_header_pos() == self.last_dfhpos {
|
||||
num_samples = 0;
|
||||
} else {
|
||||
self.last_dfhpos = dref.data_header_pos();
|
||||
let rp1 = f2.rp_abs();
|
||||
let dfheader = read_datafile_header2(f2, dref.data_header_pos()).await?;
|
||||
let data = read_data2(f2, &dfheader, self.range.clone(), false).await?;
|
||||
let rp2 = f2.rp_abs();
|
||||
self.data_bytes_read += rp2 - rp1;
|
||||
num_samples = dfheader.num_samples;
|
||||
if data.len() != num_samples as usize {
|
||||
if (data.len() as i64 - num_samples as i64).abs() < 4 {
|
||||
// TODO get always one event less than num_samples tells us.
|
||||
//warn!("small deviation {} vs {}", data.len(), num_samples);
|
||||
} else {
|
||||
return Err(Error::with_msg_no_trace(format!(
|
||||
"event count mismatch {} vs {}",
|
||||
data.len(),
|
||||
num_samples
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
self.dfnotfound.insert(dref.file_name().into(), true);
|
||||
num_samples = 0;
|
||||
};
|
||||
}
|
||||
} else {
|
||||
acc = 6;
|
||||
num_samples = 0;
|
||||
}
|
||||
let jsval = serde_json::to_value((
|
||||
dp.0,
|
||||
dp.0 as i64 - self.last_dp as i64,
|
||||
@@ -213,15 +144,20 @@ impl BlockrefStream {
|
||||
dref.data_header_pos.0,
|
||||
dref.data_header_pos.0 as i64 - self.last_dp2 as i64,
|
||||
dref.next().0,
|
||||
acc,
|
||||
num_samples,
|
||||
))?;
|
||||
self.last_dp = dp.0;
|
||||
self.last_dp2 = dref.data_header_pos.0;
|
||||
if rec.end.ns > self.range.end {
|
||||
debug!("Have block end beyond range, stop");
|
||||
self.steps = Done;
|
||||
}
|
||||
let bref = Blockref { dref, dpath };
|
||||
trace!("emit {:?} Record range: {:?} TO {:?}", bref, rec.beg, rec.end);
|
||||
BlockrefItem::Blockref(bref, jsval)
|
||||
} else {
|
||||
panic!();
|
||||
error!("not a Dataref target");
|
||||
self.steps = Done;
|
||||
BlockrefItem::JsVal(JsVal::String(format!("not a Dataref target")))
|
||||
}
|
||||
} else {
|
||||
debug!(
|
||||
@@ -252,7 +188,47 @@ impl UnfoldExec for BlockrefStream {
|
||||
pub fn blockref_stream(
|
||||
channel: Channel,
|
||||
range: NanoRange,
|
||||
conf: ChannelArchiver,
|
||||
expand: bool,
|
||||
dbconf: Database,
|
||||
) -> impl Stream<Item = Result<BlockrefItem, Error>> {
|
||||
unfold_stream(BlockrefStream::new(channel, range, conf.clone()))
|
||||
unfold_stream(BlockrefStream::new(channel, range, expand, dbconf))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use futures_util::StreamExt;
|
||||
use netpod::timeunits::SEC;
|
||||
|
||||
#[test]
|
||||
fn find_ref_1() -> Result<(), Error> {
|
||||
let fut = async move {
|
||||
let channel = Channel {
|
||||
backend: "sls-archive".into(),
|
||||
name: "X05DA-FE-WI1:TC1".into(),
|
||||
};
|
||||
use chrono::{DateTime, Utc};
|
||||
let dtbeg: DateTime<Utc> = "2021-10-01T00:00:00Z".parse()?;
|
||||
let dtend: DateTime<Utc> = "2021-10-10T00:00:00Z".parse()?;
|
||||
fn tons(dt: &DateTime<Utc>) -> u64 {
|
||||
dt.timestamp() as u64 * SEC + dt.timestamp_subsec_nanos() as u64
|
||||
}
|
||||
let range = NanoRange {
|
||||
beg: tons(&dtbeg),
|
||||
end: tons(&dtend),
|
||||
};
|
||||
let dbconf = Database {
|
||||
host: "localhost".into(),
|
||||
name: "testingdaq".into(),
|
||||
user: "testingdaq".into(),
|
||||
pass: "testingdaq".into(),
|
||||
};
|
||||
let mut refs = Box::pin(blockref_stream(channel, range, false, dbconf));
|
||||
while let Some(item) = refs.next().await {
|
||||
info!("Got ref {:?}", item);
|
||||
}
|
||||
Ok(())
|
||||
};
|
||||
taskrun::run(fut)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user