Write cache
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
use crate::agg::eventbatch::MinMaxAvgScalarEventBatchStreamItem;
|
||||
use crate::agg::scalarbinbatch::MinMaxAvgScalarBinBatch;
|
||||
use crate::binnedstream::BinnedStream;
|
||||
use crate::cache::pbv::PreBinnedValueByteStream;
|
||||
use crate::channelconfig::{extract_matching_config_entry, read_local_config};
|
||||
@@ -15,12 +16,15 @@ use netpod::{
|
||||
AggKind, BinnedRange, Channel, Cluster, NanoRange, NodeConfigCached, PreBinnedPatchCoord, PreBinnedPatchIterator,
|
||||
PreBinnedPatchRange, ToNanos,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::BTreeMap;
|
||||
use std::future::Future;
|
||||
use std::path::PathBuf;
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use tiny_keccak::Hasher;
|
||||
use tokio::io::{AsyncRead, ReadBuf};
|
||||
use tokio::fs::OpenOptions;
|
||||
use tokio::io::{AsyncRead, AsyncWriteExt, ReadBuf};
|
||||
#[allow(unused_imports)]
|
||||
use tracing::{debug, error, info, trace, warn};
|
||||
|
||||
@@ -244,8 +248,16 @@ pub fn pre_binned_bytes_for_http(
|
||||
query: &PreBinnedQuery,
|
||||
) -> Result<PreBinnedValueByteStream, Error> {
|
||||
info!("pre_binned_bytes_for_http {:?} {:?}", query, node_config.node);
|
||||
let ret = super::cache::pbv::pre_binned_value_byte_stream_new(query, node_config);
|
||||
Ok(ret)
|
||||
let patch_node_ix = node_ix_for_patch(&query.patch, &query.channel, &node_config.node_config.cluster);
|
||||
if node_config.ix as u32 != patch_node_ix {
|
||||
Err(Error::with_msg(format!(
|
||||
"pre_binned_bytes_for_http node mismatch node_config.ix {} patch_node_ix {}",
|
||||
node_config.ix, patch_node_ix
|
||||
)))
|
||||
} else {
|
||||
let ret = super::cache::pbv::pre_binned_value_byte_stream_new(query, node_config);
|
||||
Ok(ret)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct HttpBodyAsAsyncRead {
|
||||
@@ -432,3 +444,80 @@ pub fn node_ix_for_patch(patch_coord: &PreBinnedPatchCoord, channel: &Channel, c
|
||||
let ix = u32::from_le_bytes(a) % cluster.nodes.len() as u32;
|
||||
ix
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct CacheFileDesc {
|
||||
// What identifies a cached file?
|
||||
channel: Channel,
|
||||
agg_kind: AggKind,
|
||||
patch: PreBinnedPatchCoord,
|
||||
}
|
||||
|
||||
impl CacheFileDesc {
|
||||
pub fn hash(&self) -> String {
|
||||
let mut h = tiny_keccak::Sha3::v256();
|
||||
h.update(b"V000");
|
||||
h.update(self.channel.backend.as_bytes());
|
||||
h.update(self.channel.name.as_bytes());
|
||||
h.update(format!("{:?}", self.agg_kind).as_bytes());
|
||||
h.update(&self.patch.spec().bin_t_len().to_le_bytes());
|
||||
h.update(&self.patch.spec().patch_t_len().to_le_bytes());
|
||||
h.update(&self.patch.ix().to_le_bytes());
|
||||
let mut buf = [0; 32];
|
||||
h.finalize(&mut buf);
|
||||
hex::encode(&buf)
|
||||
}
|
||||
|
||||
pub fn hash_channel(&self) -> String {
|
||||
let mut h = tiny_keccak::Sha3::v256();
|
||||
h.update(b"V000");
|
||||
h.update(self.channel.backend.as_bytes());
|
||||
h.update(self.channel.name.as_bytes());
|
||||
let mut buf = [0; 32];
|
||||
h.finalize(&mut buf);
|
||||
hex::encode(&buf)
|
||||
}
|
||||
|
||||
pub fn path(&self, node_config: &NodeConfigCached) -> PathBuf {
|
||||
let hash = self.hash();
|
||||
let hc = self.hash_channel();
|
||||
node_config
|
||||
.node
|
||||
.data_base_path
|
||||
.join("cache")
|
||||
.join(&hc[0..3])
|
||||
.join(&hc[3..6])
|
||||
.join(&self.channel.name)
|
||||
.join(format!("{:?}", self.agg_kind))
|
||||
.join(format!("{:019}", self.patch.spec().bin_t_len()))
|
||||
.join(&hash[0..2])
|
||||
.join(format!("{:019}", self.patch.ix()))
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn write_pb_cache_min_max_avg_scalar(
|
||||
values: MinMaxAvgScalarBinBatch,
|
||||
patch: PreBinnedPatchCoord,
|
||||
agg_kind: AggKind,
|
||||
channel: Channel,
|
||||
node_config: NodeConfigCached,
|
||||
) -> Result<(), Error> {
|
||||
let cfd = CacheFileDesc {
|
||||
channel: channel.clone(),
|
||||
patch: patch.clone(),
|
||||
agg_kind: agg_kind.clone(),
|
||||
};
|
||||
let path = cfd.path(&node_config);
|
||||
info!("Writing cache file\n{:?}\npath: {:?}", cfd, path);
|
||||
let enc = serde_cbor::to_vec(&values)?;
|
||||
info!("Encoded size: {}", enc.len());
|
||||
tokio::fs::create_dir_all(path.parent().unwrap()).await?;
|
||||
let mut f = OpenOptions::new()
|
||||
.truncate(true)
|
||||
.create(true)
|
||||
.write(true)
|
||||
.open(&path)
|
||||
.await?;
|
||||
f.write_all(&enc).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
73
disk/src/cache/pbv.rs
vendored
73
disk/src/cache/pbv.rs
vendored
@@ -1,13 +1,14 @@
|
||||
use crate::agg::binnedt::IntoBinnedT;
|
||||
use crate::agg::scalarbinbatch::MinMaxAvgScalarBinBatchStreamItem;
|
||||
use crate::agg::scalarbinbatch::{MinMaxAvgScalarBinBatch, MinMaxAvgScalarBinBatchStreamItem};
|
||||
use crate::cache::pbvfs::{PreBinnedItem, PreBinnedValueFetchedStream};
|
||||
use crate::cache::{node_ix_for_patch, MergedFromRemotes, PreBinnedQuery};
|
||||
use crate::cache::{MergedFromRemotes, PreBinnedQuery};
|
||||
use crate::frame::makeframe::make_frame;
|
||||
use crate::raw::EventsQuery;
|
||||
use crate::streamlog::Streamlog;
|
||||
use bytes::Bytes;
|
||||
use err::Error;
|
||||
use futures_core::Stream;
|
||||
use futures_util::pin_mut;
|
||||
use futures_util::{FutureExt, StreamExt};
|
||||
use netpod::log::*;
|
||||
use netpod::streamext::SCC;
|
||||
@@ -58,12 +59,12 @@ pub struct PreBinnedValueStream {
|
||||
errored: bool,
|
||||
completed: bool,
|
||||
streamlog: Streamlog,
|
||||
values: MinMaxAvgScalarBinBatch,
|
||||
write_fut: Option<Pin<Box<dyn Future<Output = Result<(), Error>> + Send>>>,
|
||||
}
|
||||
|
||||
impl PreBinnedValueStream {
|
||||
pub fn new(query: PreBinnedQuery, node_config: &NodeConfigCached) -> Self {
|
||||
// TODO check that we are the correct node.
|
||||
let _node_ix = node_ix_for_patch(&query.patch, &query.channel, &node_config.node_config.cluster);
|
||||
Self {
|
||||
query,
|
||||
node_config: node_config.clone(),
|
||||
@@ -74,7 +75,9 @@ impl PreBinnedValueStream {
|
||||
range_complete_emitted: false,
|
||||
errored: false,
|
||||
completed: false,
|
||||
streamlog: Streamlog::new(),
|
||||
streamlog: Streamlog::new(node_config.ix as u32),
|
||||
values: MinMaxAvgScalarBinBatch::empty(),
|
||||
write_fut: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -182,32 +185,49 @@ impl Stream for PreBinnedValueStream {
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
|
||||
use Poll::*;
|
||||
if self.completed {
|
||||
panic!("PreBinnedValueStream poll_next on completed");
|
||||
}
|
||||
if self.errored {
|
||||
self.completed = true;
|
||||
return Ready(None);
|
||||
}
|
||||
if let Some(item) = self.streamlog.pop() {
|
||||
return Ready(Some(Ok(PreBinnedItem::Log(item))));
|
||||
}
|
||||
'outer: loop {
|
||||
break if self.data_complete {
|
||||
break if self.completed {
|
||||
panic!("PreBinnedValueStream poll_next on completed");
|
||||
} else if self.errored {
|
||||
self.completed = true;
|
||||
Ready(None)
|
||||
} else if let Some(item) = self.streamlog.pop() {
|
||||
Ready(Some(Ok(PreBinnedItem::Log(item))))
|
||||
} else if let Some(fut) = &mut self.write_fut {
|
||||
pin_mut!(fut);
|
||||
match fut.poll(cx) {
|
||||
Ready(Ok(())) => {
|
||||
self.write_fut = None;
|
||||
self.streamlog.append(Level::INFO, format!("cache file written"));
|
||||
continue 'outer;
|
||||
}
|
||||
Ready(Err(e)) => {
|
||||
self.errored = true;
|
||||
Ready(Some(Err(e)))
|
||||
}
|
||||
Pending => Pending,
|
||||
}
|
||||
} else if self.data_complete {
|
||||
if self.range_complete_observed {
|
||||
if self.range_complete_emitted {
|
||||
self.completed = true;
|
||||
Ready(None)
|
||||
} else {
|
||||
let msg = format!(
|
||||
"======== STREAMLOG ========= WRITE CACHE FILE\n{:?}\n\n\n",
|
||||
self.query.patch
|
||||
"Write cache file\n{:?}\nN: {}\n\n\n",
|
||||
self.query.patch,
|
||||
self.values.ts1s.len()
|
||||
);
|
||||
self.streamlog.append(Level::INFO, msg);
|
||||
info!(
|
||||
"======================== WRITE CACHE FILE\n{:?}\n\n\n",
|
||||
self.query.patch
|
||||
let values = std::mem::replace(&mut self.values, MinMaxAvgScalarBinBatch::empty());
|
||||
let fut = super::write_pb_cache_min_max_avg_scalar(
|
||||
values,
|
||||
self.query.patch.clone(),
|
||||
self.query.agg_kind.clone(),
|
||||
self.query.channel.clone(),
|
||||
self.node_config.clone(),
|
||||
);
|
||||
self.write_fut = Some(Box::pin(fut));
|
||||
self.range_complete_emitted = true;
|
||||
Ready(Some(Ok(PreBinnedItem::RangeComplete)))
|
||||
}
|
||||
@@ -220,10 +240,17 @@ impl Stream for PreBinnedValueStream {
|
||||
Ready(Some(k)) => match k {
|
||||
Ok(PreBinnedItem::RangeComplete) => {
|
||||
self.range_complete_observed = true;
|
||||
//Ready(Some(Ok(PreBinnedItem::RangeComplete)))
|
||||
continue 'outer;
|
||||
}
|
||||
Ok(PreBinnedItem::Batch(batch)) => Ready(Some(Ok(PreBinnedItem::Batch(batch)))),
|
||||
Ok(PreBinnedItem::Batch(batch)) => {
|
||||
self.values.ts1s.extend(batch.ts1s.iter());
|
||||
self.values.ts2s.extend(batch.ts2s.iter());
|
||||
self.values.counts.extend(batch.counts.iter());
|
||||
self.values.mins.extend(batch.mins.iter());
|
||||
self.values.maxs.extend(batch.maxs.iter());
|
||||
self.values.avgs.extend(batch.avgs.iter());
|
||||
Ready(Some(Ok(PreBinnedItem::Batch(batch))))
|
||||
}
|
||||
Ok(PreBinnedItem::EventDataReadStats(stats)) => {
|
||||
Ready(Some(Ok(PreBinnedItem::EventDataReadStats(stats))))
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ use std::fmt::Formatter;
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct LogItem {
|
||||
node_ix: u32,
|
||||
#[serde(with = "levelserde")]
|
||||
level: Level,
|
||||
msg: String,
|
||||
@@ -17,7 +18,7 @@ impl<'de> Visitor<'de> for VisitLevel {
|
||||
type Value = u32;
|
||||
|
||||
fn expecting(&self, fmt: &mut Formatter) -> std::fmt::Result {
|
||||
write!(fmt, "")
|
||||
write!(fmt, "expect u32 Level code")
|
||||
}
|
||||
|
||||
fn visit_u32<E>(self, v: u32) -> Result<Self::Value, E>
|
||||
@@ -75,19 +76,47 @@ mod levelserde {
|
||||
|
||||
pub struct Streamlog {
|
||||
items: VecDeque<LogItem>,
|
||||
node_ix: u32,
|
||||
}
|
||||
|
||||
impl Streamlog {
|
||||
pub fn new() -> Self {
|
||||
Self { items: VecDeque::new() }
|
||||
pub fn new(node_ix: u32) -> Self {
|
||||
Self {
|
||||
items: VecDeque::new(),
|
||||
node_ix,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn append(&mut self, level: Level, msg: String) {
|
||||
let item = LogItem { level, msg };
|
||||
let item = LogItem {
|
||||
node_ix: self.node_ix,
|
||||
level,
|
||||
msg,
|
||||
};
|
||||
self.items.push_back(item);
|
||||
}
|
||||
|
||||
pub fn pop(&mut self) -> Option<LogItem> {
|
||||
self.items.pop_back()
|
||||
}
|
||||
|
||||
pub fn emit(item: &LogItem) {
|
||||
match item.level {
|
||||
Level::ERROR => {
|
||||
error!("StreamLog Node {} {}", item.node_ix, item.msg);
|
||||
}
|
||||
Level::WARN => {
|
||||
warn!("StreamLog Node {} {}", item.node_ix, item.msg);
|
||||
}
|
||||
Level::INFO => {
|
||||
info!("StreamLog Node {} {}", item.node_ix, item.msg);
|
||||
}
|
||||
Level::DEBUG => {
|
||||
debug!("StreamLog Node {} {}", item.node_ix, item.msg);
|
||||
}
|
||||
Level::TRACE => {
|
||||
trace!("StreamLog Node {} {}", item.node_ix, item.msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user