Write cache

This commit is contained in:
Dominik Werder
2021-05-06 10:09:06 +02:00
parent a8932dba0d
commit 3eef0b4113
9 changed files with 208 additions and 43 deletions

View File

@@ -1,4 +1,5 @@
use crate::agg::eventbatch::MinMaxAvgScalarEventBatchStreamItem;
use crate::agg::scalarbinbatch::MinMaxAvgScalarBinBatch;
use crate::binnedstream::BinnedStream;
use crate::cache::pbv::PreBinnedValueByteStream;
use crate::channelconfig::{extract_matching_config_entry, read_local_config};
@@ -15,12 +16,15 @@ use netpod::{
AggKind, BinnedRange, Channel, Cluster, NanoRange, NodeConfigCached, PreBinnedPatchCoord, PreBinnedPatchIterator,
PreBinnedPatchRange, ToNanos,
};
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::future::Future;
use std::path::PathBuf;
use std::pin::Pin;
use std::task::{Context, Poll};
use tiny_keccak::Hasher;
use tokio::io::{AsyncRead, ReadBuf};
use tokio::fs::OpenOptions;
use tokio::io::{AsyncRead, AsyncWriteExt, ReadBuf};
#[allow(unused_imports)]
use tracing::{debug, error, info, trace, warn};
@@ -244,8 +248,16 @@ pub fn pre_binned_bytes_for_http(
query: &PreBinnedQuery,
) -> Result<PreBinnedValueByteStream, Error> {
info!("pre_binned_bytes_for_http {:?} {:?}", query, node_config.node);
let ret = super::cache::pbv::pre_binned_value_byte_stream_new(query, node_config);
Ok(ret)
let patch_node_ix = node_ix_for_patch(&query.patch, &query.channel, &node_config.node_config.cluster);
if node_config.ix as u32 != patch_node_ix {
Err(Error::with_msg(format!(
"pre_binned_bytes_for_http node mismatch node_config.ix {} patch_node_ix {}",
node_config.ix, patch_node_ix
)))
} else {
let ret = super::cache::pbv::pre_binned_value_byte_stream_new(query, node_config);
Ok(ret)
}
}
pub struct HttpBodyAsAsyncRead {
@@ -432,3 +444,80 @@ pub fn node_ix_for_patch(patch_coord: &PreBinnedPatchCoord, channel: &Channel, c
let ix = u32::from_le_bytes(a) % cluster.nodes.len() as u32;
ix
}
/// Identity of one pre-binned cache file: the channel it belongs to, the
/// aggregation kind, and the patch coordinates. Hashed to derive the on-disk
/// path (see `CacheFileDesc::path`).
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct CacheFileDesc {
// What identifies a cached file?
// Channel (backend + name) the cached data was read from.
channel: Channel,
// Aggregation applied to the events (contributes to the hash via Debug).
agg_kind: AggKind,
// Patch coordinate: bin length, patch length and patch index.
patch: PreBinnedPatchCoord,
}
impl CacheFileDesc {
    /// Feed the common digest prefix (version tag + channel identity) into a
    /// fresh SHA3-256 hasher. Shared by `hash` and `hash_channel` so the two
    /// digests can never drift apart.
    fn hasher_with_channel(&self) -> tiny_keccak::Sha3 {
        let mut h = tiny_keccak::Sha3::v256();
        // Version tag: bump to invalidate all previously written cache files.
        h.update(b"V000");
        h.update(self.channel.backend.as_bytes());
        h.update(self.channel.name.as_bytes());
        h
    }

    /// Hex-encoded SHA3-256 digest identifying this exact cache file.
    /// Channel identity, aggregation kind and all patch coordinates
    /// (bin length, patch length, patch index) contribute.
    pub fn hash(&self) -> String {
        let mut h = self.hasher_with_channel();
        h.update(format!("{:?}", self.agg_kind).as_bytes());
        h.update(&self.patch.spec().bin_t_len().to_le_bytes());
        h.update(&self.patch.spec().patch_t_len().to_le_bytes());
        h.update(&self.patch.ix().to_le_bytes());
        let mut buf = [0; 32];
        h.finalize(&mut buf);
        hex::encode(&buf)
    }

    /// Hex-encoded SHA3-256 digest of the channel identity only; used for the
    /// shard directory levels of the cache path.
    pub fn hash_channel(&self) -> String {
        let h = self.hasher_with_channel();
        let mut buf = [0; 32];
        h.finalize(&mut buf);
        hex::encode(&buf)
    }

    /// On-disk location of this cache file under the node's data base path:
    /// two shard levels from the channel hash, then channel name, aggregation
    /// kind, zero-padded bin length, a shard from the full hash, and the
    /// zero-padded patch index as the file name.
    pub fn path(&self, node_config: &NodeConfigCached) -> PathBuf {
        let hash = self.hash();
        let hc = self.hash_channel();
        node_config
            .node
            .data_base_path
            .join("cache")
            .join(&hc[0..3])
            .join(&hc[3..6])
            .join(&self.channel.name)
            .join(format!("{:?}", self.agg_kind))
            .join(format!("{:019}", self.patch.spec().bin_t_len()))
            .join(&hash[0..2])
            .join(format!("{:019}", self.patch.ix()))
    }
}
/// Serialize `values` as CBOR and persist them as the pre-binned cache file
/// for the given patch/channel/aggregation on this node.
///
/// The target path is derived via `CacheFileDesc::path`; missing parent
/// directories are created first and an existing file at the path is
/// truncated. Errors from CBOR encoding and from the filesystem bubble up.
pub async fn write_pb_cache_min_max_avg_scalar(
    values: MinMaxAvgScalarBinBatch,
    patch: PreBinnedPatchCoord,
    agg_kind: AggKind,
    channel: Channel,
    node_config: NodeConfigCached,
) -> Result<(), Error> {
    // The parameters are owned and not used again below, so move them into
    // the descriptor instead of cloning.
    let cfd = CacheFileDesc {
        channel,
        patch,
        agg_kind,
    };
    let path = cfd.path(&node_config);
    info!("Writing cache file\n{:?}\npath: {:?}", cfd, path);
    let enc = serde_cbor::to_vec(&values)?;
    info!("Encoded size: {}", enc.len());
    tokio::fs::create_dir_all(path.parent().unwrap()).await?;
    let mut f = OpenOptions::new()
        .truncate(true)
        .create(true)
        .write(true)
        .open(&path)
        .await?;
    f.write_all(&enc).await?;
    // Flush before returning so buffered-write errors surface here instead of
    // being silently dropped when the file handle goes out of scope.
    f.flush().await?;
    // NOTE(review): this write is not atomic; a concurrent reader could see a
    // truncated file. Consider writing to a temp file and renaming into place.
    Ok(())
}

73
disk/src/cache/pbv.rs vendored
View File

@@ -1,13 +1,14 @@
use crate::agg::binnedt::IntoBinnedT;
use crate::agg::scalarbinbatch::MinMaxAvgScalarBinBatchStreamItem;
use crate::agg::scalarbinbatch::{MinMaxAvgScalarBinBatch, MinMaxAvgScalarBinBatchStreamItem};
use crate::cache::pbvfs::{PreBinnedItem, PreBinnedValueFetchedStream};
use crate::cache::{node_ix_for_patch, MergedFromRemotes, PreBinnedQuery};
use crate::cache::{MergedFromRemotes, PreBinnedQuery};
use crate::frame::makeframe::make_frame;
use crate::raw::EventsQuery;
use crate::streamlog::Streamlog;
use bytes::Bytes;
use err::Error;
use futures_core::Stream;
use futures_util::pin_mut;
use futures_util::{FutureExt, StreamExt};
use netpod::log::*;
use netpod::streamext::SCC;
@@ -58,12 +59,12 @@ pub struct PreBinnedValueStream {
errored: bool,
completed: bool,
streamlog: Streamlog,
values: MinMaxAvgScalarBinBatch,
write_fut: Option<Pin<Box<dyn Future<Output = Result<(), Error>> + Send>>>,
}
impl PreBinnedValueStream {
pub fn new(query: PreBinnedQuery, node_config: &NodeConfigCached) -> Self {
// TODO check that we are the correct node.
let _node_ix = node_ix_for_patch(&query.patch, &query.channel, &node_config.node_config.cluster);
Self {
query,
node_config: node_config.clone(),
@@ -74,7 +75,9 @@ impl PreBinnedValueStream {
range_complete_emitted: false,
errored: false,
completed: false,
streamlog: Streamlog::new(),
streamlog: Streamlog::new(node_config.ix as u32),
values: MinMaxAvgScalarBinBatch::empty(),
write_fut: None,
}
}
@@ -182,32 +185,49 @@ impl Stream for PreBinnedValueStream {
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
use Poll::*;
if self.completed {
panic!("PreBinnedValueStream poll_next on completed");
}
if self.errored {
self.completed = true;
return Ready(None);
}
if let Some(item) = self.streamlog.pop() {
return Ready(Some(Ok(PreBinnedItem::Log(item))));
}
'outer: loop {
break if self.data_complete {
break if self.completed {
panic!("PreBinnedValueStream poll_next on completed");
} else if self.errored {
self.completed = true;
Ready(None)
} else if let Some(item) = self.streamlog.pop() {
Ready(Some(Ok(PreBinnedItem::Log(item))))
} else if let Some(fut) = &mut self.write_fut {
pin_mut!(fut);
match fut.poll(cx) {
Ready(Ok(())) => {
self.write_fut = None;
self.streamlog.append(Level::INFO, format!("cache file written"));
continue 'outer;
}
Ready(Err(e)) => {
self.errored = true;
Ready(Some(Err(e)))
}
Pending => Pending,
}
} else if self.data_complete {
if self.range_complete_observed {
if self.range_complete_emitted {
self.completed = true;
Ready(None)
} else {
let msg = format!(
"======== STREAMLOG ========= WRITE CACHE FILE\n{:?}\n\n\n",
self.query.patch
"Write cache file\n{:?}\nN: {}\n\n\n",
self.query.patch,
self.values.ts1s.len()
);
self.streamlog.append(Level::INFO, msg);
info!(
"======================== WRITE CACHE FILE\n{:?}\n\n\n",
self.query.patch
let values = std::mem::replace(&mut self.values, MinMaxAvgScalarBinBatch::empty());
let fut = super::write_pb_cache_min_max_avg_scalar(
values,
self.query.patch.clone(),
self.query.agg_kind.clone(),
self.query.channel.clone(),
self.node_config.clone(),
);
self.write_fut = Some(Box::pin(fut));
self.range_complete_emitted = true;
Ready(Some(Ok(PreBinnedItem::RangeComplete)))
}
@@ -220,10 +240,17 @@ impl Stream for PreBinnedValueStream {
Ready(Some(k)) => match k {
Ok(PreBinnedItem::RangeComplete) => {
self.range_complete_observed = true;
//Ready(Some(Ok(PreBinnedItem::RangeComplete)))
continue 'outer;
}
Ok(PreBinnedItem::Batch(batch)) => Ready(Some(Ok(PreBinnedItem::Batch(batch)))),
Ok(PreBinnedItem::Batch(batch)) => {
self.values.ts1s.extend(batch.ts1s.iter());
self.values.ts2s.extend(batch.ts2s.iter());
self.values.counts.extend(batch.counts.iter());
self.values.mins.extend(batch.mins.iter());
self.values.maxs.extend(batch.maxs.iter());
self.values.avgs.extend(batch.avgs.iter());
Ready(Some(Ok(PreBinnedItem::Batch(batch))))
}
Ok(PreBinnedItem::EventDataReadStats(stats)) => {
Ready(Some(Ok(PreBinnedItem::EventDataReadStats(stats))))
}

View File

@@ -6,6 +6,7 @@ use std::fmt::Formatter;
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct LogItem {
node_ix: u32,
#[serde(with = "levelserde")]
level: Level,
msg: String,
@@ -17,7 +18,7 @@ impl<'de> Visitor<'de> for VisitLevel {
type Value = u32;
fn expecting(&self, fmt: &mut Formatter) -> std::fmt::Result {
write!(fmt, "")
write!(fmt, "expect u32 Level code")
}
fn visit_u32<E>(self, v: u32) -> Result<Self::Value, E>
@@ -75,19 +76,47 @@ mod levelserde {
/// Buffer of log items produced while processing a stream, each tagged with
/// the index of the node that emitted it.
pub struct Streamlog {
// Pending log items; filled by `append`, drained by `pop`.
items: VecDeque<LogItem>,
// Index of this node within the cluster; stamped onto every item.
node_ix: u32,
}
impl Streamlog {
pub fn new() -> Self {
Self { items: VecDeque::new() }
pub fn new(node_ix: u32) -> Self {
Self {
items: VecDeque::new(),
node_ix,
}
}
pub fn append(&mut self, level: Level, msg: String) {
let item = LogItem { level, msg };
let item = LogItem {
node_ix: self.node_ix,
level,
msg,
};
self.items.push_back(item);
}
pub fn pop(&mut self) -> Option<LogItem> {
self.items.pop_back()
}
pub fn emit(item: &LogItem) {
match item.level {
Level::ERROR => {
error!("StreamLog Node {} {}", item.node_ix, item.msg);
}
Level::WARN => {
warn!("StreamLog Node {} {}", item.node_ix, item.msg);
}
Level::INFO => {
info!("StreamLog Node {} {}", item.node_ix, item.msg);
}
Level::DEBUG => {
debug!("StreamLog Node {} {}", item.node_ix, item.msg);
}
Level::TRACE => {
trace!("StreamLog Node {} {}", item.node_ix, item.msg);
}
}
}
}