Mark closed pulse map files

Dominik Werder
2021-09-17 20:38:20 +02:00
parent ad7f25b4c6
commit e1e930f453
14 changed files with 263 additions and 161 deletions


@@ -14,7 +14,9 @@ use netpod::{
    log::*, ByteSize, Channel, FileIoBufferSize, NanoRange, NodeConfigCached, PerfOpts, ScalarType, Shape, APP_OCTET,
};
use netpod::{ChannelSearchQuery, ChannelSearchResult, ProxyConfig, APP_JSON};
use parse::channelconfig::{extract_matching_config_entry, read_local_config, Config, MatchingConfigEntry};
use parse::channelconfig::{
    extract_matching_config_entry, read_local_config, Config, ConfigEntry, MatchingConfigEntry,
};
use serde::{Deserialize, Serialize};
use serde_json::Value as JsonValue;
use std::future::Future;
@@ -537,6 +539,93 @@ impl DataApiPython3DataStream {
            completed: false,
        }
    }
    fn convert_item(
        b: EventFull,
        channel: &Channel,
        entry: &ConfigEntry,
        header_out: &mut bool,
        count_events: &mut usize,
    ) -> Result<BytesMut, Error> {
        let mut d = BytesMut::new();
        for i1 in 0..b.tss.len() {
            if *count_events < 6 {
                info!(
                    "deco len {:?} BE {} scalar-type {:?} shape {:?} comps {:?}",
                    b.decomps[i1].as_ref().map(|x| x.len()),
                    b.be[i1],
                    b.scalar_types[i1],
                    b.shapes[i1],
                    b.comps[i1],
                );
            }
            // TODO emit warning when we use a different setting compared to channel config.
            if false {
                let _compression = if let (Shape::Image(..), Some(..)) = (&b.shapes[i1], &b.comps[i1]) {
                    Some(1)
                } else {
                    None
                };
            };
            let compression = if let Some(_) = &b.comps[i1] { Some(1) } else { None };
            if !*header_out {
                let head = Api1ChannelHeader {
                    name: channel.name.clone(),
                    ty: scalar_type_to_api3proto(&b.scalar_types[i1]).into(),
                    byte_order: if b.be[i1] {
                        "BIG_ENDIAN".into()
                    } else {
                        "LITTLE_ENDIAN".into()
                    },
                    // The shape is inconsistent on the events.
                    // Seems like the config is to be trusted in this case.
                    shape: shape_to_api3proto(&entry.shape),
                    compression,
                };
                let h = serde_json::to_string(&head)?;
                info!("sending channel header {}", h);
                let l1 = 1 + h.as_bytes().len() as u32;
                d.put_u32(l1);
                d.put_u8(0);
                d.extend_from_slice(h.as_bytes());
                d.put_u32(l1);
                *header_out = true;
            }
            {
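                // Every event frame below uses the same wire layout:
                // [u32 len][u8 tag = 1][u64 ts][u64 pulse][blob][u32 len], with len = 17 + blob.len().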
                match &b.shapes[i1] {
                    Shape::Image(_, _) => {
                        let l1 = 17 + b.blobs[i1].len() as u32;
                        d.put_u32(l1);
                        d.put_u8(1);
                        d.put_u64(b.tss[i1]);
                        d.put_u64(b.pulses[i1]);
                        d.put_slice(&b.blobs[i1]);
                        d.put_u32(l1);
                    }
                    Shape::Wave(_) => {
                        let l1 = 17 + b.blobs[i1].len() as u32;
                        d.put_u32(l1);
                        d.put_u8(1);
                        d.put_u64(b.tss[i1]);
                        d.put_u64(b.pulses[i1]);
                        d.put_slice(&b.blobs[i1]);
                        d.put_u32(l1);
                    }
                    _ => {
                        let l1 = 17 + b.blobs[i1].len() as u32;
                        d.put_u32(l1);
                        d.put_u8(1);
                        d.put_u64(b.tss[i1]);
                        d.put_u64(b.pulses[i1]);
                        d.put_slice(&b.blobs[i1]);
                        d.put_u32(l1);
                    }
                }
            }
            *count_events += 1;
        }
        Ok(d)
    }
}
impl Stream for DataApiPython3DataStream {
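A note for readers of this diff: the byte stream that convert_item produces is simple enough to verify with a hand-rolled reader. The sketch below is illustrative only; Frame and read_frame are names invented for this note, not part of the codebase. It assumes exactly what the code above shows: each frame is [u32 len][u8 tag][payload][u32 len] with big-endian integers (BufMut's put_u32/put_u64 write big-endian), tag 0 carrying the JSON channel header and tag 1 carrying [u64 ts][u64 pulse][blob].

use std::convert::TryInto;
use std::io::{self, Read};

enum Frame {
    // tag 0: JSON channel header, emitted once at the start of the stream
    Header(String),
    // tag 1: one event: timestamp, pulse id, raw blob
    Event { ts: u64, pulse: u64, blob: Vec<u8> },
}

fn read_frame<R: Read>(r: &mut R) -> io::Result<Frame> {
    // The leading length counts the tag byte plus the payload, not the trailing copy.
    let mut n = [0u8; 4];
    r.read_exact(&mut n)?;
    let len = u32::from_be_bytes(n) as usize;
    let mut buf = vec![0u8; len];
    r.read_exact(&mut buf)?;
    // Each frame ends with a repetition of the length word.
    r.read_exact(&mut n)?;
    match buf.first().copied() {
        Some(0) => Ok(Frame::Header(String::from_utf8_lossy(&buf[1..]).into_owned())),
        Some(1) if len >= 17 => {
            let ts = u64::from_be_bytes(buf[1..9].try_into().unwrap());
            let pulse = u64::from_be_bytes(buf[9..17].try_into().unwrap());
            Ok(Frame::Event { ts, pulse, blob: buf[17..].to_vec() })
        }
        _ => Err(io::Error::new(io::ErrorKind::InvalidData, "unexpected frame")),
    }
}

Calling read_frame in a loop until EOF should yield the header once, followed by one Event per archived pulse in the requested range.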
@@ -583,9 +672,10 @@ impl Stream for DataApiPython3DataStream {
                        }
                        MatchingConfigEntry::Entry(entry) => entry.clone(),
                    };
                    warn!("found channel_config {:?}", entry);
                    let channel = self.channels[self.chan_ix - 1].clone();
                    info!("found channel_config for {}: {:?}", channel.name, entry);
                    let evq = RawEventsQuery {
                        channel: self.channels[self.chan_ix - 1].clone(),
                        channel,
                        range: self.range.clone(),
                        agg_kind: netpod::AggKind::EventBlobs,
                        disk_io_buffer_size: self.file_io_buffer_size.0,
@@ -626,70 +716,13 @@ impl Stream for DataApiPython3DataStream {
                            Ok(b) => {
                                let f = match b {
                                    StreamItem::DataItem(RangeCompletableItem::Data(b)) => {
                                        let mut d = BytesMut::new();
                                        for i1 in 0..b.tss.len() {
                                            if count_events < 6 {
                                                info!(
                                                    "deco len {:?} BE {} scalar-type {:?} shape {:?} comps {:?}",
                                                    b.decomps[i1].as_ref().map(|x| x.len()),
                                                    b.be[i1],
                                                    b.scalar_types[i1],
                                                    b.shapes[i1],
                                                    b.comps[i1],
                                                );
                                            }
                                            let compression = if let (Shape::Image(..), Some(..)) = (&b.shapes[i1], &b.comps[i1]) { Some(1) } else { None };
                                            if !header_out {
                                                let head = Api1ChannelHeader {
                                                    name: channel.name.clone(),
                                                    ty: scalar_type_to_api3proto(&b.scalar_types[i1])
                                                        .into(),
                                                    byte_order: if b.be[i1] {
                                                        "BIG_ENDIAN".into()
                                                    } else {
                                                        "LITTLE_ENDIAN".into()
                                                    },
                                                    // The shape is inconsistent on the events.
                                                    // Seems like the config is to be trusted in this case.
                                                    shape: shape_to_api3proto(&entry.shape),
                                                    compression,
                                                };
                                                let h = serde_json::to_string(&head)?;
                                                info!("sending channel header {}", h);
                                                let l1 = 1 + h.as_bytes().len() as u32;
                                                d.put_u32(l1);
                                                d.put_u8(0);
                                                d.extend_from_slice(h.as_bytes());
                                                d.put_u32(l1);
                                                header_out = true;
                                            }
                                            {
                                                match &b.shapes[i1] {
                                                    Shape::Image(_, _) => {
                                                        let l1 = 17 + b.blobs[i1].len() as u32;
                                                        d.put_u32(l1);
                                                        d.put_u8(1);
                                                        d.put_u64(b.tss[i1]);
                                                        d.put_u64(b.pulses[i1]);
                                                        d.put_slice(&b.blobs[i1]);
                                                        d.put_u32(l1);
                                                    }
                                                    _ => {
                                                        if let Some(deco) = &b.decomps[i1] {
                                                            let l1 = 17 + deco.len() as u32;
                                                            d.put_u32(l1);
                                                            d.put_u8(1);
                                                            d.put_u64(b.tss[i1]);
                                                            d.put_u64(b.pulses[i1]);
                                                            d.put_slice(&deco);
                                                            d.put_u32(l1);
                                                        }
                                                    }
                                                }
                                            }
                                            count_events += 1;
                                        }
                                        d
                                        Self::convert_item(
                                            b,
                                            &channel,
                                            &entry,
                                            &mut header_out,
                                            &mut count_events,
                                        )?
                                    }
                                    _ => BytesMut::new(),
                                };
@@ -732,7 +765,7 @@ impl Stream for DataApiPython3DataStream {
}
pub async fn api1_binary_events(req: Request<Body>, node_config: &NodeConfigCached) -> Result<Response<Body>, Error> {
    info!("api1_binary_events headers: {:?}", req.headers());
    info!("api1_binary_events uri: {:?} headers: {:?}", req.uri(), req.headers());
    let accept_def = "";
    let accept = req
        .headers()
@@ -741,8 +774,12 @@ pub async fn api1_binary_events(req: Request<Body>, node_config: &NodeConfigCach
        .to_owned();
    let (_head, body) = req.into_parts();
    let body_data = hyper::body::to_bytes(body).await?;
    info!("got body_data: {:?}", String::from_utf8(body_data[..].to_vec()));
    let qu: Api1Query = serde_json::from_slice(&body_data)?;
    let qu: Api1Query = if let Ok(qu) = serde_json::from_slice(&body_data) {
        qu
    } else {
        error!("got body_data: {:?}", String::from_utf8(body_data[..].to_vec()));
        return Err(Error::with_msg_no_trace("can not parse query"));
    };
    info!("got Api1Query: {:?}", qu);
    let beg_date = chrono::DateTime::parse_from_rfc3339(&qu.range.start_date);
    let end_date = chrono::DateTime::parse_from_rfc3339(&qu.range.end_date);


@@ -149,6 +149,7 @@ macro_rules! static_http_api1 {
}
async fn http_service_try(req: Request<Body>, node_config: &NodeConfigCached) -> Result<Response<Body>, Error> {
    info!("http_service_try {:?}", req.uri());
    let uri = req.uri().clone();
    let path = uri.path();
    if path == "/api/4/node_status" {
@@ -251,10 +252,12 @@ async fn http_service_try(req: Request<Body>, node_config: &NodeConfigCached) ->
        if req.method() == Method::POST {
            Ok(api1::api1_binary_events(req, &node_config).await?)
        } else {
            Ok(response(StatusCode::NOT_ACCEPTABLE).body(Body::empty())?)
            Ok(response(StatusCode::METHOD_NOT_ALLOWED).body(Body::empty())?)
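            // 405 Method Not Allowed is the accurate status for a non-POST request here;
            // 406 Not Acceptable would claim a content-negotiation failure.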
        }
    } else if pulsemap::IndexFullHttpFunction::path_matches(path) {
        pulsemap::IndexFullHttpFunction::handle(req, &node_config).await
    } else if pulsemap::MarkClosedHttpFunction::path_matches(path) {
        pulsemap::MarkClosedHttpFunction::handle(req, &node_config).await
    } else if pulsemap::MapPulseLocalHttpFunction::path_matches(path) {
        pulsemap::MapPulseLocalHttpFunction::handle(req, &node_config).await
    } else if pulsemap::MapPulseHistoHttpFunction::path_matches(path) {
@@ -387,7 +390,7 @@ async fn binned(req: Request<Body>, node_config: &NodeConfigCached) -> Result<Re
    let desc = format!("binned-BEG-{}-END-{}", query.range().beg / SEC, query.range().end / SEC);
    let span1 = span!(Level::INFO, "httpret::binned", desc = &desc.as_str());
    span1.in_scope(|| {
        info!("binned STARTING");
        info!("binned STARTING {:?}", query);
    });
    match head.headers.get(http::header::ACCEPT) {
        Some(v) if v == APP_OCTET => binned_binary(query, node_config).await,


@@ -36,6 +36,7 @@ const _MAP_INDEX_FAST_URL_PREFIX: &'static str = "/api/1/map/index/fast/";
const MAP_PULSE_HISTO_URL_PREFIX: &'static str = "/api/1/map/pulse/histo/";
const MAP_PULSE_URL_PREFIX: &'static str = "/api/1/map/pulse/";
const MAP_PULSE_LOCAL_URL_PREFIX: &'static str = "/api/1/map/pulse/local/";
const MAP_PULSE_MARK_CLOSED_URL_PREFIX: &'static str = "/api/1/map/pulse/mark/closed/";
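// Note: this prefix nests under MAP_PULSE_URL_PREFIX, so the dispatcher in httpret has to
// test the mark/closed route before the plain map/pulse handler, which would otherwise try
// to parse the rest of the path as a pulse id and fail.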
async fn make_tables(node_config: &NodeConfigCached) -> Result<(), Error> {
    let conn = dbconn::create_connection(&node_config.node_config.cluster.database).await?;
@@ -217,53 +218,73 @@ impl IndexFullHttpFunction {
        if req.method() != Method::GET {
            return Ok(response(StatusCode::NOT_ACCEPTABLE).body(Body::empty())?);
        }
        Self::index(node_config).await
        let ret = match Self::index(node_config).await {
            Ok(msg) => response(StatusCode::OK).body(Body::from(msg))?,
            Err(e) => response(StatusCode::INTERNAL_SERVER_ERROR).body(Body::from(format!("{:?}", e)))?,
        };
        Ok(ret)
    }
    pub async fn index(node_config: &NodeConfigCached) -> Result<Response<Body>, Error> {
    pub async fn index_channel(
        channel_name: String,
        conn: &dbconn::pg::Client,
        node_config: &NodeConfigCached,
    ) -> Result<String, Error> {
        let mut msg = format!("Index channel {}", channel_name);
        let files = datafiles_for_channel(channel_name.clone(), node_config).await?;
        msg = format!("{}\n{:?}", msg, files);
        for path in files {
            let splitted: Vec<_> = path.to_str().unwrap().split("/").collect();
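            // Data files live under .../<timebin>/<split>/<datafile>, so the two path
            // components before the file name yield timebin and split.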
            let timebin: u64 = splitted[splitted.len() - 3].parse()?;
            let split: u64 = splitted[splitted.len() - 2].parse()?;
            if false {
                info!(
                    "hostname {} timebin {} split {}",
                    node_config.node.host, timebin, split
                );
            }
            let file = tokio::fs::OpenOptions::new().read(true).open(path).await?;
            let (r2, file) = read_first_chunk(file).await?;
            msg = format!("{}\n{:?}", msg, r2);
            let (r3, _file) = read_last_chunk(file, r2.pos, r2.len).await?;
            msg = format!("{}\n{:?}", msg, r3);
            // TODO remove update of static columns when older clients are removed.
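            // Upsert keyed on (channel, split, timebin): re-indexing a file refreshes its
            // pulse range and hostname and bumps the upc1 update counter.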
let sql = "insert into map_pulse_files (channel, split, timebin, pulse_min, pulse_max, hostname) values ($1, $2, $3, $4, $5, $6) on conflict (channel, split, timebin) do update set pulse_min = $4, pulse_max = $5, upc1 = map_pulse_files.upc1 + 1, hostname = $6";
conn.execute(
sql,
&[
&channel_name,
&(split as i32),
&(timebin as i32),
&(r2.pulse as i64),
&(r3.pulse as i64),
&node_config.node.host,
],
)
.await?;
}
Ok(msg)
}
pub async fn index(node_config: &NodeConfigCached) -> Result<String, Error> {
// TODO avoid double-insert on central storage.
// TODO Mark files as "closed".
let mut msg = format!("LOG");
make_tables(node_config).await?;
let conn = dbconn::create_connection(&node_config.node_config.cluster.database).await?;
let chs = timer_channel_names();
for channel_name in &chs[..] {
info!("channel_name {}", channel_name);
let files = datafiles_for_channel(channel_name.clone(), node_config).await?;
msg = format!("\n{:?}", files);
for path in files {
let splitted: Vec<_> = path.to_str().unwrap().split("/").collect();
//info!("splitted: {:?}", splitted);
let timebin: u64 = splitted[splitted.len() - 3].parse()?;
let split: u64 = splitted[splitted.len() - 2].parse()?;
if false {
info!(
"hostname {} timebin {} split {}",
node_config.node.host, timebin, split
);
match Self::index_channel(channel_name.clone(), &conn, node_config).await {
Ok(m) => {
msg.push_str("\n");
msg.push_str(&m);
}
Err(e) => {
error!("error while indexing {} {:?}", channel_name, e);
return Err(e);
}
let file = tokio::fs::OpenOptions::new().read(true).open(path).await?;
let (r2, file) = read_first_chunk(file).await?;
msg = format!("{}\n{:?}", msg, r2);
let (r3, _file) = read_last_chunk(file, r2.pos, r2.len).await?;
msg = format!("{}\n{:?}", msg, r3);
// TODO remove update of static when older clients are removed.
let sql = "insert into map_pulse_files (channel, split, timebin, pulse_min, pulse_max, hostname) values ($1, $2, $3, $4, $5, $6) on conflict (channel, split, timebin) do update set pulse_min = $4, pulse_max = $5, upc1 = map_pulse_files.upc1 + 1, hostname = $6";
conn.execute(
sql,
&[
&channel_name,
&(split as i32),
&(timebin as i32),
&(r2.pulse as i64),
&(r3.pulse as i64),
&node_config.node.host,
],
)
.await?;
}
}
Ok(response(StatusCode::OK).body(Body::from(msg))?)
Ok(msg)
}
}
@@ -526,6 +547,7 @@ impl MapPulseHttpFunction {
        if req.method() != Method::GET {
            return Ok(response(StatusCode::NOT_ACCEPTABLE).body(Body::empty())?);
        }
        info!("MapPulseHttpFunction handle uri: {:?}", req.uri());
        let urls = format!("{}", req.uri());
        let pulse: u64 = urls[MAP_PULSE_URL_PREFIX.len()..].parse()?;
        let histo = MapPulseHistoHttpFunction::histo(pulse, node_config).await?;
@@ -544,3 +566,54 @@ impl MapPulseHttpFunction {
        }
    }
}
pub struct MarkClosedHttpFunction {}
impl MarkClosedHttpFunction {
    pub fn path_matches(path: &str) -> bool {
        path.starts_with(MAP_PULSE_MARK_CLOSED_URL_PREFIX)
    }
    pub async fn handle(req: Request<Body>, node_config: &NodeConfigCached) -> Result<Response<Body>, Error> {
        if req.method() != Method::GET {
            return Ok(response(StatusCode::NOT_ACCEPTABLE).body(Body::empty())?);
        }
        info!("MarkClosedHttpFunction handle uri: {:?}", req.uri());
        match MarkClosedHttpFunction::mark_closed(node_config).await {
            Ok(_) => {
                let ret = response(StatusCode::OK).body(Body::empty())?;
                Ok(ret)
            }
            Err(e) => {
                let msg = format!("{:?}", e);
                let ret = response(StatusCode::INTERNAL_SERVER_ERROR).body(Body::from(msg))?;
                Ok(ret)
            }
        }
    }
    pub async fn mark_closed(node_config: &NodeConfigCached) -> Result<(), Error> {
        let conn = dbconn::create_connection(&node_config.node_config.cluster.database).await?;
        let sql = "select distinct channel from map_pulse_files order by channel";
        let rows = conn.query(sql, &[]).await?;
        let chns: Vec<_> = rows.iter().map(|r| r.get::<_, String>(0)).collect();
        for chn in &chns {
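            // "order by timebin desc offset 2" skips the two most recent timebins for this
            // channel on this host; every older file (assumed no longer written) gets closed = 1.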
            let sql = concat!(
                "with q1 as (select channel, split, timebin from map_pulse_files",
                " where channel = $1 and hostname = $2",
                " order by timebin desc offset 2)",
                " update map_pulse_files t2 set closed = 1 from q1",
                " where t2.channel = q1.channel",
                " and t2.closed = 0",
                " and t2.split = q1.split",
                " and t2.timebin = q1.timebin",
            );
            let nmod = conn.execute(sql, &[&chn, &node_config.node.host]).await?;
            info!(
                "mark files mod {} chn {:?} host {:?}",
                nmod, chn, node_config.node.host
            );
        }
        Ok(())
    }
}
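For orientation, the following reconstructs the shape of map_pulse_files from the statements in this file. It is an assumption-laden sketch, not the actual DDL: the authoritative table creation lives in make_tables, which this diff does not show, so the column types and defaults here are inferred (integer widths from the `as i32`/`as i64` casts, the unique key from the on conflict clause in index_channel, and the closed column from mark_closed above).

// Hypothetical reconstruction of the table that make_tables creates; illustrative only.
async fn make_tables_sketch(conn: &dbconn::pg::Client) -> Result<(), Error> {
    let sql = concat!(
        "create table if not exists map_pulse_files (",
        " channel text not null,",
        " split int not null,",
        " timebin int not null,",
        " pulse_min bigint not null,",
        " pulse_max bigint not null,",
        " hostname text,",
        " upc1 int not null default 0,",
        " closed int not null default 0,",
        // The "on conflict (channel, split, timebin)" upsert in index_channel
        // requires a unique key on exactly these three columns.
        " primary key (channel, split, timebin))",
    );
    conn.execute(sql, &[]).await?;
    Ok(())
}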