Refactor series lookup

This commit is contained in:
Dominik Werder
2024-05-16 23:33:34 +02:00
parent 82455a2b16
commit 6224df534a
41 changed files with 762 additions and 562 deletions

View File

@@ -5,11 +5,11 @@ use err::Error;
use netpod::log::*;
use netpod::range::evrange::NanoRange;
use netpod::ChConf;
use netpod::NodeConfigCached;
use netpod::ScalarType;
use netpod::Shape;
use netpod::TsMs;
use std::time::Duration;
use tokio_postgres::Client;
/// It is an unsolved question as to how we want to uniquely address channels.
/// Currently, the usual (backend, channelname) works in 99% of the cases, but the edge-cases
@@ -19,13 +19,14 @@ use std::time::Duration;
/// Otherwise we try to uniquely identify the series id from the given information.
/// In the future, we can even try to involve time range information for that, but backends like
/// old archivers and sf databuffer do not support such lookup.
pub async fn chconf_best_matching_for_name_and_range(
pub(super) async fn chconf_best_matching_for_name_and_range(
backend: &str,
name: &str,
range: NanoRange,
ncc: &NodeConfigCached,
pg: &Client,
) -> Result<ChConf, Error> {
debug!("chconf_best_matching_for_name_and_range {backend} {name} {range:?}");
#[cfg(DISABLED)]
if ncc.node_config.cluster.scylla.is_none() {
let e = Error::with_msg_no_trace(format!(
"chconf_best_matching_for_name_and_range but not a scylla backend"
@@ -33,21 +34,20 @@ pub async fn chconf_best_matching_for_name_and_range(
error!("{e}");
return Err(e);
};
#[cfg(DISABLED)]
if backend != ncc.node_config.cluster.backend {
warn!(
"mismatched backend {} vs {}",
backend, ncc.node_config.cluster.backend
);
}
let dbconf = &ncc.node_config.cluster.database;
let pgclient = crate::create_connection(dbconf).await?;
let sql = concat!(
"select unnest(tscs) as tsc, series, scalar_type, shape_dims",
" from series_by_channel",
" where kind = 2 and facility = $1 and channel = $2",
" order by tsc",
);
let res = pgclient.query(sql, &[&backend, &name]).await.err_conv()?;
let res = pg.query(sql, &[&backend, &name]).await.err_conv()?;
if res.len() == 0 {
let e = Error::with_public_msg_no_trace(format!("can not find channel information for {name}"));
warn!("{e}");
@@ -70,7 +70,7 @@ pub async fn chconf_best_matching_for_name_and_range(
let tsmss: Vec<_> = rows.iter().map(|x| x.0.clone()).collect();
let range = (TsMs(range.beg / 1000), TsMs(range.end / 1000));
let res = decide_best_matching_index(range, &tsmss)?;
let ch_conf = chconf_for_series(backend, rows[res].1, ncc).await?;
let ch_conf = chconf_for_series(backend, rows[res].1, pg).await?;
Ok(ch_conf)
} else {
let r = res.first().unwrap();
@@ -191,10 +191,8 @@ fn test_decide_best_matching_index_after_01() {
assert_eq!(i, 0);
}
pub async fn chconf_for_series(backend: &str, series: u64, ncc: &NodeConfigCached) -> Result<ChConf, Error> {
let dbconf = &ncc.node_config.cluster.database;
let pgclient = crate::create_connection(dbconf).await?;
let res = pgclient
pub(super) async fn chconf_for_series(backend: &str, series: u64, pg: &Client) -> Result<ChConf, Error> {
let res = pg
.query(
"select channel, scalar_type, shape_dims from series_by_channel where facility = $1 and series = $2",
&[&backend, &(series as i64)],

View File

@@ -3,6 +3,7 @@ pub mod channelinfo;
pub mod query;
pub mod scan;
pub mod search;
pub mod worker;
pub mod pg {
pub use tokio_postgres::types::Type;
@@ -28,6 +29,7 @@ use serde::Serialize;
use std::sync::Arc;
use std::time::Duration;
use taskrun::tokio;
use tokio::task::JoinHandle;
trait ErrConv<T> {
fn err_conv(self) -> Result<T, Error>;
@@ -63,27 +65,28 @@ pub async fn delay_io_medium() {
delay_us(2000).await;
}
pub async fn create_connection(db_config: &Database) -> Result<PgClient, Error> {
pub async fn create_connection(db_config: &Database) -> Result<(PgClient, JoinHandle<Result<(), Error>>), Error> {
warn!("create_connection\n\n CREATING CONNECTION\n\n");
// TODO use a common already running worker pool for these queries:
let d = db_config;
let uri = format!("postgresql://{}:{}@{}:{}/{}", d.user, d.pass, d.host, d.port, d.name);
let (cl, conn) = tokio_postgres::connect(&uri, NoTls)
.await
.map_err(|e| format!("Can not connect to database: {e:?}"))
//.errconv()
?;
// TODO monitor connection drop.
let _cjh = tokio::spawn(async move {
if let Err(e) = conn.await {
error!("connection error: {}", e);
.map_err(|e| format!("Can not connect to database: {e}"))?;
let jh = tokio::spawn(async move {
match conn.await {
Ok(()) => Ok(()),
Err(e) => {
error!("connection error: {}", e);
Err(Error::from_string(e))
}
}
Ok::<_, Error>(())
});
Ok(cl)
Ok((cl, jh))
}
pub async fn channel_exists(channel_name: &str, node_config: &NodeConfigCached) -> Result<bool, Error> {
let cl = create_connection(&node_config.node_config.cluster.database).await?;
let (cl, _pgjh) = create_connection(&node_config.node_config.cluster.database).await?;
let rows = cl
.query("select rowid from channels where name = $1::text", &[&channel_name])
.await
@@ -101,7 +104,7 @@ pub async fn channel_exists(channel_name: &str, node_config: &NodeConfigCached)
}
pub async fn database_size(node_config: &NodeConfigCached) -> Result<u64, Error> {
let cl = create_connection(&node_config.node_config.cluster.database).await?;
let (cl, _pgjh) = create_connection(&node_config.node_config.cluster.database).await?;
let rows = cl
.query(
"select pg_database_size($1::text)",
@@ -129,7 +132,7 @@ pub async fn table_sizes(node_config: &NodeConfigCached) -> Result<TableSizes, E
"ORDER BY pg_total_relation_size(C.oid) DESC LIMIT 20",
);
let sql = sql.as_str();
let cl = create_connection(&node_config.node_config.cluster.database).await?;
let (cl, _pgjh) = create_connection(&node_config.node_config.cluster.database).await?;
let rows = cl.query(sql, &[]).await.err_conv()?;
let mut sizes = TableSizes { sizes: Vec::new() };
sizes.sizes.push((format!("table"), format!("size")));
@@ -141,7 +144,7 @@ pub async fn table_sizes(node_config: &NodeConfigCached) -> Result<TableSizes, E
pub async fn random_channel(node_config: &NodeConfigCached) -> Result<String, Error> {
let sql = "select name from channels order by rowid limit 1 offset (random() * (select count(rowid) from channels))::bigint";
let cl = create_connection(&node_config.node_config.cluster.database).await?;
let (cl, _pgjh) = create_connection(&node_config.node_config.cluster.database).await?;
let rows = cl.query(sql, &[]).await.err_conv()?;
if rows.len() == 0 {
Err(Error::with_msg("can not get random channel"))?;

View File

@@ -6,7 +6,6 @@ use netpod::NodeConfigCached;
use netpod::SfDbChannel;
// For sf-databuffer backend, given a Channel, try to complete the information if only id is given.
#[allow(unused)]
async fn sf_databuffer_fetch_channel_by_series(
channel: SfDbChannel,
ncc: &NodeConfigCached,
@@ -24,7 +23,7 @@ async fn sf_databuffer_fetch_channel_by_series(
let series = channel
.series()
.ok_or_else(|| Error::with_msg_no_trace("no series id given"))? as i64;
let pgcon = create_connection(&ncc.node_config.cluster.database).await?;
let (pgcon, _pgjh) = create_connection(&ncc.node_config.cluster.database).await?;
let mut rows = pgcon
.query("select name from channels where rowid = $1", &[&series])
.await

View File

@@ -30,8 +30,6 @@ use tokio::fs::DirEntry;
use tokio::fs::ReadDir;
use tokio_postgres::Client;
mod updatechannelnames;
#[derive(Debug, Serialize, Deserialize)]
pub struct NodeDiskIdent {
pub rowid: i64,
@@ -201,7 +199,7 @@ async fn update_db_with_channel_names_inner(
node_config: NodeConfigCached,
db_config: Database,
) -> Result<(), Error> {
let dbc = create_connection(&db_config).await?;
let (dbc, _pgjh) = create_connection(&db_config).await?;
info!("update_db_with_channel_names connection done");
let node_disk_ident = get_node_disk_ident(&node_config, &dbc).await?;
info!("update_db_with_channel_names get_node_disk_ident done");
@@ -335,7 +333,7 @@ async fn update_db_with_all_channel_configs_inner(
node_config: NodeConfigCached,
) -> Result<(), Error> {
let node_config = &node_config;
let dbc = create_connection(&node_config.node_config.cluster.database).await?;
let (dbc, _pgjh) = create_connection(&node_config.node_config.cluster.database).await?;
let dbc = Arc::new(dbc);
let node_disk_ident = &get_node_disk_ident(node_config, &dbc).await?;
let rows = dbc
@@ -437,7 +435,7 @@ pub async fn update_db_with_all_channel_configs(
}
pub async fn update_search_cache(node_config: &NodeConfigCached) -> Result<bool, Error> {
let dbc = create_connection(&node_config.node_config.cluster.database).await?;
let (dbc, _pgjh) = create_connection(&node_config.node_config.cluster.database).await?;
dbc.query("select update_cache()", &[])
.await
.err_conv()
@@ -554,7 +552,8 @@ pub async fn update_db_with_all_channel_datafiles(
node_disk_ident: &NodeDiskIdent,
ks_prefix: &str,
) -> Result<(), Error> {
let dbc = Arc::new(create_connection(&node_config.node_config.cluster.database).await?);
let (dbc, _pgjh) = create_connection(&node_config.node_config.cluster.database).await?;
let dbc = Arc::new(dbc);
let rows = dbc
.query(
"select rowid, facility, name from channels where facility = $1 order by facility, name",

View File

@@ -1,171 +0,0 @@
use super::get_node_disk_ident;
use super::update_db_with_channel_name_list;
use super::FindChannelNamesFromConfigReadDir;
use super::NodeDiskIdent;
use super::UpdatedDbWithChannelNames;
use crate::create_connection;
use crate::pg::Client as PgClient;
use err::Error;
use futures_util::Future;
use futures_util::Stream;
use netpod::NodeConfigCached;
use pin_project::pin_project;
use std::os::unix::prelude::OsStringExt;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
/// Hand-rolled state machine that opens a DB connection, resolves the node
/// disk ident, scans channel names from the on-disk config dir, and reports
/// progress items as a `Stream`.
///
/// NOTE(review): this type stores self-referential `'static` references
/// fabricated via `unsafe` in `new` — see the notes there.
#[pin_project]
struct UpdatedDbWithChannelNamesStream {
    // Fused-error flag: once true, the stream terminates.
    errored: bool,
    data_complete: bool,
    // Heap-pinned config; `node_config_ref` points into this allocation.
    #[allow(dead_code)]
    node_config: Pin<Box<NodeConfigCached>>,
    // TODO can we pass a Pin to the async fn instead of creating static ref?
    node_config_ref: &'static NodeConfigCached,
    // Stage 1: future resolving the Postgres client.
    #[pin]
    client_fut: Option<Pin<Box<dyn Future<Output = Result<PgClient, Error>> + Send>>>,
    #[pin]
    client: Option<PgClient>,
    // Fabricated 'static reference into `client` (see poll_next).
    client_ref: Option<&'static PgClient>,
    // Stage 2: future resolving the node disk ident.
    #[pin]
    ident_fut: Option<Pin<Box<dyn Future<Output = Result<NodeDiskIdent, Error>> + Send>>>,
    ident: Option<NodeDiskIdent>,
    // Stage 3: directory scan yielding channel-name dir entries.
    #[pin]
    find: Option<FindChannelNamesFromConfigReadDir>,
    // Batch update future; currently never set (see TODO in poll_next).
    #[pin]
    update_batch: Option<Pin<Box<dyn Future<Output = Result<(), Error>> + Send>>>,
    channel_inp_done: bool,
    // Channel names collected from the directory scan.
    clist: Vec<String>,
}
impl UpdatedDbWithChannelNamesStream {
    /// Build the stream in its initial state with the connection future armed.
    #[allow(unused)]
    fn new(node_config: NodeConfigCached) -> Result<Self, Error> {
        // Pin the config on the heap so the reference fabricated below keeps
        // pointing at a stable address even as `Self` is moved.
        let node_config = Box::pin(node_config.clone());
        // NOTE(review): fabricates a `'static` reference to the heap-pinned
        // config via a raw pointer round-trip. Sound only while `node_config`
        // is never dropped or replaced for the lifetime of every use of
        // `node_config_ref` — confirm before reusing this pattern.
        let node_config_ref = unsafe { &*(&node_config as &NodeConfigCached as *const _) };
        let mut ret = Self {
            errored: false,
            data_complete: false,
            node_config,
            node_config_ref,
            client_fut: None,
            client: None,
            client_ref: None,
            ident_fut: None,
            ident: None,
            find: None,
            update_batch: None,
            channel_inp_done: false,
            clist: Vec::new(),
        };
        // Arm stage 1: connect to the database named in the pinned config.
        ret.client_fut = Some(Box::pin(create_connection(
            &ret.node_config_ref.node_config.cluster.database,
        )));
        Ok(ret)
    }
}
impl Stream for UpdatedDbWithChannelNamesStream {
    type Item = Result<UpdatedDbWithChannelNames, Error>;

    // State machine, checked in reverse order of progress: `find` (stage 3,
    // channel-name scan) first, then `ident_fut` (stage 2), then `client_fut`
    // (stage 1). Each arm either `continue`s to drive further progress in the
    // same poll, or breaks out of the loop with a `Poll` value.
    fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
        use Poll::*;
        let mut pself = self.project();
        loop {
            break if *pself.errored {
                // Fused: after an error was yielded the stream stays done.
                Ready(None)
            } else if *pself.data_complete {
                Ready(None)
            } else if let Some(fut) = pself.find.as_mut().as_pin_mut() {
                // Stage 3: collect channel names from the directory scan.
                match fut.poll_next(cx) {
                    Ready(Some(Ok(item))) => {
                        // NOTE(review): `unwrap` assumes directory entry names
                        // are valid UTF-8 — a non-UTF-8 name panics here.
                        pself
                            .clist
                            .push(String::from_utf8(item.file_name().into_vec()).unwrap());
                        continue;
                    }
                    Ready(Some(Err(e))) => {
                        *pself.errored = true;
                        Ready(Some(Err(e)))
                    }
                    Ready(None) => {
                        *pself.channel_inp_done = true;
                        // Work through the collected items
                        let l = std::mem::replace(pself.clist, Vec::new());
                        // NOTE(review): this future is built but immediately
                        // discarded — the batch update never actually runs.
                        let fut = update_db_with_channel_name_list(
                            l,
                            pself.ident.as_ref().unwrap().facility,
                            pself.client.as_ref().get_ref().as_ref().unwrap(),
                        );
                        // TODO
                        //pself.update_batch.replace(Box::pin(fut));
                        let _ = fut;
                        continue;
                    }
                    Pending => Pending,
                }
            } else if let Some(fut) = pself.ident_fut.as_mut().as_pin_mut() {
                // Stage 2: resolve the node disk ident, then arm the scan.
                match fut.poll(cx) {
                    Ready(Ok(item)) => {
                        *pself.ident_fut = None;
                        *pself.ident = Some(item);
                        let ret = UpdatedDbWithChannelNames {
                            msg: format!("Got ident {:?}", pself.ident),
                            count: 43,
                        };
                        // `?` on a missing sf-databuffer config yields
                        // Ready(Some(Err(..))) via the Try impl for Poll.
                        let base_path = &pself
                            .node_config
                            .node
                            .sf_databuffer
                            .as_ref()
                            .ok_or_else(|| Error::with_msg(format!("missing sf databuffer config in node")))?
                            .data_base_path;
                        let s = FindChannelNamesFromConfigReadDir::new(base_path);
                        *pself.find = Some(s);
                        Ready(Some(Ok(ret)))
                    }
                    Ready(Err(e)) => {
                        *pself.errored = true;
                        Ready(Some(Err(e)))
                    }
                    Pending => Pending,
                }
            } else if let Some(fut) = pself.client_fut.as_mut().as_pin_mut() {
                // Stage 1: resolve the DB client, then arm the ident lookup.
                match fut.poll(cx) {
                    Ready(Ok(item)) => {
                        *pself.client_fut = None;
                        //*pself.client = Some(Box::pin(item));
                        //*pself.client_ref = Some(unsafe { &*(&pself.client.as_ref().unwrap() as &Client as *const _) });
                        *pself.client = Some(item);
                        // NOTE(review): fabricates a 'static reference into the
                        // pinned `client` slot; sound only while `client` is
                        // never replaced/dropped before all uses of the ref.
                        let c2: &PgClient = pself.client.as_ref().get_ref().as_ref().unwrap();
                        *pself.client_ref = Some(unsafe { &*(c2 as *const _) });
                        //() == pself.node_config.as_ref();
                        //() == pself.client.as_ref().as_pin_ref().unwrap();
                        /* *pself.ident_fut = Some(Box::pin(get_node_disk_ident_2(
                            pself.node_config.as_ref(),
                            pself.client.as_ref().as_pin_ref().unwrap(),
                        )));*/
                        *pself.ident_fut = Some(Box::pin(get_node_disk_ident(
                            pself.node_config_ref,
                            pself.client_ref.as_ref().unwrap(),
                        )));
                        let ret = UpdatedDbWithChannelNames {
                            msg: format!("Client opened connection"),
                            count: 42,
                        };
                        Ready(Some(Ok(ret)))
                    }
                    Ready(Err(e)) => {
                        *pself.errored = true;
                        Ready(Some(Err(e)))
                    }
                    Pending => Pending,
                }
            } else {
                // No stage is armed: nothing left to do.
                Ready(None)
            };
        }
    }
}

View File

@@ -34,8 +34,8 @@ pub async fn search_channel_databuffer(
" dtype, shape, unit, description, channel_backend",
" from searchext($1, $2, $3, $4)",
);
let cl = create_connection(&node_config.node_config.cluster.database).await?;
let rows = cl
let (pg, _pgjh) = create_connection(&node_config.node_config.cluster.database).await?;
let rows = pg
.query(
sql,
&[&query.name_regex, &query.source_regex, &query.description_regex, &"asc"],
@@ -115,7 +115,7 @@ pub async fn search_channel_scylla(query: ChannelSearchQuery, pgconf: &Database)
),
regop
);
let pgclient = crate::create_connection(pgconf).await?;
let (pgclient, _pgjh) = crate::create_connection(pgconf).await?;
let rows = pgclient
.query(sql, &[&ch_kind, &query.name_regex, &cb1, &cb2])
.await
@@ -182,7 +182,7 @@ async fn search_channel_archeng(
" order by c.name",
" limit 100"
));
let cl = create_connection(database).await?;
let (cl, _pgjh) = create_connection(database).await?;
let rows = cl.query(sql.as_str(), &[&query.name_regex]).await.err_conv()?;
let mut res = Vec::new();
for row in rows {
@@ -271,7 +271,7 @@ pub async fn search_channel(
node_config: &NodeConfigCached,
) -> Result<ChannelSearchResult, Error> {
let pgconf = &node_config.node_config.cluster.database;
if let Some(_scyconf) = node_config.node_config.cluster.scylla.as_ref() {
if let Some(_scyconf) = node_config.node_config.cluster.scylla_st() {
search_channel_scylla(query, pgconf).await
} else if let Some(conf) = node_config.node.channel_archiver.as_ref() {
search_channel_archeng(query, node_config.node_config.cluster.backend.clone(), conf, pgconf).await

125
crates/dbconn/src/worker.rs Normal file
View File

@@ -0,0 +1,125 @@
use crate::create_connection;
use async_channel::Receiver;
use async_channel::Sender;
use err::thiserror;
use err::ThisError;
use netpod::log::*;
use netpod::range::evrange::NanoRange;
use netpod::ChConf;
use netpod::Database;
use taskrun::tokio;
use tokio::task::JoinHandle;
use tokio_postgres::Client;
#[derive(Debug, ThisError)]
pub enum Error {
Error(#[from] err::Error),
ChannelSend,
ChannelRecv,
Join,
}
impl err::ToErr for Error {
    /// Convert this worker-local error into the crate-wide generic
    /// `err::Error` by stringifying it.
    fn to_err(self) -> err::Error {
        err::Error::from_string(self)
    }
}
/// A queued database lookup, bundled with the bounded(1) sender on which
/// the worker delivers the single result back to the requester.
#[derive(Debug)]
enum Job {
    // (backend, channel name, time range, result sender)
    ChConfBestMatchingNameRange(String, String, NanoRange, Sender<Result<ChConf, Error>>),
    // (backend, series id, result sender)
    ChConfForSeries(String, u64, Sender<Result<ChConf, Error>>),
}
/// Cloneable handle for submitting lookup jobs to a `PgWorker`.
///
/// Cloning is cheap: all clones share the same bounded job channel.
#[derive(Debug, Clone)]
pub struct PgQueue {
    // Sending half of the job channel; the worker holds the receiving half.
    tx: Sender<Job>,
}
impl PgQueue {
pub async fn chconf_for_series(
&self,
backend: &str,
series: u64,
) -> Result<Receiver<Result<ChConf, Error>>, Error> {
let (tx, rx) = async_channel::bounded(1);
let job = Job::ChConfForSeries(backend.into(), series, tx);
self.tx.send(job).await.map_err(|_| Error::ChannelSend)?;
Ok(rx)
}
pub async fn chconf_best_matching_name_range_job(
&self,
backend: &str,
name: &str,
range: NanoRange,
) -> Result<Receiver<Result<ChConf, Error>>, Error> {
let (tx, rx) = async_channel::bounded(1);
let job = Job::ChConfBestMatchingNameRange(backend.into(), name.into(), range, tx);
self.tx.send(job).await.map_err(|_| Error::ChannelSend)?;
Ok(rx)
}
}
/// Owns one Postgres connection and serially processes jobs submitted
/// through `PgQueue` handles.
#[derive(Debug)]
pub struct PgWorker {
    // Receiving half of the job channel shared with all `PgQueue` clones.
    rx: Receiver<Job>,
    // The Postgres client used for every query this worker runs.
    pg: Client,
    // Join handle of the spawned connection driver task; taken by `join`.
    pgjh: Option<JoinHandle<Result<(), err::Error>>>,
}
impl PgWorker {
    /// Create a queue/worker pair sharing a freshly opened Postgres
    /// connection. The connection driver task's join handle is kept for
    /// later `join`.
    pub async fn new(pgconf: &Database) -> Result<(PgQueue, Self), Error> {
        // Bounded job queue: senders back-pressure at 64 in-flight jobs.
        let (tx, rx) = async_channel::bounded(64);
        let (pg, pgjh) = create_connection(pgconf).await?;
        let queue = PgQueue { tx };
        let worker = Self {
            rx,
            pg,
            pgjh: Some(pgjh),
        };
        Ok((queue, worker))
    }

    /// Run the job loop until the job channel yields an error.
    ///
    /// NOTE(review): closing the queue via `close()` also surfaces here as
    /// `Err(ChannelRecv)` with an error-level log — a deliberate shutdown is
    /// indistinguishable from a failure. Confirm whether that is intended.
    pub async fn work(self) -> Result<(), Error> {
        loop {
            let x = self.rx.recv().await;
            let job = match x {
                Ok(x) => x,
                Err(_) => {
                    error!("PgWorker can not receive from channel");
                    return Err(Error::ChannelRecv);
                }
            };
            match job {
                Job::ChConfBestMatchingNameRange(backend, name, range, tx) => {
                    let res =
                        crate::channelconfig::chconf_best_matching_for_name_and_range(&backend, &name, range, &self.pg)
                            .await;
                    // A closed receiver just means the requester gave up.
                    if tx.send(res.map_err(Into::into)).await.is_err() {
                        // TODO count for stats
                    }
                }
                Job::ChConfForSeries(backend, series, tx) => {
                    let res = crate::channelconfig::chconf_for_series(&backend, series, &self.pg).await;
                    if tx.send(res.map_err(Into::into)).await.is_err() {
                        // TODO count for stats
                    }
                }
            }
        }
    }

    /// Await the spawned connection driver task, once. Subsequent calls
    /// (or calls when no handle is held) return `Ok(())`.
    pub async fn join(&mut self) -> Result<(), Error> {
        if let Some(jh) = self.pgjh.take() {
            // Outer error: task panicked/cancelled; inner: connection error.
            jh.await.map_err(|_| Error::Join)?.map_err(Error::from)?;
            Ok(())
        } else {
            Ok(())
        }
    }

    /// Close the job channel so `work` stops once the queue drains.
    pub fn close(&self) {
        self.rx.close();
    }
}