Factor out ioc finder to db crate

This commit is contained in:
Dominik Werder
2023-08-28 22:58:47 +02:00
parent 837265a7b3
commit e05970ef56
29 changed files with 617 additions and 633 deletions

View File

@@ -1,20 +1,7 @@
use err::thiserror;
use err::ThisError;
/// Newtype around a free-form error message string.
#[derive(Debug)]
pub struct Msg(pub String);
/// Error type of this module.
#[derive(Debug, ThisError)]
pub enum Error {
/// Error reported by `tokio_postgres`; the `#[from]` attribute provides the conversion.
Postgres(#[from] tokio_postgres::Error),
/// Error that carries only a text message.
Msg(Msg),
}
impl Error {
pub fn from_msg<T>(msg: T) -> Self
where
T: Into<String>,
{
Self::Msg(Msg(msg.into()))
}
}

85
dbpg/src/findaddr.rs Normal file
View File

@@ -0,0 +1,85 @@
use crate::conn::PgClient;
use err::thiserror;
use err::ThisError;
use log::*;
use std::net::SocketAddrV4;
/// Errors returned by the address lookup functions of this module.
#[derive(Debug, ThisError)]
pub enum Error {
/// Error reported by `tokio_postgres`; the `#[from]` attribute provides the conversion.
Postgres(#[from] tokio_postgres::Error),
/// Returned by `find_channel_addr` when the query yields no rows or the stored address does not parse.
IocAddrNotFound,
}
/// Looks up the most recently modified, non-null address logged for a channel.
///
/// The query reads `ioc_by_channel_log`, matching `backend` against the `facility`
/// column and `name` against the `channel` column, newest `tsmod` first, limited to
/// one row.
///
/// Returns `Ok(Some(addr))` when the stored address parses as a `SocketAddrV4`, and
/// `Ok(None)` when rows were returned but the address column could not be read as
/// text from any of them.
///
/// # Errors
///
/// * `Error::Postgres` if preparing or running the query fails.
/// * `Error::IocAddrNotFound` if no row matches, or the stored address does not parse.
pub async fn find_channel_addr(backend: &str, name: String, pg: &PgClient) -> Result<Option<SocketAddrV4>, Error> {
    let sql = "select t1.facility, t1.channel, t1.addr from ioc_by_channel_log t1 where t1.facility = $1 and t1.channel = $2 and addr is not null order by tsmod desc limit 1";
    let stmt = pg.prepare(sql).await?;
    let rows = pg.query(&stmt, &[&backend, &name]).await?;
    if rows.is_empty() {
        error!("can not find any addresses of channels {:?}", name);
        return Err(Error::IocAddrNotFound);
    }
    for row in rows {
        // A row whose address column is unreadable is logged and skipped.
        let text = match row.try_get::<_, &str>(2) {
            Ok(text) => text,
            Err(e) => {
                error!("can not find addr for {name} {e:?}");
                continue;
            }
        };
        // The first readable address decides the outcome.
        return match text.parse::<SocketAddrV4>() {
            Ok(addr) => Ok(Some(addr)),
            Err(e) => {
                error!("can not parse {e:?}");
                Err(Error::IocAddrNotFound)
            }
        };
    }
    Ok(None)
}
/// Draft of a batched address lookup that queries up to eight channels in one statement.
///
/// The function is not called anywhere in this file (hence `#[allow(unused)]`) and the
/// parsed addresses are discarded; it always returns `Ok(())` unless a database call fails.
///
/// NOTE(review): the channel list comes from `err::todoval()`, which looks like a
/// placeholder value — confirm before putting this function to use.
#[allow(unused)]
async fn query_addr_multiple(backend: &str, pg_client: &PgClient) -> Result<(), Error> {
// TODO factor the find loop into a separate Stream.
let sql = concat!(
"with q1 as (select t1.facility, t1.channel, t1.addr from ioc_by_channel_log t1",
" where t1.facility = $1",
" and t1.channel in ($2, $3, $4, $5, $6, $7, $8, $9)",
" and t1.addr is not null order by t1.tsmod desc)",
" select distinct on (q1.facility, q1.channel) q1.facility, q1.channel, q1.addr from q1"
);
let qu_find_addr = pg_client.prepare(sql).await?;
let mut chns_todo: &[String] = err::todoval();
// Eight slots, one per channel parameter ($2..$9); unused slots keep the "__NONE__" filler.
let mut chstmp = ["__NONE__"; 8];
for (s1, s2) in chns_todo.iter().zip(chstmp.iter_mut()) {
*s2 = s1;
}
// Drop the channels that were just copied into `chstmp` from the todo slice.
chns_todo = &chns_todo[chstmp.len().min(chns_todo.len())..];
let rows = pg_client
.query(
&qu_find_addr,
&[
&backend, &chstmp[0], &chstmp[1], &chstmp[2], &chstmp[3], &chstmp[4], &chstmp[5], &chstmp[6],
&chstmp[7],
],
)
.await?;
for row in rows {
// Column 1 is the channel name, column 2 the address text.
let ch: &str = row.get(1);
let addr: &str = row.get(2);
if addr == "" {
// TODO the address was searched before but could not be found.
} else {
let addr: SocketAddrV4 = match addr.parse() {
Ok(k) => k,
Err(e) => {
error!("can not parse {addr:?} for channel {ch:?} {e:?}");
continue;
}
};
// The parsed address is not used yet.
let _ = addr;
}
}
Ok(())
}

View File

@@ -1,5 +1,8 @@
pub mod conn;
pub mod err;
pub mod findaddr;
pub mod iocindex;
pub mod pool;
pub mod schema;
pub mod seriesbychannel;
pub mod seriesid;

View File

@@ -14,7 +14,6 @@ pub enum Error {
EndOfPool,
ChannelRecv(#[from] RecvError),
ChannelSend,
Msg(String),
}
impl From<crate::err::Error> for Error {
@@ -22,7 +21,6 @@ impl From<crate::err::Error> for Error {
type G = crate::err::Error;
match value {
G::Postgres(e) => Error::Postgres(e),
G::Msg(e) => Error::Msg(e.0),
}
}
}

View File

@@ -1,7 +1,23 @@
use crate::conn::PgClient;
use crate::err::Error;
use err::thiserror;
use err::ThisError;
use log::*;
/// Error type of this module.
#[derive(Debug, ThisError)]
pub enum Error {
/// Error reported by `tokio_postgres`; the `#[from]` attribute provides the conversion.
Postgres(#[from] tokio_postgres::Error),
/// A condition described by the contained message; built with `Error::from_logic_msg`.
LogicError(String),
}
impl Error {
pub fn from_logic_msg<T>(msg: T) -> Self
where
T: Into<String>,
{
Self::LogicError(msg.into())
}
}
async fn has_column(table: &str, column: &str, pgc: &PgClient) -> Result<bool, Error> {
let rows = pgc
.query(
@@ -16,12 +32,15 @@ async fn has_column(table: &str, column: &str, pgc: &PgClient) -> Result<bool, E
} else if c == 1 {
Ok(true)
} else {
Err(Error::from_msg(format!("has_columns bad count {}", c)))
Err(Error::from_logic_msg(format!("has_columns bad count {}", c)))
}
} else if rows.len() == 0 {
Ok(false)
} else {
Err(Error::from_msg(format!("has_columns bad row count {}", rows.len())))
Err(Error::from_logic_msg(format!(
"has_columns bad row count {}",
rows.len()
)))
}
}

291
dbpg/src/seriesbychannel.rs Normal file
View File

@@ -0,0 +1,291 @@
use async_channel::Receiver;
use async_channel::Sender;
use err::thiserror;
use err::ThisError;
use futures_util::StreamExt;
use log::*;
use md5::Digest;
use netpod::Database;
use series::series::Existence;
use series::SeriesId;
use std::time::Duration;
use std::time::Instant;
use taskrun::tokio;
use tokio::task::JoinHandle;
use tokio_postgres::Client as PgClient;
use tokio_postgres::Statement as PgStatement;
/// Errors of the series-by-channel lookup workers.
#[derive(Debug, ThisError)]
pub enum Error {
/// Error reported by `tokio_postgres`; the `#[from]` attribute provides the conversion.
Postgres(#[from] tokio_postgres::Error),
/// Returned by `Worker::insert_missing` when no acceptable set of series ids could be generated.
CreateSeriesFail,
/// Returned by `Worker::work` when entries are still missing after the insert attempt.
SeriesMissing,
/// NOTE(review): not constructed anywhere in this file; presumably reserved for channel failures — confirm.
ChannelError,
}
impl From<crate::err::Error> for Error {
fn from(value: crate::err::Error) -> Self {
use crate::err::Error as A;
match value {
A::Postgres(x) => Self::Postgres(x),
}
}
}
/// One request to look up (or create) the series id of a channel.
pub struct ChannelInfoQuery {
/// Backend name; the worker's SQL matches it against the `facility` column.
pub backend: String,
/// Channel name.
pub channel: String,
/// Scalar type code, compared against the `scalar_type` column.
pub scalar_type: i32,
/// Shape dimensions, compared against the `shape_dims` column.
pub shape_dims: Vec<i32>,
/// Channel on which the worker sends the result of this request.
pub tx: Sender<Result<Existence<SeriesId>, Error>>,
}
impl ChannelInfoQuery {
pub fn dummy(&self) -> Self {
Self {
backend: String::new(),
channel: String::new(),
scalar_type: -1,
shape_dims: Vec::new(),
tx: self.tx.clone(),
}
}
}
/// A resolved series id together with the sender it has to be delivered on.
struct ChannelInfoResult {
/// The series id that was found for the request.
series: Existence<SeriesId>,
/// Sender taken from the originating `ChannelInfoQuery`.
tx: Sender<Result<Existence<SeriesId>, Error>>,
}
/// Lookup worker: owns one database client and consumes batches of queries.
struct Worker {
/// Database client used for all statements of this worker.
pg: PgClient,
/// Prepared statement that selects existing series ids for a batch.
qu_select: PgStatement,
/// Prepared statement that inserts new series ids for a batch.
qu_insert: PgStatement,
/// Source of query batches processed in `work`.
batch_rx: Receiver<Vec<ChannelInfoQuery>>,
}
impl Worker {
/// Connects to the database and prepares the batch select and batch insert statements.
///
/// Both statements take parallel arrays (backend, channel, scalar type, shape dims as
/// text, and either a request id or a series id) and expand them with `unnest`.
///
/// # Errors
///
/// Fails if the client cannot be created or a statement cannot be prepared.
async fn new(db: &Database, batch_rx: Receiver<Vec<ChannelInfoQuery>>) -> Result<Self, Error> {
    let select_sql = concat!(
        "with q1 as (select * from unnest($1::text[], $2::text[], $3::int[], $4::text[], $5::int[])",
        " as inp (backend, channel, scalar_type, shape_dims, rid))",
        " select t.series, q1.rid from series_by_channel t",
        " join q1 on t.facility = q1.backend and t.channel = q1.channel",
        " and t.scalar_type = q1.scalar_type and t.shape_dims = q1.shape_dims::int[]",
        " and t.agg_kind = 0",
        " order by q1.rid",
    );
    let insert_sql = concat!(
        "with q1 as (select * from unnest($1::text[], $2::text[], $3::int[], $4::text[], $5::bigint[])",
        " as inp (backend, channel, scalar_type, shape_dims, series))",
        " insert into series_by_channel (series, facility, channel, scalar_type, shape_dims, agg_kind)",
        " select series, backend, channel, scalar_type, shape_dims::int[], 0 from q1",
        " on conflict do nothing"
    );
    let pg = crate::conn::make_pg_client(db).await?;
    let qu_select = pg.prepare(select_sql).await?;
    let qu_insert = pg.prepare(insert_sql).await?;
    Ok(Self {
        pg,
        qu_select,
        qu_insert,
        batch_rx,
    })
}
/// Looks up the series ids for a whole batch of queries in one database round trip.
///
/// Every query is tagged with its position in `batch` as request id (`rid`). The
/// prepared select returns `(series, rid)` rows ordered by `rid`; these rows are then
/// merged with the queries, which are in the same order.
///
/// Returns `(found, missing)`. Each input query ends up in exactly one of the two
/// lists, and both lists keep the order of `batch`:
///
/// * `found` holds the queries for which a row exists, as `Existence::Existing`.
/// * `missing` holds the untouched queries for which no row was returned.
///
/// # Errors
///
/// `Error::Postgres` if the query fails.
async fn select(
    &self,
    batch: Vec<ChannelInfoQuery>,
) -> Result<(Vec<ChannelInfoResult>, Vec<ChannelInfoQuery>), Error> {
    use std::fmt::Write;
    let n = batch.len();
    let mut backend = Vec::with_capacity(n);
    let mut channel = Vec::with_capacity(n);
    let mut scalar_type = Vec::with_capacity(n);
    let mut shape_dims_str = Vec::with_capacity(n);
    let mut rid = Vec::with_capacity(n);
    for (i, e) in batch.iter().enumerate() {
        backend.push(e.backend.as_str());
        channel.push(e.channel.as_str());
        scalar_type.push(e.scalar_type);
        // Array literal such as `{2,3}`; the SQL casts it back with `::int[]`.
        let mut dims = String::with_capacity(32);
        dims.push('{');
        for (k, v) in e.shape_dims.iter().enumerate() {
            if k > 0 {
                dims.push(',');
            }
            // Writing into a `String` does not fail.
            write!(dims, "{}", v).unwrap();
        }
        dims.push('}');
        shape_dims_str.push(dims);
        rid.push(i as i32);
    }
    let rows = self
        .pg
        .query(
            &self.qu_select,
            &[&backend, &channel, &scalar_type, &shape_dims_str, &rid],
        )
        .await?;
    let mut found = Vec::new();
    let mut missing = Vec::new();
    let mut rows = rows.into_iter().peekable();
    for (i, query) in batch.into_iter().enumerate() {
        let qrid = i as i32;
        // Skip rows that belong to an earlier request id, e.g. a second match for a
        // query that has already been answered.
        while rows.next_if(|row| row.get::<_, i32>(1) < qrid).is_some() {}
        // Consume a row only if it belongs to this query. A row for a later query
        // must stay in place, otherwise this query would be silently dropped and the
        // later one misreported as missing.
        match rows.next_if(|row| row.get::<_, i32>(1) == qrid) {
            Some(row) => {
                let series: i64 = row.get(0);
                found.push(ChannelInfoResult {
                    series: Existence::Existing(SeriesId::new(series as _)),
                    tx: query.tx,
                });
            }
            None => missing.push(query),
        }
    }
    Ok((found, missing))
}
async fn insert_missing(&self, batch: &Vec<ChannelInfoQuery>) -> Result<(), Error> {
let tsbeg = Instant::now();
let mut backends = Vec::new();
let mut channels = Vec::new();
let mut scalar_types = Vec::new();
let mut shape_dimss = Vec::new();
let mut shape_dims_strs = Vec::new();
let mut hashers = Vec::new();
for e in batch.into_iter() {
{
let mut h = md5::Md5::new();
h.update(e.backend.as_bytes());
h.update(e.channel.as_bytes());
h.update(format!("{:?}", e.scalar_type).as_bytes());
h.update(format!("{:?}", e.shape_dims).as_bytes());
hashers.push(h);
}
backends.push(&e.backend);
channels.push(&e.channel);
scalar_types.push(e.scalar_type);
let mut dims = String::with_capacity(32);
dims.push('{');
for (i, &v) in e.shape_dims.iter().enumerate() {
if i > 0 {
dims.push(',');
}
use std::fmt::Write;
write!(dims, "{}", v).unwrap();
}
dims.push('}');
shape_dims_strs.push(dims);
shape_dimss.push(&e.shape_dims);
}
let mut i1 = 0;
loop {
i1 += 1;
if i1 >= 200 {
return Err(Error::CreateSeriesFail);
}
let mut seriess = Vec::with_capacity(hashers.len());
let mut all_good = true;
for h in &mut hashers {
let mut good = false;
for _ in 0..50 {
h.update(tsbeg.elapsed().subsec_nanos().to_ne_bytes());
let f = h.clone().finalize();
let series = u64::from_le_bytes(f.as_slice()[0..8].try_into().unwrap());
if series >= 100000000000000000 && series <= i64::MAX as u64 {
seriess.push(series as i64);
good = true;
break;
}
}
if !good {
all_good = false;
break;
}
}
if !all_good {
continue;
}
self.pg
.execute(
&self.qu_insert,
&[&backends, &channels, &scalar_types, &shape_dims_strs, &seriess],
)
.await?;
break;
}
Ok(())
}
async fn work(&mut self) -> Result<(), Error> {
'outer: while let Some(batch) = self.batch_rx.next().await {
let (res1, missing) = self.select(batch).await?;
let res3 = if missing.len() > 0 {
self.insert_missing(&missing).await?;
let (res2, missing2) = self.select(missing).await?;
if missing2.len() > 0 {
Err(Error::SeriesMissing)
} else {
Ok(res2)
}
} else {
Ok(res1)
};
let res4 = res3?;
for r in res4 {
match r.tx.send(Ok(r.series)).await {
Ok(()) => {}
Err(_e) => {
warn!("can not deliver result");
break 'outer;
}
}
}
}
info!("Worker done");
Ok(())
}
}
/// Starts `worker_count` lookup workers that share one stream of batched queries.
///
/// Queries sent on the returned sender go through a bounded channel of capacity 128
/// into `batchtools::batcher::batch`, whose batch output is shared by all workers.
///
/// Returns the query sender, the join handles of the spawned workers, and the join
/// handle returned by the batcher.
///
/// # Errors
///
/// Fails if a worker cannot be created; workers spawned before that keep running.
pub async fn start_lookup_workers(
    worker_count: usize,
    db: &Database,
) -> Result<
    (
        Sender<ChannelInfoQuery>,
        Vec<JoinHandle<Result<(), Error>>>,
        JoinHandle<()>,
    ),
    Error,
> {
    let input_capacity = 128;
    let batch_output_capacity = 4;
    let batch_timeout = Duration::from_millis(400);
    let (query_tx, query_rx) = async_channel::bounded(input_capacity);
    let (batch_rx, batcher_handle) =
        batchtools::batcher::batch(input_capacity, batch_timeout, batch_output_capacity, query_rx);
    let mut worker_handles = Vec::with_capacity(worker_count);
    for _ in 0..worker_count {
        let mut worker = Worker::new(db, batch_rx.clone()).await?;
        worker_handles.push(tokio::task::spawn(async move { worker.work().await }));
    }
    Ok((query_tx, worker_handles, batcher_handle))
}

93
dbpg/src/seriesid.rs Normal file
View File

@@ -0,0 +1,93 @@
use crate::conn::PgClient;
use err::thiserror;
use err::ThisError;
use log::*;
use netpod::ScalarType;
use netpod::Shape;
use series::series::Existence;
use series::SeriesId;
use std::time::Duration;
use std::time::Instant;
use taskrun::tokio;
/// Errors of the series id lookup in this module.
#[derive(Debug, ThisError)]
pub enum Error {
/// Error reported by `tokio_postgres`; the `#[from]` attribute provides the conversion.
Postgres(#[from] tokio_postgres::Error),
/// NOTE(review): not constructed anywhere in this file — confirm whether it is still needed.
IocAddrNotFound,
/// A generated series id is outside the accepted range.
BadIdGenerated,
/// No new series id could be inserted within the allowed number of attempts.
CanNotInsertSeriesId,
}
// TODO don't need byte_order or compression from ChannelDescDecoded for channel registration.
/// Returns the series id registered for a channel, creating one if none exists yet.
///
/// The lookup matches `backend` (column `facility`), `name` (column `channel`), the
/// scalar type and the shape, with `agg_kind = 0`.
///
/// * If a row exists, its id is returned as `Existence::Existing`.
/// * Otherwise up to 200 candidate ids are derived from an MD5 hash of the channel
///   description mixed with the elapsed sub-second nanoseconds. The first candidate
///   whose insert affects exactly one row is returned as `Existence::Created`.
///
/// # Errors
///
/// * `Error::Postgres` if the select or an insert fails.
/// * `Error::CanNotInsertSeriesId` if no candidate could be inserted.
pub async fn get_series_id(
    name: &str,
    scalar_type: &ScalarType,
    shape: &Shape,
    pg_client: &PgClient,
    backend: String,
) -> Result<Existence<SeriesId>, Error> {
    use md5::Digest;
    let channel_name = name;
    let scalar_type = scalar_type.to_scylla_i32();
    let shape = shape.to_scylla_vec();
    let rows = pg_client
        .query(
            "select series from series_by_channel where facility = $1 and channel = $2 and scalar_type = $3 and shape_dims = $4 and agg_kind = 0",
            &[&backend, &channel_name, &scalar_type, &shape],
        )
        .await?;
    if let Some(row) = rows.first() {
        let series: i64 = row.get(0);
        let series = Existence::Existing(SeriesId::new(series as u64));
        debug!("get_series_id {backend:?} {channel_name:?} {scalar_type:?} {shape:?} {series:?}");
        return Ok(series);
    }
    let tsbeg = Instant::now();
    let mut h = md5::Md5::new();
    h.update(backend.as_bytes());
    h.update(channel_name.as_bytes());
    h.update(format!("{:?}", scalar_type).as_bytes());
    h.update(format!("{:?}", shape).as_bytes());
    let sql = concat!(
        "insert into series_by_channel",
        " (series, facility, channel, scalar_type, shape_dims, agg_kind)",
        " values ($1, $2, $3, $4, $5, 0) on conflict do nothing"
    );
    for _ in 0..200 {
        h.update(tsbeg.elapsed().subsec_nanos().to_ne_bytes());
        let f = h.clone().finalize();
        let series = u64::from_le_bytes(f.as_slice()[0..8].try_into().unwrap());
        // The id is bound as an `i64` parameter, so it has to be nonzero and within
        // the positive `i64` range; other candidates are skipped.
        if series == 0 || series > i64::MAX as u64 {
            continue;
        }
        // A database error is reported to the caller instead of panicking.
        let res = pg_client
            .execute(sql, &[&(series as i64), &backend, &channel_name, &scalar_type, &shape])
            .await?;
        if res == 1 {
            return Ok(Existence::Created(SeriesId::new(series)));
        }
        warn!(
            "tried to insert {series:?} for {backend:?} {channel_name:?} {scalar_type:?} {shape:?} trying again..."
        );
        tokio::time::sleep(Duration::from_millis(20)).await;
    }
    error!("tried to insert new series id for {backend:?} {channel_name:?} {scalar_type:?} {shape:?} but failed");
    Err(Error::CanNotInsertSeriesId)
}