# seweb/influxdb.py

from influxdb_client import InfluxDBClient
from configparser import ConfigParser
import ast
from datetime import datetime

class InfluxDB:
    """
    Class used to handle the connection with the InfluxDB instance

    The connection settings (url, token, org) are read from the [INFLUX] section of an INI configuration file.
    """
    def __init__(self, config_path="./config/config.ini"):
        """
        Reads the configuration file and creates the InfluxDB client

        Parameters :
            config_path (str) : the path of the INI file holding the [INFLUX] section. Defaults to the location that was previously hard-coded.

        Raises :
            KeyError : if the [INFLUX] section or one of its keys (url, token, org) is not available. This is also what happens
            when the file does not exist, because ConfigParser.read silently ignores files it cannot open.
        """
        config = ConfigParser()
        config.read(config_path)
        influx_section = config["INFLUX"]
        self._client = InfluxDBClient(url=influx_section["url"], token=influx_section["token"],
                                      org=influx_section["org"])

    def disconnect(self):
        """
        Disconnects from the InfluxDB instance
        """
        self._client.close()

    # Historical misspelling, kept as an alias so that existing callers keep working
    disconnet = disconnect

    def query(self, query_str):
        """
        Executes the query on the InfluxDB instance

        Parameters :
            query_str (string) : the Flux query to execute

        Returns :
            TableList : an InfluxDB list of the tables returned by the query
        """
        return self._client.query_api().query(query_str)

class PrettyFloat(float):
    """Float whose representation is limited to 15 significant digits.

    Intended to save bandwidth when values are converted to text (e.g. JSON):
    many numbers originally have a small, fixed number of decimal digits, but
    as their binary representation is not exact, the default representation
    may carry a lot of superfluous digits:

    repr(1/10*3) == '0.30000000000000004'
    repr(PrettyFloat(1/10*3)) == '0.3'
    """
    def __repr__(self):
        # "g" with 15 significant digits drops the noise digits and the trailing zeros
        return format(self, '.15g')

class InfluxDataGetter:
    def __init__(self, db, influx_instrument_config):
        """
        Stores the database handle and the Influx configuration of the instrument.

        Parameters :
            db : the object used to execute the Flux queries (its query(query_str) method is called by this class)
            influx_instrument_config (dict) : the Influx configuration of the instrument. The "bucket" key is read here ;
            the "measurement_prefix" and "setup_info_prefix" keys are read by other methods of this class.
        """
        bucket = influx_instrument_config["bucket"]
        self._influx_instrument_config = influx_instrument_config
        self._bucket = bucket
        self._db = db

# ----- PUBLIC METHODS

    def get_available_variables_at_time(self, times, all=False):
        """
        Gets the available variables (those that we can have a value for since the device has been installed on the instrument) at the given point in time.
        Either the last available variables at the given point in time are returned, or all the known variables for the day corresponding to the timestamp.

        The work is a pipeline : read the setup_info dicts, extract the variables, drop those without a value_float field,
        check which ones really have a target, then group the remaining curves by unit.

        Parameters :
            times ([int]) : the unix timestamps in seconds of the range. The first value can be unused. The last can represent the point in time.
            all (bool) : indicates if we want all the variables for the given times[1] timestamp (all the day)

        Returns :
            [{"tag":(str), "unit":(str), "curves":[{"name":(str), "label":(str), "color":(str)}]}] : a list of dictionaries, each one representing
            a block of curves with their name, their label and their color to display, grouped by their tag (which can be the unit augmented with an index) and their unit.
        """
        setup_info_dicts = self._get_all_setup_info_as_dict(times, all)
        candidates = self._extract_variables(setup_info_dicts)
        with_value = self._remove_variables_without_value_float(candidates, times)
        with_checked_target = self._set_variables_with_target(with_value, times)
        return self._group_variables_by_unit(with_checked_target)

    def get_curves_in_timerange(self, variables, time, interval = None):
        """
        Gets the curves for the given variables within a timerange.

        A variable whose name ends with ".target" is read from the measurement named without this suffix, as a target curve.
        Variables for which no point is found are left out of the result.

        Parameters :
            variables ([(str)]) : an array of variable names (Influx) to get the curves for
            time ([int]) : the timerange we want the values in. It consists of two values which are Unix timestamps in seconds.
            interval (int) : the interval (resolution) of the values to get (in nanoseconds)

        Returns :
            {(str):[[(int), (float)]]} : a dictionary of curves. The key is the name of the variable as given in variables, and the value is an array of pairs (also arrays),
            the first value being the Unix timestamp in seconds (x), the second being the value (y).
        """
        target_suffix = ".target"
        curves = {}
        for variable in variables:
            is_target = variable.endswith(target_suffix)
            measurement = variable[:-len(target_suffix)] if is_target else variable
            points = self._get_curve(measurement, is_target, time, interval)
            if points:
                curves[variable] = points
        return curves

    def poll_last_values(self, variables, lastvalues, end_time):
        """
        Polls the latest values for the given variables since their last known point to end_time.

        Parameters :
            variables ([(str)]) : an array of variable names (Influx) to get the last known values for
            lastvalues ({(str):[(int), (float)]}) : the last known point of each variable. Only the timestamp (first element) is used, truncated to an integer.
            The point is looked up under the full variable name first (so a ".target" curve uses its own last point), then under the name without the ".target" suffix.
            end_time (int) : the Unix timestamp in seconds of the last point in time to include the values in

        Returns :
            {(str):[[(int), (float)]]} : a dictionary of points. The key is the name of the variable as given in variables, and the value is an array of pairs (also arrays),
            the first value being the Unix timestamp in seconds (x), the second being the value (y). Variables without any new point are left out.
        """
        target_suffix = ".target"
        res = {}
        for variable in variables:
            is_target = variable.endswith(target_suffix)
            variable_name_for_query = variable[:-len(target_suffix)] if is_target else variable

            # The results are keyed by the full variable name, so the last known point of a target curve
            # is expected under "<name>.target". The base name is kept as a fallback for callers
            # relying on the previous lookup.
            last_point = lastvalues.get(variable)
            if last_point is None:
                last_point = lastvalues.get(variable_name_for_query)
            start_time = int(last_point[0]) if last_point is not None else None

            points = self._get_last_values(variable_name_for_query, is_target, start_time, end_time)
            if points:
                res[variable] = points
        return res

# ----- PRIVATE METHODS

    def _get_all_setup_info_as_dict(self, times, all=False):
        """
        Gets the value of the field setup_info in the measurements nicos/se_main, nicos/se_stick, nicos/se_addons as an array of Python dicts.
        Takes the last setup_info dict (for each measurement) known at times[1], or all the dicts within [times[0], times[1]] + the last one known at times[0] (also for each)

        Parameters
            times ([int]) : the unix timestamps in seconds of the range. The first value is only used when all is set. The last represents the point in time.
            all (bool|str) : indicates if we want all the setup_info dicts of the range. Both the boolean True and the string "True" enable it.

        Returns :
            [{(str):((str), {...})}]: an array of the parsed "setup_info dict" of each measurements. The key is the secop_module prefixed with "se_", and the value is a tuple with its first value
            being the type of Secop device for this module, and the value is too big to give its signature. Some tuple examples can be found under graphs/setup_info_examples.

        """
        # The flag is documented as a boolean but used to be compared with the string "True" only,
        # so a real True was silently ignored. str() makes both spellings work.
        fetch_all = str(all) == "True"

        # Each entry is (range start, range stop, keep only the last record)
        if fetch_all:
            ranges = [
                (times[0], times[1] + 1, False),  # every setup_info within the requested range
                (0, times[0] + 1, True),          # plus the one that was valid when the range starts
            ]
        else:
            ranges = [(0, times[1] + 1, True)]

        res = []
        for measurement in ("nicos/se_main", "nicos/se_stick", "nicos/se_addons"):
            for start, stop, only_last in ranges:
                last_clause = "|> last()" if only_last else ""
                query = f"""
                from(bucket: "{self._bucket}")
                |> range(start: {start}, stop: {stop})
                |> filter(fn: (r) => r._measurement == "{measurement}")
                |> filter(fn: (r) => r._field == "setup_info")
                {last_clause}
                |> yield(name: "res")
                """
                for table in self._db.query(query):
                    for record in table.records:
                        # setup_info is stored as the textual form of a Python dict
                        res.append(ast.literal_eval(record.get_value()))
        return res

    def _extract_variables(self, all_setup_info_dict):
        """
        Extracts relevant information out of the setup_info dict for each available variable in measurements nicos/se_main, nicos/se_stick, nicos/se_addons

        Entries whose type is "nicos.devices.secop.devices.SecopDevice" are skipped. When several entries lead to the same Influx name, only the first one is kept.

        Parameters :
            all_setup_info_dict ([{(str):((str), {...})}]) : an array of the parsed "setup_info dict" of each measurements. The key is the secop_module prefixed with "se_", and the value is a tuple with its first value
            being the type of Secop device for this module, and the value is too big to give its signature. Some tuple examples can be found under graphs/setup_info_examples.

        Returns :
            [{"name":(str), "label":(str), "unit":(str), "has_potential_target":(bool)}] : an array of dictionaries, each containing the Influx name of the corresponding variable out of the setup_info dict,
            the label to display in the Web GUI, its unit and a boolean value indicating if the variable has a potential target available.

        """
        ignored_device_type = "nicos.devices.secop.devices.SecopDevice"
        variables = []
        known_names = set()
        for setup_info in all_setup_info_dict:
            for setup_info_name, content in setup_info.items():
                if content[0] == ignored_device_type:
                    continue
                name = self._transform_setup_info_variable_name_to_influx(setup_info_name)
                if name in known_names:
                    continue
                parameters = content[1]
                variables.append({
                    "name": name,
                    "label": parameters["secop_module"],
                    "unit": parameters["unit"],
                    "has_potential_target": "target_datainfo" in parameters,
                })
                known_names.add(name)
        return variables

    def _transform_setup_info_variable_name_to_influx(self, setup_info_name):
        """
        Transforms the name of the variable available in the setup_info dict into the Influx name.

        The name is lowercased, as many leading characters as the configured "setup_info_prefix" has are dropped
        (without checking that the name really starts with it), and the configured "measurement_prefix" is prepended.

        Parameters :
            setup_info_name (str) : the name of the variable in the setup_info dict.

        Returns :
            str : the transformed variable name that matches the Influx names requirements
        """
        config = self._influx_instrument_config
        measurement_prefix = config["measurement_prefix"]
        lowered = setup_info_name.lower()
        without_prefix = lowered[len(config["setup_info_prefix"]):]
        return measurement_prefix + without_prefix

    def _remove_variables_without_value_float(self, available_variables, times):
        """
        Removes some of the previously identified available_variables if they effectively do not have a value_float field in InfluxDB.

        A variable for which the field keys query returns nothing at all is removed too (instead of raising an IndexError).

        Parameters :
            available_variables ([{"name":(str), "label":(str), "unit":(str), "has_potential_target":(bool)}]) : an array of dictionaries, each containing the Influx name of the corresponding variable out of the setup_info dict,
            the label to display in the Web GUI, its unit and a boolean value indicating if the variable has a potential target available.
            times ([int]): (only second value used) the unix timestamps in seconds of the range at which we want to get the available variables (for the value).

        Returns :
            [{"name":(str), "label":(str), "unit":(str), "has_potential_target":(bool)}] : a new array holding the dictionaries of the variables that have a value_float field
            (the dictionaries themselves are the ones received, in the same order).
        """
        res = []
        for variable in available_variables:
            measurement = variable["name"]
            query = f"""
                import "influxdata/influxdb/schema"
                schema.measurementFieldKeys(bucket: "{self._bucket}", measurement: "{measurement}", start:0, stop: {times[1] + 1})
                |> yield(name: "res")
            """
            # Walk through every returned table rather than indexing the first one :
            # the result may be empty when nothing is known for the measurement.
            has_value = any(
                record.get_value() == "value_float"
                for table in self._db.query(query)
                for record in table.records
            )
            if has_value:
                res.append(variable)

        return res

    def _set_variables_with_target(self, available_variables, times):
        """
        Determines if the previously identified available_variables have effectively a target or not (meaning it has a target_float field in Influx).

        Only the variables flagged with a potential target are checked. Their flag is set to False when no target_float field is found,
        which includes the case where the field keys query returns nothing at all (instead of raising an IndexError).

        Parameters :
            available_variables ([{"name":(str), "label":(str), "unit":(str), "has_potential_target":(bool)}]) : an array of dictionaries, each containing the Influx name of the corresponding variable out of the setup_info dict,
            the label to display in the Web GUI, its unit and a boolean value indicating if the variable has a potential target available.
            times ([int]): (only second value used) the unix timestamps in seconds of the range at which we want to get the available variables (for the target).

        Returns :
            [{"name":(str), "label":(str), "unit":(str), "has_potential_target":(bool)}] : the received array, whose dictionaries have been updated in place.
        """

        for variable in available_variables:
            if not variable["has_potential_target"]:
                continue
            measurement = variable["name"]
            query = f"""
                import "influxdata/influxdb/schema"
                schema.measurementFieldKeys(bucket: "{self._bucket}", measurement: "{measurement}", start:0, stop: {times[1] + 1})
                |> yield(name: "res")
            """
            # Walk through every returned table rather than indexing the first one :
            # the result may be empty when nothing is known for the measurement.
            has_target = any(
                record.get_value() == "target_float"
                for table in self._db.query(query)
                for record in table.records
            )
            if not has_target:
                variable["has_potential_target"] = False

        return available_variables

    def _group_variables_by_unit(self, available_variables):
        """Performs a group by unit, while removing useless information and adding target curves.

        For a variable flagged with a target, the target curve is added right before the curve of the variable itself.
        The groups come in the order in which their unit is first met.

        Parameters :
            available_variables ([{"name":(str), "label":(str), "unit":(str), "has_potential_target":(bool)}]) : an array of dictionaries, each containing the Influx name of the corresponding variable out of the setup_info dict,
            the label to display in the Web GUI, its unit and a boolean value indicating if the variable has a target available.

        Returns :

            [{"tag":(str), "unit":(str), "curves":[{"name":(str), "label":(str), "color":(str)}]}] : a list of dictionaries, each one representing
            a block of curves with their name, their label and their color to display, grouped by their tag (which can be the unit augmented with an index) and their unit.
        """
        groups = {}

        for variable in available_variables:
            to_append = [variable]
            if variable["has_potential_target"]:
                to_append.insert(0, self._get_formatted_target_variable(variable))
            for entry in to_append:
                self._append_variable(groups, entry)

        return [group for group in groups.values()]

    def _get_formatted_target_variable(self, variable):
        """
        Builds the target entry of a variable to be added to the unit groups : ".target" is appended to the Influx name,
        the label is the Influx name without the configured "measurement_prefix" (its length in characters is dropped) followed by ".target",
        and the "has_potential_target" value is not carried over.

        Parameters :
            variable ({"name":(str), "label":(str), "unit":(str), "has_potential_target":(bool)}) : a dictionary containing the Influx name of the corresponding variable out of the setup_info dict,
            the label to display in the Web GUI, its unit and a boolean value indicating if the variable has a target available.

        Returns :
            {"name":(str), "label":(str), "unit":(str)} : a new dictionary containing the Influx name (augmented with ".target" suffix),
            the label (augmented with ".target" suffix) to display in the Web GUI, and the unit of the variable.
        """
        suffix = ".target"
        influx_name = variable["name"]
        prefix_length = len(self._influx_instrument_config["measurement_prefix"])

        target = {}
        target["name"] = influx_name + suffix
        target["label"] = influx_name[prefix_length:] + suffix
        target["unit"] = variable["unit"]
        return target

    def _append_variable(self, groups, variable):
        """
        Appends the variable to the group of its unit with an empty color, creating the group (tag and unit both set to the unit) if not available yet.

        Parameters :
            groups ({}) : a dictionary that contains the curves grouped by unit, which will be updated
            variable ({"name":(str), "label":(str), "unit":(str)[,"has_potential_target":(bool)]}) : a dictionary containing the Influx name of the corresponding variable out of the setup_info dict,
            the label to display in the Web GUI, its unit and possibly a boolean value indicating if the variable has a target available.
        """
        unit = variable["unit"]
        group = groups.setdefault(unit, {"tag": unit, "unit": unit, "curves": []})
        curve = {"name": variable["name"], "label": variable["label"], "color": ""}
        group["curves"].append(curve)

    def _get_curve(self, variable, is_target, time, interval=None):
        """
        Gets the points (curve) within a timerange for the given variable.

        The points are completed by _insert_last_known_value, which may add a first point at time[0].

        Parameters :
            variable (str) : the name (Influx) of the variable we want the values of.
            is_target (bool) : tells if the given variable is a target, or not (if variable is "nicos/se_t_chip.target", then is_target has to be set to True)
            time ([(int)]) : the timerange we want the values in. It consists of two values which are Unix timestamps in seconds. The query stops at time[1] + 1, so points at time[1] are included.
            interval (int) : the interval (resolution) of the values to get (in nanoseconds). When not set (or 0), the points are not aggregated.

        Returns :
            [[(float), (float)]] : an array of pairs (also arrays), the first value being the Unix timestamp in seconds rounded to the millisecond (x), the second being the value (y).
            The value is None when it cannot be converted to a float.
        """
        field = "target_float" if is_target else "value_float"
        window = ""
        if interval:
            window = f"|> aggregateWindow(every: duration(v:{interval}), fn: last, createEmpty:false)"

        # NOTE(review): variable is interpolated as is in the Flux query - confirm that callers only pass trusted names
        query = f"""
            from(bucket: "{self._bucket}")
            |> range(start: {time[0]}, stop: {time[1] + 1})
            |> filter(fn : (r) => r._measurement == "{variable}")
            |> filter(fn : (r) => r._field == "{field}")
            {window}
            |> keep(columns: ["_time","_value"])
            |> yield(name: "res")
            """

        res = []
        for table in self._db.query(query):
            for record in table.records:
                t = round(record.get_time().timestamp(), 3)
                try:
                    value = PrettyFloat(record.get_value())
                except (TypeError, ValueError, OverflowError):
                    # unavailable (None) or non numeric data : keep the point, without value
                    value = None
                res.append([t, value])

        return self._insert_last_known_value(variable, is_target, res, time)

    def _insert_last_known_value(self, variable, is_target, curve, time):
        """
        Adds the last known value as the first point in the curve if the curve is empty or does not start exactly at time[0].

        Parameters :
            variable (str) : the name (Influx) of the variable we want the values of.
            is_target (bool) : tells if the given variable is a target, or not (if variable is "nicos/se_t_chip.target", then is_target has to be set to True)
            curve ([[(int), (float)]]) : an array of pairs (arrays), the first value being the Unix timestamp in seconds (x), the second being the value (y). It is updated in place.
            time ([(int)]) : the timerange we want the values in. It consists of two values which are Unix timestamps in seconds. Only the first one is used here.

        Returns :
            [[(int), (float)]] : the received curve, updated with a potential new first point located at time[0]. The value of this point is None when it cannot be converted to a float.
        """

        if curve and curve[0][0] == time[0]:
            # the curve already starts on the left border of the window
            return curve

        field = "target_float" if is_target else "value_float"
        query = f"""
            from(bucket: "{self._bucket}")
            |> range(start: 0, stop: {time[0]+1})
            |> filter(fn : (r) => r._measurement == "{variable}")
            |> filter(fn : (r) => r._field == "{field}")
            |> last()
            |> keep(columns: ["_value"])
            |> yield(name: "res")
            """

        for table in self._db.query(query):
            for record in table.records:
                try:
                    value = PrettyFloat(record.get_value())
                except (TypeError, ValueError, OverflowError):
                    # unavailable (None) or non numeric data : keep the point, without value
                    value = None
                curve.insert(0, [time[0], value])
        return curve

    def _get_last_values(self, variable, is_target, start_time, end_time):
        """
        Gets the latest values for the given variable that are in [start_time, end_time].
        When start_time is None, only the last value known at end_time is returned.

        Parameters :
            variable (str) : the name (Influx) of the variable we want the last value of.
            is_target (bool) : tells if the given variable is a target, or not (if variable is "nicos/se_t_chip.target", then is_target has to be set to True)
            start_time (int|None) : the start of time range (Unix timestamp in seconds) to include the values in
            end_time (int) : the end of time range (Unix timestamp in seconds) to include the values in

        Returns :
            [[(float), (float)]] : an array of points (also arrays). The first value is the Unix timestamp in seconds rounded to the millisecond (x), the second is the value (y).
            The value is None when it cannot be converted to a float.
        """
        only_last = start_time is None
        range_start = 0 if only_last else start_time
        last_clause = "|> last()" if only_last else ""
        field = "target_float" if is_target else "value_float"

        query = f"""
            from(bucket: "{self._bucket}")
            |> range(start: {range_start}, stop: {end_time+1})
            |> filter(fn : (r) => r._measurement == "{variable}")
            |> filter(fn : (r) => r._field == "{field}")
            {last_clause}
            |> keep(columns: ["_time","_value"])
            |> yield(name: "res")
            """

        # this loop might be simplified, but it has to be kept to catch the case when there is unavailable data
        res = []
        for table in self._db.query(query):
            for record in table.records:
                t = round(record.get_time().timestamp(), 3)
                try:
                    value = PrettyFloat(record.get_value())
                except (TypeError, ValueError, OverflowError):
                    # unavailable (None) or non numeric data : keep the point, without value
                    value = None
                res.append([t, value])
        return res