# Source code for febid.Statistics

"""
Module for continuous process data recording
"""
import random as rnd
import sys
import timeit
from math import floor, log

import numpy as np
import pandas as pd


class Statistics():
    """
    Gather simulation statistics in a DataFrame and mirror them to an Excel file.

    The table starts with the columns listed in ``self.columns``:

    Time, Time passed, Sim.time, Min.precursor coverage, Volume, Max. temperature

    The first two are filled automatically by :meth:`append`; the remaining
    ones (and any added through :meth:`add_stat`) are supplied by the caller.

    Initial simulation parameters can be stored on separate sheets with
    :meth:`get_params`, and scatter charts can be embedded into the file with
    :meth:`add_plots`.
    """

    def __init__(self, filename=None):
        """
        Create the statistics table and the Excel file backing it.

        :param filename: file name without extension ('.xlsx' is appended);
            when omitted a random ``run_id<6 digits>`` name is generated
        """
        # The random name is generated per instance. Putting it in the signature
        # would evaluate it once at definition time and make every
        # default-constructed instance share (and overwrite) the same file.
        if filename is None:
            filename = f'run_id{rnd.randint(100000, 999999)}'
        self.filename = filename + '.xlsx'
        self.sheet_name = 'Data'
        self.columns = ['Time', 'Time passed', 'Sim.time', 'Min.precursor coverage', 'Volume', 'Max. temperature']
        self.data = pd.DataFrame(columns=self.columns)
        self.data.loc[0] = [pd.Timestamp.now(), 0, 0, 0, 0, 0]
        self.step = self.data.copy()
        self.parameters = []
        self.parameters_units = []
        self.writer = None
        self.save_freq = 10  # seconds between two non-forced saves
        self.last_row = 0  # number of DataFrame rows already written to the file
        self.time = timeit.default_timer()

        # Creating new file, old file is overwritten
        self.data.to_excel(self.filename, startrow=self.last_row, sheet_name=self.sheet_name, header=True)
        self.last_row = 1
        # The writer stays open for the lifetime of the object so that new rows
        # can be overlaid onto the existing sheet.
        self.writer = pd.ExcelWriter(self.filename, engine='openpyxl', mode='a', if_sheet_exists='overlay')

    def add_stat(self, name, first_value=0):
        """
        Add a new statistic to the table.
        It is recorded in monitoring function and how it is collected is up to the user.

        :param name: column name
        :param first_value: value assigned to all rows recorded so far
        """
        self.data[name] = first_value
        # Registering a name twice would make append() expect one value too many.
        if name not in self.columns:
            self.columns.append(name)

    def __getitem__(self, item):
        """Return the column(s) ``item`` of the underlying DataFrame."""
        return self.data[item]

    @property
    def shape(self):
        """Shape (rows, columns) of the underlying DataFrame."""
        return self.data.shape

    def _flush(self):
        """
        Write the open workbook to disk without closing the writer.

        ``ExcelWriter.save()`` was deprecated in pandas 1.5 and removed in 2.0;
        where it is missing, the engine's ``_save()`` (what ``close()`` calls
        internally) is used instead.
        """
        writer = self.writer
        save = getattr(writer, 'save', None)
        if save is None:
            save = writer._save
        save()

    def get_params(self, arg: dict, name: str):
        """
        Collect initial parameters and save them to Excel-file

        The parameters are also kept in ``self.parameters``.
        Failures to write are reported on stdout and otherwise ignored.

        :param arg: a dictionary of parameters
        :param name: a name for the provided parameters, used as the sheet name
        :return:
        """
        series = pd.Series(arg)
        series.name = name
        self.parameters.append(series)
        try:
            series.to_excel(self.writer, sheet_name=name)
            self._flush()
        except Exception as e:
            print(f'Failed to save setup parameters to excel file: {e.args}')

    def append(self, *stats):
        """
        Add a new record to the statistics.
        The number of stats must include manually added ones

        'Time' and 'Time passed' (seconds since the first record) are filled in
        automatically. Columns that exist in the table but are not registered in
        ``self.columns`` (e.g. 'Growth rate') are left empty (NaN).
        Errors are reported on stdout and the record is dropped.

        :param stats: one value per column of ``self.columns[2:]``, in that order
        :return:
        """
        self.dt = 0
        self.av_temperature = 0
        cols = self.columns
        try:
            stat_names = cols[2:]
            if len(stats) != len(stat_names):
                raise ValueError(f'Expected {len(stat_names)} values for columns {stat_names}, '
                                 f'got {len(stats)}.')
            time_now = pd.Timestamp.now()
            record = {
                cols[0]: time_now,
                cols[1]: (time_now - self.data.at[0, cols[0]]).total_seconds(),
            }
            record.update(zip(stat_names, stats))
            # Build the row over the actual DataFrame columns: derived columns
            # would otherwise make the row length mismatch and silently stop
            # all further recording.
            self.data.loc[self.shape[0]] = [record.get(col, np.nan) for col in self.data.columns]
        except Exception as e:
            print('An error occurred while recording statistics.')
            print(e.args)

    def plot(self, x, y):
        """
        Plot one column of the table against another using ``DataFrame.plot``.

        :param x: name of the column used for the x axis
        :param y: name of the column used for the y axis
        :return: whatever ``DataFrame.plot`` returns, or None if a column is missing
        """
        available = self.data.columns
        if x not in available or y not in available:
            print('Column with this name does not exist!')
            return None
        # Delegate to the DataFrame; calling self.plot here would recurse forever.
        return self.data.plot(x=x, y=y)

    def save_to_file(self, force=False):
        """
        Write collected statistics to an Excel file.
        The gathered statistics are appended to the end of the table every couple of seconds
        Caution: the session keeps the file open until it finishes.

        A failure to write is reported on stdout and terminates the program.

        :param force: rewrite the whole table (including the header) immediately,
            ignoring the ``save_freq`` interval
        """
        now = timeit.default_timer()
        if not force and not now - self.time > self.save_freq:
            return
        self.time = now
        if force:
            first_row, start_row, header = 0, 0, True
        else:
            first_row, header = self.last_row, False
            # Sheet row 0 holds the header, so DataFrame row k lives in sheet row k + 1.
            start_row = first_row + 1
        total_rows = len(self.data)
        data = self.data.iloc[first_row:]
        try:
            data.to_excel(self.writer, startrow=start_row, sheet_name=self.sheet_name, header=header)
            self._flush()
            # Track DataFrame rows written, not the sheet's max_row; mixing the
            # two shifted the table by one row and dropped records.
            self.last_row = total_rows
        except Exception as e:
            print(f'Was unable to save statistics to file, the following error occurred: {e.args}')
            sys.exit()

    def __get_time_passed(self):
        """
        Overwrite 'Time passed' (second column) with the interval, in seconds,
        between each record and the previous one. The first row is left as is.
        """
        # NOTE(review): the original expression subtracted consecutive 'Time'
        # values; seconds are used to match what append() stores -- confirm.
        col = self.columns[1]
        stamps = pd.to_datetime(self.data['Time'])
        seconds = stamps.diff().dt.total_seconds().to_numpy()
        passed = pd.to_numeric(self.data[col], errors='coerce').to_numpy(dtype=float).copy()
        passed[1:] = seconds[1:]
        self.data[col] = passed

    def __get_sim_speed(self):
        """
        Add a 'Sim.speed' column: simulated time divided by elapsed ('Time passed') time.
        Rows with zero elapsed time yield NaN or inf.
        """
        sim_time = pd.to_numeric(self.data['Sim.time'], errors='coerce').astype(float)
        elapsed = pd.to_numeric(self.data[self.columns[1]], errors='coerce').astype(float)
        self.data['Sim.speed'] = sim_time / elapsed

    def get_growth_rate(self, delta=4):
        """
        Add a 'Growth rate' column: change of 'Volume' per unit of 'Sim.time',
        evaluated over a window of ``delta`` records.

        The first ``delta`` rows, rows with zero growth and rows where the
        simulation time did not advance are set to NaN.

        :param delta: distance (in records) between the two points of the difference
        """
        if delta < 1:
            raise ValueError('delta must be a positive integer')
        # Work on float arrays: the columns may be of integer or object dtype,
        # which would truncate the rates or reject NaN.
        t = pd.to_numeric(self.data['Sim.time'], errors='coerce').to_numpy(dtype=float)
        vol = pd.to_numeric(self.data['Volume'], errors='coerce').to_numpy(dtype=float)
        gr = np.full(t.shape, np.nan)
        if t.size > delta:
            with np.errstate(divide='ignore', invalid='ignore'):
                gr[delta:] = (vol[delta:] - vol[:-delta]) / (t[delta:] - t[:-delta])
        gr[~np.isfinite(gr) | (gr == 0)] = np.nan
        self.data['Growth rate'] = gr

    @staticmethod
    def _normalise_pairs(args):
        """Return the (x, y) pairs given either as one list of pairs or as separate arguments."""
        if len(args) == 1 and isinstance(args[0], (list, tuple)):
            only = args[0]
            if not only or not isinstance(only[0], str):
                return list(only)
        return list(args)

    @staticmethod
    def _chart_positions(position, count, row_step=23):
        """
        Return ``count`` anchor cells for charts.

        ``position`` is a single cell or a list of cells. When fewer cells than
        charts are given, the remaining charts are stacked ``row_step`` rows
        below the last known anchor (layout heuristic for the chart size used).
        """
        anchors = [position] if isinstance(position, str) else list(position)
        if not anchors:
            anchors = ['J1']
        while len(anchors) < count:
            last = anchors[-1]
            column = last.rstrip('0123456789')
            row = int(last[len(column):])
            anchors.append(f'{column}{row + row_step}')
        return anchors[:count]

    def add_plots(self, *args, position='J1'):
        """
        Add scatter plots to the Excel-file.

            Args is a list of tuples of column names to be plotted: [(x1, y1), (x2, y2)]
            (the pairs may also be passed as separate arguments)
            Position is a cell or a list of cells where to put the graphs (by the upper-left corner)

        The file is rewritten with the xlsxwriter engine and the append-mode
        writer is closed, so this should be the last operation on the object.
        """
        pairs = self._normalise_pairs(args)
        anchors = self._chart_positions(position, len(pairs))
        if self.writer is not None:
            # close() saves the workbook before releasing the file
            self.writer.close()
            self.writer = None
        # The context manager saves and closes the workbook on exit.
        with pd.ExcelWriter(self.filename, engine='xlsxwriter') as writer:
            self.data.to_excel(writer, sheet_name=self.sheet_name)
            # xlsxwriter starts from an empty workbook: restore the parameter
            # sheets that were written earlier, otherwise they are lost.
            for series in self.parameters:
                try:
                    series.to_excel(writer, sheet_name=series.name)
                except Exception as e:
                    print(f'Failed to save setup parameters to excel file: {e.args}')
            for (x, y), anchor in zip(pairs, anchors):
                self.add_plot(x, y, writer, anchor)

    @staticmethod
    def _padded_bound(value, direction):
        """
        Move a data extreme 10 % of its magnitude outwards and round it.

        :param value: data minimum or maximum
        :param direction: -1 for a lower bound, +1 for an upper bound
        :return: the axis bound, or None when ``value`` is not finite
        """
        value = float(value)
        if not np.isfinite(value):
            return None
        if value == 0:
            return 0.0
        digits = abs(floor(np.log10(abs(value)))) + 1
        return float(round(value + direction * 0.1 * abs(value), digits))

    @staticmethod
    def _major_unit(low, high):
        """Return a tenth of the data range rounded to one significant digit, or None for a flat range."""
        step = (float(high) - float(low)) / 10
        if not np.isfinite(step) or step <= 0:
            return None
        exponent = floor(np.log10(step))
        return float(round(step, -exponent))

    @classmethod
    def _axis_options(cls, name, low, high):
        """Build the xlsxwriter axis options for data spanning ``low``..``high``."""
        lower = cls._padded_bound(low, -1)
        upper = cls._padded_bound(high, +1)
        if lower is not None and upper is not None and lower >= upper:
            # degenerate range (e.g. all zeros): leave scaling to Excel
            lower = upper = None
        options = {
            'name': name,
            'min': lower,
            'max': upper,
            'major_unit': cls._major_unit(low, high),
            'major_gridlines': {'visible': True, 'line': {'color': '#B3B3B3'}},
        }
        # Options that could not be computed are omitted so Excel picks them automatically.
        return {key: val for key, val in options.items() if val is not None}

    def add_plot(self, x, y, writer, position='J1'):
        """
        Insert a scatter chart of column ``y`` versus column ``x`` into the data sheet.

        :param x: name of the column used for the x axis
        :param y: name of the column used for the y axis
        :param writer: an open ExcelWriter using the xlsxwriter engine that
            already contains the data sheet
        :param position: cell of the upper-left corner of the chart
        """
        workbook = writer.book
        worksheet = writer.sheets[self.sheet_name]

        df = self.data

        # Create a chart object.
        chart = workbook.add_chart({'type': 'scatter'})

        # Configure the series of the chart from the dataframe data.
        # Sheet column 0 holds the index, hence the +1 offset.
        max_row = len(df)
        names = df.columns.to_list()
        x_i = names.index(x) + 1
        y_i = names.index(y) + 1
        chart.add_series({
            'name': [self.sheet_name, 0, y_i],
            'categories': [self.sheet_name, 1, x_i, max_row, x_i],
            'values': [self.sheet_name, 1, y_i, max_row, y_i],
            'marker': {'type': 'circle', 'size': 1,
                       'border': {'color': '#004586'},
                       'fill': {'color': '#004586'}, },
            'line': {'none': True},
        })
        chart.set_legend({'none': True})
        chart.chart_name = y

        # Axis scale with a 10% outward margin and about ten major ticks.
        chart.set_x_axis(self._axis_options(x, df[x].min(), df[x].max()))
        chart.set_y_axis(self._axis_options(y, df[y].min(), df[y].max()))
        chart.set_size({'x_scale': 1.2, 'y_scale': 1.5})
        chart.set_title({'name': y})

        # Insert the chart into the worksheet.
        worksheet.insert_chart(position, chart)

    def __del__(self):
        """Save and close the Excel file when the object is discarded (best effort)."""
        # The attribute may be missing if __init__ did not complete.
        writer = getattr(self, 'writer', None)
        if writer is None:
            return
        try:
            writer.close()
        except Exception:
            # Nothing sensible can be done inside a finaliser.
            pass
        self.writer = None