# Source code for xrd_tools.analyse_measurements

from dataclasses import dataclass, field

import matplotlib.pyplot as plt
import pandas as pd

from .measurement_manager import MeasurementManager
from .measurement import Measurement

# Default labels used by AnalyseMeasurements when it names data frame columns
# and index levels: internal key -> label that appears in the data frame.
ANALYSE_COLS = dict(
    measurement_id="measurement ID",
    sample="sample",
)


@dataclass
class AnalyseMeasurements:
    """Perform operations on and compare multiple measurements.

    Parameters
    ----------
    measurements:
        Measurement objects for the measurements of interest.
    _cols:
        Dictionary with the strings used internally to label data series
        (data frame columns and index levels). Defaults to a copy of
        ``ANALYSE_COLS``.
    """

    # Annotations that name project types are quoted so they are not
    # evaluated while the class body is being created.
    measurements: "list[Measurement]"
    # Copy the module-level defaults: returning ANALYSE_COLS itself would make
    # every instance share (and possibly mutate) the very same dictionary.
    _cols: dict[str, str] = field(default_factory=lambda: dict(ANALYSE_COLS))

    @property
    def measurement_ids(self) -> list[str]:
        """Returns list of strings with measurement IDs."""
        return [m.meta.measurement_id for m in self.measurements]

    def _id_to_index(self, data: pd.DataFrame, drop: bool = False) -> pd.DataFrame:
        """Returns a copy of ``data`` with the measurement IDs as index.

        Parameters
        ----------
        data:
            Data frame with one row per measurement, in the same order as
            ``self.measurements``.
        drop:
            If True the previous index is discarded, otherwise it is kept as
            a regular column.
        """
        id_col = self._cols["measurement_id"]
        pd_obj = data.copy()
        pd_obj[id_col] = self.measurement_ids
        return pd_obj.reset_index(drop=drop).set_index(id_col)

    def _sample_to_multiindex(self, data: pd.DataFrame) -> pd.DataFrame:
        """Returns a copy of ``data`` indexed by (sample, measurement ID).

        Parameters
        ----------
        data:
            Data frame whose index holds the measurement IDs and is named
            like the internal measurement ID label. A sample column is added
            (aligned on that index) when it is not present yet.
        """
        sample_col = self._cols["sample"]
        pd_obj = data.copy()
        if sample_col not in pd_obj.columns:
            # ``samples`` is a one-column data frame; assign the column itself
            # so the values are aligned on the measurement ID index.
            pd_obj[sample_col] = self.samples[sample_col]
        return pd_obj.reset_index().set_index(
            [sample_col, self._cols["measurement_id"]]
        )

    @property
    def samples(self) -> pd.DataFrame:
        """Returns one-column data frame of samples indexed by measurement ID."""
        sample_col = self._cols["sample"]
        samples = pd.Series(
            [m.meta.sample for m in self.measurements], name=sample_col
        )
        return self._id_to_index(samples.to_frame(), drop=True)

    @classmethod
    def from_ids(
        cls, ids: list[str], m_manager: "MeasurementManager | None" = None
    ) -> "AnalyseMeasurements":
        """Alternative constructor to initialise instance by list of measurement IDs.

        Parameters
        ----------
        ids:
            List of strings with measurement IDs.
        m_manager:
            MeasurementManager initiated with measurements directory of interest.
            When omitted, a ``MeasurementManager()`` is created for this call.
        """
        # Build the default manager lazily: a call in the signature would run
        # once at import time and be shared between all later calls.
        if m_manager is None:
            m_manager = MeasurementManager()
        return cls([m_manager.get_measurement(i) for i in ids])

    def sample_to_df(self, df: pd.DataFrame, col_name: str = "sample") -> pd.DataFrame:
        """Returns copy of provided Pandas DataFrame with a column of sample IDs.

        Parameters
        ----------
        df:
            Pandas DataFrame containing rows for each measurement.
        col_name:
            Column name used for the added column.
        """
        df = df.copy()
        df[col_name] = [m.meta.sample for m in self.measurements]
        return df

    def multiplot(self, norm: bool = True):
        """Compare the measurements of this instance in a multiplot.

        One subplot per measurement is stacked vertically and the figure is
        shown with ``plt.show()``.

        Parameters
        ----------
        norm:
            Flag to indicate if normalised data are plotted, which is the recommended
            setting since the y-axis are shared between the individual subplots.

        Raises
        ------
        ValueError
            If there are no measurements to plot.
        """
        n = len(self.measurements)
        if n == 0:
            raise ValueError("No measurements to plot.")
        fig_kwargs = {
            "sharex": True,
            "sharey": True,
            "tight_layout": True,
            "figsize": (20 / 2.54, n * 4 / 2.54),
            "num": "XRD measurement comparison",
        }
        # squeeze=False always yields a 2D array of axes, so a single
        # measurement is handled like any other number of subplots.
        fig, axs = plt.subplots(n, 1, squeeze=False, **fig_kwargs)
        axes = axs[:, 0]
        for m, ax in zip(self.measurements, axes):
            m._data_to_plot(ax, norm=norm)
            ax.text(
                0.99,
                0.85,
                f"{m.meta.measurement_id} | {m.meta.sample}",
                horizontalalignment="right",
                verticalalignment="center",
                transform=ax.transAxes,
            )

        # The x-axis is shared, so only the bottom subplot is labelled, using
        # the index name of the last measurement's normalised data.
        axes[-1].set_xlabel(self.measurements[-1].data_norm.index.name)
        fig.align_ylabels()
        plt.show()