# Source code for xrd_tools.plugins.refinement_profex

"""A xrd-tools refinement interface plugin that integrates Profex/BGMN."""
import logging
import os
import re
import shutil
import subprocess
from dataclasses import dataclass

import pandas as pd
from uncertainties import UFloat, ufloat

from xrd_tools import refinement_interface_factory, utils
from xrd_tools.refinement import RefinementResult
from xrd_tools.refinement_interface import AppNotInstalledError, RefinementInterface

# Settings for the plugin loader:
#   name under which ``register()`` registers ``ProfexInterface`` at the
#   refinement interface factory
NAME_REFINEMENT_INTERFACE = "profex"

# Settings for the plugin itself:
#   command to start the refinement app (also displayed in logs); it is looked
#   up with ``shutil.which`` and launched with ``subprocess.run``
REFINEMENT_APPLICATION = "profex"
#   'software' argument handed over to RefinementResult object
SOFTWARE = "BGMN"
#   properties of the input data file; used as defaults for the
#   ``input_data_suffix``, ``input_data_header`` and ``input_data_delimiter``
#   fields of ``ProfexInterface``
INPUT_DATA_SUFFIX = ""
INPUT_DATA_HEADER = False
INPUT_DATA_DELIMITER = " "
#   text searched for in the lines of the ``*.lst`` file to locate the start
#   of the global parameter section
HEADER_GLOBAL_PARAMETER = "Global parameters and GOALs"
#   NOTE(review): the two markers below are not referenced in the visible part
#   of this module; presumably they delimit the per-phase sections of the
#   ``*.lst`` file -- confirm before relying on them.
HEADER_PHASES = "Local parameters and GOALs for phase "
FOOTER_PHASES = "Atomic positions for phase "

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


@dataclass
class ProfexInterface(RefinementInterface):
    """Profex/BGMN refinement interface.

    Args:
        measurement_id (str): ID of the measurement to be refined with profex.
        data (pd.Series): Series containing the x/y data of the measurement.
            The index represents the 2θ angle in °.
        dir_refinement (str): Path to the refinement project directory.
        encoding (str): Encoding used in refinement input data file.
        input_data_suffix (str): Suffix appended to ``measurement_id`` to
            generate refinement input- and project- filenames. Since the
            input data filename (without extension) is used as project name
            by profex, it is recommended to provide no suffix for the
            refinement files.
        input_data_header (bool): Passed as ``header`` to
            ``pandas.Series.to_csv`` when the refinement input data file is
            created, i.e. whether a header line is written.
        input_data_delimiter (str): Delimiter to be used for creation of
            refinement input data file (profex can not read-in comma
            separated values).
        name_phases (str): Key under which the list of phase names is stored
            in the dictionary returned by ``_get_dia_header``.
    """

    measurement_id: str
    data: pd.Series
    dir_refinement: str
    encoding: str
    input_data_suffix: str = INPUT_DATA_SUFFIX
    input_data_header: bool = INPUT_DATA_HEADER
    input_data_delimiter: str = INPUT_DATA_DELIMITER
    name_phases: str = "phases"

    def _string_converter(self, value: str) -> float | int | str:
        """Convert a provided string into an integer or a float.

        ``int`` is tried first, then ``float``; if neither conversion
        succeeds the string is returned unchanged.
        """
        for cast in (int, float):
            try:
                return cast(value)
            except ValueError:
                continue
        return value

    def _get_app_path(self) -> str | None:
        """Path to the profex binary, used to check if profex is installed.

        Returns:
            The result of ``shutil.which``: the path to the executable, or
            ``None`` if it is not found.
        """
        return shutil.which(REFINEMENT_APPLICATION)

    def _get_project_sibling(self, extension: str) -> str:
        """Path of the refinement project file with its extension replaced.

        Only the file extension is exchanged. A plain
        ``str.replace("dia", ...)`` on the whole path would also rewrite any
        "dia" occurring in the directory or in the measurement ID.

        Args:
            extension: New extension including the leading dot, e.g.
                ``".lst"``.
        """
        root, _ = os.path.splitext(self.file_refinement_project)
        return root + extension

    def _get_dia_header(self) -> dict[str, str | list[str]]:
        """Read the header of the ``*.dia`` file, excluding ``TITLE``.

        The first line of the file is split at single spaces, the first token
        is skipped and the remaining ``KEY=VALUE`` tokens are collected.

        Returns:
            A dictionary with the header values as strings. The values of all
            keys starting with ``STRUC`` are removed from the dictionary and
            stored as a list under the key ``self.name_phases`` instead.
        """
        with open(self.file_refinement_project, "r") as fobj:
            tokens = fobj.readline().strip("\n").split(" ")

        entries = {}
        for token in tokens[1:]:
            key, separator, value = token.partition("=")
            # Tokens without "=" (e.g. empty tokens caused by repeated
            # spaces) carry no key/value pair and are skipped.
            if separator:
                entries[key] = value

        # Values are deliberately kept as strings: phase names must not be
        # turned into numbers.
        header = {k: v for k, v in entries.items() if not k.startswith("STRUC")}
        header[self.name_phases] = [
            v for k, v in entries.items() if k.startswith("STRUC")
        ]
        return header

    def _get_refinement_statistics(self) -> dict[str, float]:
        r"""Extract the refinement statistics from the ``*.lst`` file, as well
        as the zero shift (``EPS1``) and sample displacement (``EPS2``) from
        the first line of the ``*.par`` file.

        The goodness of fit (GoF) and chi squared are calculated from the
        statistics as follows:

        $$\left(\frac{R_{wp}}{R_{exp}}\right)^2 = GoF^2 = \chi^2$$

        Returns:
            Dictionary with the keys ``Rp``, ``Rpb``, ``R``, ``Rwp``,
            ``Rexp``, ``GoF``, ``chi_sqrd``, ``zero_shift`` and
            ``sample_discplacement``.

        Raises:
            ValueError: If the R-values are not found in the ``*.lst`` file
                or ``EPS1``/``EPS2`` are not found in the ``*.par`` file.
        """
        file_lst = self._get_project_sibling(".lst")
        with open(file_lst) as fobj:
            file_contents = fobj.read()

        # Extract Rp, Rpb, R, Rwp, and Rexp
        names = ("Rp", "Rpb", "R", "Rwp", "Rexp")
        number = r"(\d+(?:\.\d+)?)"
        pattern = r"\s+".join(f"{name}={number}%" for name in names)
        match = re.search(pattern, file_contents)
        if match is None:
            raise ValueError(f"No refinement statistics found in {file_lst!r}.")
        stats = dict(zip(names, map(float, match.groups())))
        stats["GoF"] = stats["Rwp"] / stats["Rexp"]
        stats["chi_sqrd"] = stats["GoF"] ** 2

        file_par = self._get_project_sibling(".par")
        with open(file_par, "r") as fobj:
            line = fobj.readline().strip("\n")
        # Map the values by name so the result does not depend on the order
        # in which EPS1 and EPS2 appear in the line.
        eps = dict(re.findall(r"(EPS1|EPS2)=(\S+)", line))
        missing = [name for name in ("EPS1", "EPS2") if name not in eps]
        if missing:
            raise ValueError(
                f"Parameter(s) {', '.join(missing)} not found in first line of {file_par!r}."
            )
        stats["zero_shift"] = float(eps["EPS1"])
        stats["sample_discplacement"] = float(eps["EPS2"])
        return stats

    def _get_global_parameters(self) -> dict[str, tuple[float, float]]:
        """Extract the global refinement parameter from the ``*.lst`` file.

        Parsing starts at the line following the first line that contains
        ``HEADER_GLOBAL_PARAMETER`` and stops at the first blank line.

        Returns:
            Dictionary mapping each parameter name to a tuple of
            ``(value, uncertainty)``.

        Raises:
            ValueError: If the global parameter section is not found.
        """
        file_lst = self._get_project_sibling(".lst")
        pattern = re.compile(r"[A-Z0-9]+=[\d+\.\d]+\+-\d+\.\d+")
        with open(file_lst) as fobj:
            lines = fobj.readlines()

        # Locate the first line after the section header
        start_line = next(
            (
                i + 1
                for i, line in enumerate(lines)
                if HEADER_GLOBAL_PARAMETER in line
            ),
            None,
        )
        if start_line is None:
            raise ValueError(
                f"Section {HEADER_GLOBAL_PARAMETER!r} not found in {file_lst!r}."
            )

        results = {}
        for line in lines[start_line:]:
            if not line.strip():
                break
            match = pattern.search(line)
            if match:
                parameter, value = match.group().split("=")
                value, uncertainty = value.split("+-")
                results[parameter] = float(value), float(uncertainty)
        return results

    def create_input_data(self) -> None:
        """Create refinement input data for profex.

        Writes ``self.data`` to ``self.file_refinement_input`` using the
        configured delimiter, header option and encoding.
        """
        self.data.to_csv(
            self.file_refinement_input,
            sep=self.input_data_delimiter,
            header=self.input_data_header,
            encoding=self.encoding,
        )
        logger.debug(
            f"Created input data for profex refinement of measurement {self.measurement_id}."
        )

    def get_cif_files(self) -> dict[str, str]:
        """Get a dictionary with the name and the path to the cif files for
        the refined phases.

        All files in ``dir_refinement`` whose name starts with
        ``measurement_id`` and ends with ``.cif`` are collected.

        Raises:
            ValueError: If the number of CIF files differs from the number of
                phases found in the ``*.dia`` header.
        """
        dia_header = self._get_dia_header()
        phases = dia_header[self.name_phases]
        # ``os.listdir`` returns entries in arbitrary order; sorting makes
        # the positional pairing with the phases reproducible.
        cif_files = sorted(
            os.path.join(self.dir_refinement, file)
            for file in os.listdir(self.dir_refinement)
            if file.startswith(self.measurement_id) and file.endswith(".cif")
        )
        if len(phases) != len(cif_files):
            raise ValueError(
                f"Amount of CIF files ({len(cif_files)}) not matching to amount of refined phases ({len(phases)})."
            )
        return dict(zip(phases, cif_files))

    def get_refined_data(self, i_calc: str, i_bg: str) -> pd.DataFrame:
        """Get a pandas DataFrame containing the refined data series.

        Args:
            i_calc (str): Column name for the calculated intensities.
            i_bg (str): Column name for the background intensities.

        Returns:
            pd.DataFrame: A DataFrame with index set to 2θ and an index name
            as found in the provided data. The columns correspond to:

            - I_calc with column name provided as argument,
            - I_bg with column name provided as argument, and
            - a further column for each phase refined, named with its name
              as defined in profex.
        """
        # Extract the amount of structures from the header of the dia file
        dia_header = self._get_dia_header()
        columns = [self.data.name, i_calc, i_bg, *dia_header[self.name_phases]]

        # Read remaining data from dia file, set index and add columns
        utils.ensure_file_exists(self.file_refinement_project)
        df = pd.read_csv(
            self.file_refinement_project,
            sep=" ",
            skiprows=1,
            header=None,
            skipinitialspace=True,
            index_col=0,
            names=columns,
        )
        df.index.name = self.data.index.name
        # The measured intensities are already available in ``self.data``
        return df.drop([self.data.name], axis=1)

    def get_refinement_result(self) -> RefinementResult:
        """Get a RefinementResults object for the refinement of the measurement.

        GoF and chi squared are rounded to two decimals; ``version`` is
        passed as ``None``.
        """
        stats = self._get_refinement_statistics()
        return RefinementResult(
            r_wp=stats["Rwp"],
            r_exp=stats["Rexp"],
            gof=round(stats["GoF"], 2),
            chi_sqrd=round(stats["chi_sqrd"], 2),
            zero_shift=stats["zero_shift"],
            sample_discplacement=stats["sample_discplacement"],
            composition=self.get_composition(),
            software=SOFTWARE,
            version=None,
        )

    # TODO: Ensure the mass fractions are assigned to the correct phases!
    def get_composition(self, mass_frac_prefix="Q") -> dict[str, UFloat]:
        """Get the composition of the refined sample.

        Args:
            mass_frac_prefix: Prefix identifying the global parameters that
                hold mass fractions.

        Returns:
            Dictionary mapping phase names to mass fractions with
            uncertainty. Phases and mass fractions are paired by position.
        """
        global_parameter = self._get_global_parameters()
        mass_fracs = [
            ufloat(value, uncertainty)
            for name, (value, uncertainty) in global_parameter.items()
            if name.startswith(mass_frac_prefix)
        ]
        phases = self.get_phases()
        if len(phases) != len(mass_fracs):
            # ``zip`` silently drops surplus entries; make this visible.
            logger.warning(
                f"Amount of mass fractions ({len(mass_fracs)}) not matching to amount of refined phases ({len(phases)})."
            )
        return dict(zip(phases, mass_fracs))

    def get_phases(self) -> list[str]:
        """Get a list with phases determined in the refined sample."""
        dia_header = self._get_dia_header()
        return list(dia_header[self.name_phases])

    @property
    def file_refinement_input(self) -> str:
        """Path to ``*.xy`` refinement input data file.

        It is constructed as follows:
        ``<dir_refinement>/<measurement_id><suffix_data>.xy``
        """
        return os.path.join(
            self.dir_refinement,
            self.measurement_id + self.input_data_suffix + ".xy",
        )

    @property
    def file_refinement_project(self) -> str:
        """Path to ``*.dia`` refinement project file.

        It is constructed as follows:
        ``<dir_refinement>/<measurement_id><suffix_data>.dia``
        """
        return os.path.join(
            self.dir_refinement,
            self.measurement_id + self.input_data_suffix + ".dia",
        )

    def open_refinement(self) -> None:
        """Open the refinement project with profex.

        The ``*.dia`` project file is opened if it exists, otherwise the
        ``*.xy`` input data file. The call blocks until the process exits.

        Raises:
            AppNotInstalledError: If profex is not installed on the machine.
        """
        if self._get_app_path() is None:
            raise AppNotInstalledError(
                f"Refinement application {REFINEMENT_APPLICATION!r} not installed."
            )

        # Get refinement project or input-data file
        if os.path.isfile(self.file_refinement_project):
            refinement_file = self.file_refinement_project
        else:
            refinement_file = self.file_refinement_input

        # Open refinement file with profex
        logger.debug(
            f"Refining measurement {self.measurement_id!r} with {REFINEMENT_APPLICATION!r}..."
        )
        process = subprocess.run(
            [REFINEMENT_APPLICATION, refinement_file],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        if process.returncode != 0:
            # Do not fail, but leave a trace of an abnormal exit.
            logger.warning(
                f"{REFINEMENT_APPLICATION!r} exited with return code {process.returncode}."
            )
def register() -> None:
    """Register ``ProfexInterface`` at the *xrd-tools* refinement interface factory.

    The interface is registered under the name ``NAME_REFINEMENT_INTERFACE``.
    """
    plugin_name = NAME_REFINEMENT_INTERFACE
    refinement_interface_factory.register(plugin_name, ProfexInterface)