"""A xrd-tools refinement interface plugin that integrates Profex/BGMN."""
import logging
import os
import re
import shutil
import subprocess
from dataclasses import dataclass
import pandas as pd
from uncertainties import UFloat, ufloat
from xrd_tools import refinement_interface_factory, utils
from xrd_tools.refinement import RefinementResult
from xrd_tools.refinement_interface import AppNotInstalledError, RefinementInterface
# Settings for the plugin loader:
# name under which ``register()`` registers this interface at the factory
NAME_REFINEMENT_INTERFACE = "profex"
# Settings for the plugin itself:
# command used to locate and start the refinement app (also displayed in logs)
REFINEMENT_APPLICATION = "profex"
# 'software' argument handed over to the RefinementResult object
SOFTWARE = "BGMN"
# Properties of the refinement input data file
# (used as defaults for the corresponding ProfexInterface fields)
INPUT_DATA_SUFFIX = ""
INPUT_DATA_HEADER = False
INPUT_DATA_DELIMITER = " "
# Marker line searched for in the ``*.lst`` file; the global parameters
# are read from the lines following it
HEADER_GLOBAL_PARAMETER = "Global parameters and GOALs"
# Markers for the per-phase sections
# NOTE(review): not referenced by the code visible here - confirm they are still needed
HEADER_PHASES = "Local parameters and GOALs for phase "
FOOTER_PHASES = "Atomic positions for phase "
# Module-level logger
logger = logging.getLogger(__name__)
@dataclass
class ProfexInterface(RefinementInterface):
    """Profex/BGMN refinement interface.

    Args:
        measurement_id (str): ID of the measurement to be refined with profex.
        data (pd.Series): Series containing the x/y data of the measurement.
            The index represents the 2θ angle in °.
        dir_refinement (str): Path to the refinement project directory.
        encoding (str): Encoding used in refinement input data file.
        input_data_suffix (str): Suffix appended to ``measurement_id`` to generate
            refinement input- and project- filenames. Since the input data filename
            (without extension) is used as project name by profex, it is recommended
            to provide no suffix for the refinement files.
        input_data_delimiter (str): Delimiter to be used for creation of refinement
            input data file (profex can not read-in comma separated values).
        input_data_header (bool): Option to ignore header for creation of refinement
            input data file.
        name_phases (str): Key under which the list of refined phases is stored
            in the dictionary returned by ``_get_dia_header``.
    """

    measurement_id: str
    data: pd.Series
    dir_refinement: str
    encoding: str
    input_data_suffix: str = INPUT_DATA_SUFFIX
    input_data_header: bool = INPUT_DATA_HEADER
    input_data_delimiter: str = INPUT_DATA_DELIMITER
    name_phases: str = "phases"

    def _string_converter(self, value: str) -> float | int | str:
        """Convert a provided string into an integer or a float.

        The string is returned unchanged if it represents neither of both.
        """
        for converter in (int, float):
            try:
                return converter(value)
            except ValueError:
                continue
        return value

    def _project_file_with_extension(self, extension: str) -> str:
        """Path of the file sharing the project's base name, ending in ``.<extension>``.

        Only the extension of the ``*.dia`` path is exchanged, so a ``dia``
        occurring in a directory name or in the measurement ID stays untouched.
        """
        stem, _ = os.path.splitext(self.file_refinement_project)
        return f"{stem}.{extension}"

    def _get_app_path(self) -> str | None:
        """Path to the profex binary, used to check if profex is installed.

        Returns:
            The path found by ``shutil.which`` or ``None`` if there is none.
        """
        return shutil.which(REFINEMENT_APPLICATION)

    def _get_dia_header(self) -> dict[str, float | int | str | list[str]]:
        """Read the header of the ``*.dia`` file, excluding ``TITLE``.

        The first line of the file is split at whitespace, the first token is
        skipped and every remaining ``KEY=VALUE`` token is collected.

        Returns:
            A dictionary with one entry per header key (numeric values are
            converted to ``int``/``float``). All values of keys starting with
            ``STRUC`` are gathered, in file order, in a list of strings stored
            under the key ``self.name_phases``.
        """
        with open(self.file_refinement_project, "r") as fobj:
            # split() without argument drops empty tokens caused by repeated
            # or trailing blanks, which have no KEY=VALUE structure
            tokens = fobj.readline().split()

        header: dict[str, float | int | str | list[str]] = {}
        phases: list[str] = []
        for token in tokens[1:]:
            key, separator, value = token.partition("=")
            if not separator:
                # not a KEY=VALUE pair, nothing to store
                continue
            if key.startswith("STRUC"):
                # phase names are always kept as strings
                phases.append(value)
            else:
                header[key] = self._string_converter(value)
        header[self.name_phases] = phases
        return header

    def _get_refinement_statistics(self) -> dict[str, float]:
        r"""Extract the refinement statistics from the ``*.lst`` file,
        as well as the zero shift (``EPS1``) and sample displacement (``EPS2``)
        from the first line of the ``*.par`` file.

        Calculates goodness of fit (GoF) and chi_sqrd from the statistics as follows:

        $$\left(\frac{R_{wp}}{R_{exp}}\right)^2 = GoF^2 = \chi^2$$

        Returns:
            A dictionary with the keys ``Rp``, ``Rpb``, ``R``, ``Rwp``, ``Rexp``,
            ``GoF``, ``chi_sqrd``, ``zero_shift`` and ``sample_discplacement``.

        Raises:
            ValueError: If the statistics can not be found in the ``*.lst`` file
                or ``EPS1``/``EPS2`` are missing in the ``*.par`` file.
        """
        number = r"(\d+(?:\.\d+)?)"
        pattern = (
            rf"Rp={number}%\s+Rpb={number}%\s+R={number}%"
            rf"\s+Rwp={number}%\s+Rexp={number}%"
        )
        lst_file = self._project_file_with_extension("lst")
        with open(lst_file) as fobj:
            match = re.search(pattern, fobj.read())
        if match is None:
            raise ValueError(
                f"No refinement statistics (Rp, Rpb, R, Rwp, Rexp) found in {lst_file!r}."
            )
        names = ("Rp", "Rpb", "R", "Rwp", "Rexp")
        stats = dict(zip(names, map(float, match.groups())))
        stats["GoF"] = stats["Rwp"] / stats["Rexp"]
        stats["chi_sqrd"] = stats["GoF"] ** 2

        par_file = self._project_file_with_extension("par")
        with open(par_file, "r") as fobj:
            first_line = fobj.readline()
        # Look the values up by name, so their order in the line is irrelevant
        eps = dict(re.findall(r"(EPS1|EPS2)=(\S+)", first_line))
        missing = [name for name in ("EPS1", "EPS2") if name not in eps]
        if missing:
            raise ValueError(f"{', '.join(missing)} not found in {par_file!r}.")
        stats["zero_shift"] = float(eps["EPS1"])
        stats["sample_discplacement"] = float(eps["EPS2"])
        return stats

    def _get_global_parameters(self) -> dict[str, tuple[float, float]]:
        """Extract the global refinement parameter from the ``*.lst`` file.

        Reading starts at the line following the global parameter header and
        stops at the first blank line. Only lines containing an entry of the
        form ``NAME=value+-uncertainty`` are considered.

        Returns:
            A dictionary mapping the parameter name to a tuple of
            ``(value, uncertainty)``.

        Raises:
            ValueError: If the global parameter header is not found.
        """
        lst_file = self._project_file_with_extension("lst")
        pattern = re.compile(r"([A-Z0-9]+)=(\d+(?:\.\d+)?)\+-(\d+(?:\.\d+)?)")
        with open(lst_file) as fobj:
            lines = fobj.readlines()

        start_line = next(
            (
                number + 1
                for number, line in enumerate(lines)
                if HEADER_GLOBAL_PARAMETER in line
            ),
            None,
        )
        if start_line is None:
            raise ValueError(
                f"Header {HEADER_GLOBAL_PARAMETER!r} not found in {lst_file!r}."
            )

        results = {}
        for line in lines[start_line:]:
            if not line.strip():
                # a blank line terminates the section
                break
            match = pattern.search(line)
            if match:
                parameter, value, uncertainty = match.groups()
                results[parameter] = float(value), float(uncertainty)
        return results

    def get_cif_files(self) -> dict[str, str]:
        """Get a dictionary with the name and the path to the cif files for the refined phases.

        All ``*.cif`` files in the refinement directory whose name starts with
        the measurement ID are collected and paired with the phases by position.

        Raises:
            ValueError: If the amount of CIF files differs from the amount of phases.
        """
        phases = self._get_dia_header()[self.name_phases]
        # os.listdir returns the entries in arbitrary order, sort them to get
        # a reproducible assignment.
        # NOTE(review): the pairing is purely positional - confirm that the
        # sorted file names follow the phase order of the *.dia header.
        cif_files = sorted(
            os.path.join(self.dir_refinement, file)
            for file in os.listdir(self.dir_refinement)
            if file.startswith(self.measurement_id) and file.endswith(".cif")
        )
        if len(phases) != len(cif_files):
            raise ValueError(
                f"Amount of CIF files ({len(cif_files)}) not matching to amount of refined phases ({len(phases)})."
            )
        return dict(zip(phases, cif_files))

    def get_refined_data(self, i_calc: str, i_bg: str) -> pd.DataFrame:
        """Get a pandas DataFrame containing the refined data series.

        Args:
            i_calc (str): Column name for the calculated intensities.
            i_bg (str): Column name for the background intensities.

        Returns:
            pd.DataFrame: A DataFrame with index set to 2θ and an index name
            as found in the provided data. The columns correspond to:

            - I_calc with column name provided as argument,
            - I_bg with column name provided as argument, and
            - a further column for each phase refined, named with its name as defined
              in profex.
        """
        # Check for the project file before it is opened for the first time
        # (presumably raises if the file is missing - verify against utils)
        utils.ensure_file_exists(self.file_refinement_project)
        # The phases listed in the header of the dia file define the
        # additional columns
        phases = self._get_dia_header()[self.name_phases]
        columns = [self.data.name, i_calc, i_bg, *phases]
        # Read remaining data from dia file, set index and add columns
        df = pd.read_csv(
            self.file_refinement_project,
            sep=" ",
            skiprows=1,
            header=None,
            skipinitialspace=True,
            index_col=0,
            names=columns,
        )
        df.index.name = self.data.index.name
        # The measured intensities are already part of the provided data
        return df.drop([self.data.name], axis=1)

    def get_refinement_result(self) -> RefinementResult:
        """Get a RefinementResults object for the refinement of the measurement.

        GoF and chi_sqrd are rounded to two decimals, no version is provided.
        """
        stats = self._get_refinement_statistics()
        return RefinementResult(
            r_wp=stats["Rwp"],
            r_exp=stats["Rexp"],
            gof=round(stats["GoF"], 2),
            chi_sqrd=round(stats["chi_sqrd"], 2),
            zero_shift=stats["zero_shift"],
            sample_discplacement=stats["sample_discplacement"],
            composition=self.get_composition(),
            software=SOFTWARE,
            version=None,
        )

    # TODO: Ensure the mass fractions are assigned to the correct phases!
    def get_composition(self, mass_frac_prefix: str = "Q") -> dict[str, UFloat]:
        """Get the composition of the refined sample.

        Args:
            mass_frac_prefix (str): Prefix identifying the global parameters
                that hold a mass fraction.

        Returns:
            A dictionary mapping each phase to its mass fraction (value with
            uncertainty). Phases and mass fractions are paired by position.
        """
        mass_fracs = [
            ufloat(value, uncertainty)
            for name, (value, uncertainty) in self._get_global_parameters().items()
            if name.startswith(mass_frac_prefix)
        ]
        phases = self.get_phases()
        if len(phases) != len(mass_fracs):
            # zip() silently drops the surplus entries, so at least report it
            logger.warning(
                "Amount of mass fractions (%d) not matching to amount of refined phases (%d).",
                len(mass_fracs),
                len(phases),
            )
        return dict(zip(phases, mass_fracs))

    def get_phases(self) -> list[str]:
        """Get a list with phases determined in the refined sample."""
        return list(self._get_dia_header()[self.name_phases])

    @property
    def file_refinement_input(self) -> str:
        """Path to ``*.xy`` refinement input data file.

        It is constructed as follows:
        ``<dir_refinement>/<measurement_id><input_data_suffix>.xy``
        """
        return os.path.join(
            self.dir_refinement,
            self.measurement_id + self.input_data_suffix + ".xy",
        )

    @property
    def file_refinement_project(self) -> str:
        """Path to ``*.dia`` refinement project file.

        It is constructed as follows:
        ``<dir_refinement>/<measurement_id><input_data_suffix>.dia``
        """
        return os.path.join(
            self.dir_refinement,
            self.measurement_id + self.input_data_suffix + ".dia",
        )

    def open_refinement(self) -> None:
        """Open the refinement project with profex.

        The project file (``*.dia``) is opened if it exists, otherwise the
        input data file (``*.xy``) is handed over to profex. The call blocks
        until profex has terminated.

        Raises:
            AppNotInstalledError: If profex is not installed on the machine.
        """
        if self._get_app_path() is None:
            raise AppNotInstalledError(
                f"Refinement application {REFINEMENT_APPLICATION!r} not installed."
            )
        # Get refinement project or input-data file
        if os.path.isfile(self.file_refinement_project):
            refinement_file = self.file_refinement_project
        else:
            refinement_file = self.file_refinement_input
        # Open refinement file with profex
        logger.debug(
            "Refining measurement %r with %r...",
            self.measurement_id,
            REFINEMENT_APPLICATION,
        )
        completed = subprocess.run(
            [REFINEMENT_APPLICATION, refinement_file],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        if completed.returncode != 0:
            # Do not fail, but leave a trace of what the application reported
            logger.warning(
                "%r exited with return code %s: %s",
                REFINEMENT_APPLICATION,
                completed.returncode,
                completed.stderr.decode(errors="replace").strip(),
            )
def register() -> None:
    """Register the module as *xrd-tools* refinement interface plugin at its factory.

    The ``ProfexInterface`` class is registered under the name given by
    ``NAME_REFINEMENT_INTERFACE``.
    """
    refinement_interface_factory.register(NAME_REFINEMENT_INTERFACE, ProfexInterface)