Source code for ddpc.io.read.dspaw_as

"""Read DS-PAW specified .as format file to ASE atoms."""

import re
from pathlib import Path
from typing import Any

import numpy as np
from ase.atoms import Atoms
from loguru import logger

from ddpc.io.utils import absf, remove_comments


[docs] @logger.catch def read(p: Path | str = "structure.as") -> Atoms: """Read DS-PAW atomic structure format file and convert to ASE Atoms object. This function reads DS-PAW's custom .as format files which support advanced features like lattice constraints, atomic position constraints, and magnetic moments. The format is used by the DS-PAW DFT code for structure input. Parameters ---------- p : pathlib.Path or str, default "structure.as" Path to the DS-PAW .as format structure file. Returns ------- ase.atoms.Atoms ASE Atoms object containing the crystal structure with: - Atomic positions (Cartesian or fractional coordinates) - Unit cell lattice vectors - Magnetic moments (if specified) - Constraint information stored in the info dictionary Notes ----- The DS-PAW .as format supports: - Lattice vector constraints (Fix_x, Fix_y, Fix_z per vector) - Atomic position constraints (Fix, Fix_x, Fix_y, Fix_z per atom) - Magnetic moments (collinear: Mag, non-collinear: Mag_x, Mag_y, Mag_z) - Both Cartesian and Direct (fractional) coordinate systems Constraint information is preserved in the Atoms.info dictionary for later use in calculations or file writing. Examples -------- >>> atoms = read("input.as") >>> print(atoms.get_chemical_formula()) 'Si2' >>> atoms = read(Path("structures/crystal.as")) >>> print(atoms.info.keys()) # Shows constraint information dict_keys(['Fix_x', 'Fix_y', 'Fix_z']) """ absfile = absf(p) lines = remove_comments(absfile, "#") natom = int(lines[1]) # number of atoms lattice = _get_lat(lines) lat_fixs = _get_latfixs(lines) elements, coords = _get_ele_pos(lines, natom) atom_fix, mag = _get_mag_fix(lines, natom) if "Mag" in mag: magmoms = mag.pop("Mag") elif "Mag_x" in mag and "Mag_y" in mag and "Mag_z" in mag: magmoms = np.array([mag.pop("Mag_x"), mag.pop("Mag_y"), mag.pop("Mag_z")]).T.tolist() else: magmoms = None # lat & atom fix info are stored in info property freedom = {"lat": lat_fixs} | atom_fix if any(lat_fixs) else atom_fix atoms = Atoms(symbols=elements, cell=lattice, info=freedom, magmoms=magmoms, pbc=True) cd = lines[6].strip().split()[0] # Cartesian/Direct _set_poses(atoms, cd, coords) return atoms
@logger.catch def _set_poses(atoms: Atoms, cd: str, coords: np.ndarray) -> None: """Set atomic positions in ASE Atoms object based on coordinate type. Parameters ---------- atoms : ase.atoms.Atoms ASE Atoms object to modify. cd : str Coordinate type: "Direct" for fractional or "Cartesian" for absolute. coords : numpy.ndarray Array of atomic coordinates with shape (n_atoms, 3). Raises ------ ValueError If coordinate type is neither "Direct" nor "Cartesian". """ # 'Cartesian/Direct Mag Fix_x ...' if cd == "Direct": atoms.set_scaled_positions(coords) elif cd == "Cartesian": atoms.set_positions(coords) else: raise ValueError("Structure file format error!") @logger.catch def _get_latfixs(lines: list[str]) -> list[bool]: """Extract lattice constraint information from DS-PAW .as file lines. Parameters ---------- lines : list of str Preprocessed lines from the .as file with comments removed. Returns ------- list of bool List of 9 boolean values representing lattice vector constraints: [a_x, a_y, a_z, b_x, b_y, b_z, c_x, c_y, c_z] True means the component is constrained (fixed). Raises ------ ValueError If lattice constraint format is not recognized. """ lat_fixs: list[bool] = [] if lines[2].strip() != "Lattice": # fix lattice lattice_fix_info = lines[2].strip().split()[1:] if lattice_fix_info == ["Fix_x", "Fix_y", "Fix_z"]: # ONLY support xyz fix in sequence, yzx will cause error for line in lines[3:6]: lfs = line.strip().split()[3:6] for lf in lfs: if lf.startswith("T"): lat_fixs.append(True) elif lf.startswith("F"): lat_fixs.append(False) elif lattice_fix_info == ["Fix"]: for line in lines[3:6]: lf = line.strip().split()[3] if lf.startswith("T"): lat_fixs.extend([True, True, True]) elif lf.startswith("F"): lat_fixs.extend([False, False, False]) else: raise ValueError("Lattice fix info error!") return lat_fixs @logger.catch def _get_lat(lines: list[str]) -> np.ndarray: """Parse lattice vectors from DS-PAW .as file lines. Parameters ---------- lines : list of str Preprocessed lines from the .as file with comments removed. Returns ------- numpy.ndarray 3x3 array containing lattice vectors as rows: [[a_x, a_y, a_z], [b_x, b_y, b_z], [c_x, c_y, c_z]] """ lattice = [] # lattice matrix for line in lines[3:6]: vector = line.split() lattice.extend([float(vector[0]), float(vector[1]), float(vector[2])]) return np.asarray(lattice).reshape(3, 3) @logger.catch def _get_ele_pos(lines: list[str], natom: int) -> tuple[list[str], np.ndarray]: """Extract element symbols and atomic positions from DS-PAW .as file lines. Parameters ---------- lines : list of str Preprocessed lines from the .as file with comments removed. natom : int Number of atoms in the structure. Returns ------- tuple of (list of str, numpy.ndarray) - elements: List of element symbols with underscores removed - coords: Array of atomic coordinates with shape (natom, 3) """ elements = [] positions = [] for i in range(natom): atom_data = lines[i + 7].strip().split() elements.append(atom_data[0]) positions.extend([float(atom_data[1]), float(atom_data[2]), float(atom_data[3])]) coords = np.asarray(positions).reshape(-1, 3) elements = [re.sub(r"_", "", e) for e in elements] return elements, coords @logger.catch def _get_mag_fix(lines: list[str], natom: int) -> tuple[dict, dict]: """Extract magnetic moments and atomic constraints from DS-PAW .as file lines. Parameters ---------- lines : list of str Preprocessed lines from the .as file with comments removed. natom : int Number of atoms in the structure. Returns ------- tuple of (dict, dict) - atom_fix: Dictionary with constraint information for each atom - mag: Dictionary with magnetic moment information for each atom Notes ----- Supported constraint types: - "Fix": 3D constraint (x, y, z components) - "Fix_x", "Fix_y", "Fix_z": Individual component constraints Supported magnetic moment types: - "Mag": Collinear magnetic moments - "Mag_x", "Mag_y", "Mag_z": Non-collinear magnetic moment components """ l6 = lines[6].strip() mf_info = l6.split()[1:] for item in mf_info: assert item in [ "Mag", "Mag_x", "Mag_y", "Mag_z", "Fix", "Fix_x", "Fix_y", "Fix_z", ] def handle_fix_value(val_str): return val_str.startswith("T") mag_fix_dict: dict[str, list] = {} # may be empty for mf_index, item in enumerate(mf_info): values: list[Any] = [] for i in range(natom): atom_data = lines[i + 7].strip().split() mf_data = atom_data[4:] if item == "Fix": # Handle "Fix" which is a list of three "Fix_" values values.append( [ handle_fix_value(mf_data[mf_index]), handle_fix_value(mf_data[mf_index + 1]), handle_fix_value(mf_data[mf_index + 2]), ] ) elif item.startswith("Fix_"): # Handle "Fix_x", "Fix_y", "Fix_z" values.append(handle_fix_value(mf_data[mf_index])) else: # Mag, Mag_x, Mag_y, Mag_z values.append(float(mf_data[mf_index])) mag_fix_dict[item] = values # split into atom_fix and mag dicts atom_fix = {k: v for k, v in mag_fix_dict.items() if k.startswith("Fix")} mag = {k: v for k, v in mag_fix_dict.items() if k.startswith("Mag")} return atom_fix, mag