Source code for alframework.qm_interfaces.orca5_interface

from parsl import python_app, bash_app
import os
import numpy as np
import json
from ase.atoms import Atoms
import ase.io
import shutil
import re

from alframework.tools.molecules_class import MoleculesObject

[docs] class orcaGenerator(): """ ORCA 5.0.1 interface with ALF. """ def __init__(self, scratch_path="/tmp/", store_path=None, nproc=36, unit={'energy': 'hartree', 'length': 'bohr'}, orca_env_file=None, orca_command="", orcainput="", orcablocks=""): """ unit used for input coordinates: Angstrom default units for output: atomic unit output parsing is for orca 5 Attributes: scratch_path (str): Path of the scratch directory. store_path (str): Path of the directory to store the output files. nproc (int): Number of processors to use in the calculation. unit (dict): Units of the basic properties (e.g. energy, length). orca_env_file (str): Path of ORCA's environment configuration file. orca_command (str): Command to run ORCA. orcainput (str): ORCA's input parameters. orcablocks (str): ORCA's input file blocks. """ self.scratch_path = scratch_path self.store_path = store_path self.nproc = nproc self.orca_env_fn = orca_env_file self.orca_command = orca_command self.orcainput = orcainput.strip() self.orcablocks = orcablocks if orcablocks.endswith('\n') else orcablocks+'\n' self.unit = unit #self.E_unit = 1.0 # default a.u., hartree for energy, bohr for length as used for orca #self.L_unit = 1.0 if self.unit['energy'] == 'hartree': self.E_unit = 1.0 elif self.unit['energy'] == 'ev': self.E_unit = 27.2113834 else: raise KeyError('energy unit not implemented') if self.unit['length'] =='bohr': self.L_unit = 1.0 elif self.unit['length'] == 'angstrom': self.L_unit = 0.5291772083 else: raise KeyError('length unit not implemented') self.datacounter = 0
[docs] def write_orca_input(self, ase_atoms, charge, multiplicity, job_path, filename="orca.inp"): """Write ORCA's input file. Args: ase_atoms (ase.Atoms): ase.Atoms object describing the system of interest. charge (int): Charge of the system. multiplicity (int): Multiplicity of the system 2S+1. job_path (str): Path of the directory to store the input file. filename (str): Input file name. Returns (None) """ with open(job_path + filename, "w") as f: f.write("! "+ self.orcainput + "\n") if not(self.nproc is None) and not(self.nproc == 1): f.write("%pal nproc " + str(self.nproc) + " end\n") f.write(self.orcablocks) f.write("* xyzfile %d %d input.xyz\n" % (charge, multiplicity)) ase.io.write(job_path+"input.xyz", ase_atoms,format='xyz')
[docs] def single_point(self, molecule, prefix="orca", properties=['energy', 'forces']): """Runs a single point calculation using ORCA. Unlike other generators, the choice of calculations will be performed is setup in __init__: orcainput Args: molecule (ase.Atoms): ase.Atoms object that describes the system. prefix (str): Prefix common to all input and output files. properties (list): List containing the properties to extract from ORCA's output file. Returns: (dict): Dictionary that stores the desired properties of the single point calculation. """ #logF = open('orca_run.log','w') #assert molecule.periodic() == False, 'ORCA does not support periodic boundary condition' job_path = self.scratch_path os.makedirs(job_path, exist_ok=True) #logF.write('directory made\n') # prepare orca input self.write_orca_input(molecule, 0, 1, job_path, filename=prefix+".inp") #logF.write('input printed\n') # run job if self.orca_env_fn is None: #logF.write("cd " + job_path +" ; " + self.orca_command + " " + prefix + ".inp > " + prefix + ".log\n") os.system("cd " + job_path +" ; " + self.orca_command + " " + prefix + ".inp > " + prefix + ".log") else: #logF.write("source " + self.orca_env_fn + " ; cd " + job_path +" ; " + self.orca_command + " " + prefix + ".inp > " + prefix + ".log\n") os.system("source " + self.orca_env_fn + " ; cd " + job_path +" ; " + self.orca_command + " " + prefix + ".inp > " + prefix + ".log") #logF.close() # parse output, force and energy if self.check_normal_termination( job_path + prefix + ".log"): n_atom = len(molecule) propertiesout = self.parse_output(job_path, prefix, n_atom, properties) propertiesout['converged'] = True else: propertiesout = {"converged": False} # save files and clean up if self.store_path is not None: os.system("cd " + self.scratch_path + " && tar -cvf " + self.store_path + "/orca_job_" + str(molecule.ids) + ".tar orca_job_" + str(molecule.ids)) shutil.rmtree(job_path) self.datacounter += 1 return propertiesout
[docs] def check_normal_termination(self, logfile): """Check if the QM calculation converged. Args: logfile (str): Path to ORCA output log file. Returns: (bool): True if converged and False otherwise. """ d = open(logfile).read() return "TOTAL RUN TIME: " in d and "SCF CONVERGED AFTER" in d
[docs] def parse_output(self, job_path, prefix, natom, properties, sign=-1): """Parse ORCA output file and extract the properties of interest. Args: job_path (str): Path of the directory that stores the output file. prefix (str): Name of the output file. natom (int): Number of atoms in the QM simulation. properties (list): List of properties to be extracted. sign (int): Specifies whether we want the forces (-1) or the gradient of the potential (1). Returns: (dict): Dictionary containing the extracted properties. """ # sign: -1: force, +1, gradient outengrad = open(job_path + prefix + '.engrad','r').read() outlog = open(job_path + prefix + '.log','r').read() outproperty = open(job_path + prefix + '_property.txt','r').read() reTOTALenergy = re.compile("The current total energy in Eh\n#\n([\s\S]+?)#") reSCFenergy = re.compile("SCF Energy:([\s\S]+?)\n") reCORRenergy = re.compile("Correlation Energy:([\s\S]+?)\n") reEnergy = re.compile("Total[\s\S]+?Energy:([\s\S]+?)\n") reDipole = re.compile("Electric_Properties\n[\s\S]+?Total Dipole moment:\n([\s\S]+?)---------------------") reQuad = re.compile("Electric_Properties\n[\s\S]+?Total quadrupole moment\n([\s\S]+?)# --------------") reGrad = re.compile('The current gradient in Eh\/bohr\n#\n([\s\S]+?)#') reHirshTrue = re.compile("HIRSHFELD ANALYSIS") reHirsh = re.compile("HIRSHFELD ANALYSIS\n[\s\S]+?SPIN \n([\s\S]+?)TOTAL") #Always pull in energies and forces from engrad outproperties = {} if 'forces' in properties: outproperties['forces'] = np.array([float(i) for i in reGrad.findall(outengrad)[-1].split("\n")[:natom*3]]).reshape([natom,3]) * sign * self.E_unit/self.L_unit if 'energy' in properties: outproperties['energy'] = float(reTOTALenergy.findall(outengrad)[-1]) * self.E_unit if 'SCF_energy' in properties: outproperties['SCF_energy'] = float(reEnergy.findall(outproperty)[-1]) * self.E_unit if 'CORR_energy' in properties: outproperties['CORR_energy'] = float(reEnergy.findall(outproperty)[-1]) * self.E_unit if 'dipole' in properties: outproperties['dipole'] = np.array([float(i.split()[1]) for i in reDipole.findall(outproperty)[-1].split("\n")[1:4]]) if 'quadrupole' in properties: outproperties['quadrupole'] = np.array([[float(j) for j in i.split()[1:4]] for i in reQuad.findall(outproperty)[-1].split("\n")[1:4]]) if 'hirshfeld' in properties: outproperties['hirshfeld'] = np.array([float(i.split()[2]) for i in reHirsh.findall(outlog)[-1].split("\n")[:natom]]) if 'hirshfeld_spin' in properties: outproperties['hirshfeld_spin'] = np.array([float(i.split()[3]) for i in reHirsh.findall(outlog)[-1].split("\n")[:natom]]) return outproperties
# def parse_output(self, job_path, prefix, natom, sign=-1): # # sign: -1: force, +1, gradient # fn = job_path + prefix + ".engrad" # energy = float(os.popen("grep energy %s -A 2 | tail -n 1" % fn).read().strip().split()[0])*self.E_unit # forces = sign*np.asarray([float(x.strip().split()[0]) for x in os.popen("grep gradient %s -A %d | tail -n %d" \ # % (fn, natom*3+1, natom*3)).read().split("\n")[:-1]]).reshape(-1,3)*self.E_unit/self.L_unit # fn = job_path + prefix + ".log" # mulliken = np.asarray( [ float(x.strip().split()[-1]) # for x in os.popen("grep 'MULLIKEN ATOMIC CHARGES' %s -A %d | tail -n %d" # % (fn, natom+1, natom)).read().split("\n")[:-1]]) # fn = job_path + prefix + "_property.txt" # dipole = np.asarray( [ float(x.strip().split()[-1]) for x in \ # os.popen("grep 'Total Dipole moment:' %s -A 4 | tail -n 3" % fn).read().strip().split('\n')])*self.L_unit # # # # return_dictionary = {"energy":energy,"forces":forces,"dipole":dipole,"mulliken":mulliken} # return return_dictionary @python_app(executors=['alf_QM_executor']) def orca_calculator_task(molecule_object, QM_config, QM_scratch_dir, properties_list): """ORCA calculator task based on the parameters in qm_config.json. Args: molecule_object (MoleculesObject): An object from the class MoleculesObject. QM_config (str): Relative path to QM_config.json file that contains the parameters needed to run ORCA. QM_scratch_dir (str): Relative path of the directory that stores the QM calculations. properties_list (dict): Dicitonary containing the properties to extract from ORCA output file that later will be stored in the database. Returns: (MoleculesObject): A MoleculesObject containing the results of the QM calculations. """ assert isinstance(molecule_object, MoleculesObject), 'molecule_object must be an instance of MoleculesObject' properties = list(properties_list.keys()) directory = QM_scratch_dir + '/' + molecule_object.get_moleculeid() + '/' atoms = molecule_object.get_atoms() orca = orcaGenerator(scratch_path=directory, nproc=QM_config['ncpu'], orca_env_file=QM_config['orca_env_file'], orca_command=QM_config['QM_run_command'], orcainput=QM_config['orcasimpleinput'], orcablocks=QM_config['orcablocks']) out_properties = orca.single_point(molecule=atoms, properties=properties) molecule_object.store_results(out_properties) molecule_object.set_converged_flag(out_properties['converged']) return molecule_object @python_app(executors=['alf_QM_executor']) def orca_double_calculator_task(molecule_object, QM_config, QM_scratch_dir, properties_list): """Double ORCA calculator task based on the parameters in qm_config.json. Args: molecule_object (MoleculesObject): An object from the class MoleculesObject. QM_config (str): Relative path to QM_config.json file that contains the parameters needed to run ORCA. QM_scratch_dir (str): Relative path of the directory that stores the QM calculations. properties_list (dict): Dicitonary containing the properties to extract from ORCA output file and which later will be stored in the database. Returns: (MoleculesObject): A MoleculesObject containing the results of the QM calculations. """ assert isinstance(molecule_object, MoleculesObject), 'molecule_object must be an instance of MoleculesObject' properties = list(properties_list.keys()) directory = QM_scratch_dir + '/' + molecule_object.get_moleculeid() atoms = molecule_object.get_atoms() directory1 = directory + '/1/' directory2 = directory + '/2/' orca1 = orcaGenerator(scratch_path=directory1, nproc=QM_config['ncpu'], orca_env_file=QM_config['orca_env_file'], orca_command=QM_config['QM_run_command'], orcainput=QM_config['orcasimpleinput'], orcablocks=QM_config['orcablocks']) properties1 = orca1.single_point(molecule=atoms, properties=properties) orca2 = orcaGenerator(scratch_path=directory2, nproc=QM_config['ncpu'], orca_env_file=QM_config['orca_env_file'], orca_command=QM_config['QM_run_command'], orcainput=QM_config['orcasimpleinput'], orcablocks=QM_config['orcablocks']) properties2 = orca2.single_point(molecule=atoms, properties=properties) maxEdev = np.abs(properties1['energy'] - properties2['energy']) maxFdev = np.max(np.abs(properties1['forces'] - properties2['forces'])) properties = {} for key in list(properties1): properties[key] = np.mean([properties1[key], properties2[key]], axis=0) if (maxEdev < QM_config['Ediff']) and (maxFdev < QM_config['Fdiff']) and properties1['converged'] and properties2['converged']: molecule_object.set_converged_flag(True) properties['converged'] = True # Leaving this line for now, afraid of breaking code. else: molecule_object.set_converged_flag(False) properties['converged'] = False # Leaving this line for now, afraid of breaking code. molecule_object.store_results(properties) return molecule_object