Source code for libra_py.DFTB_methods

#*********************************************************************************
#* Copyright (C) 2019-2020  Alexey V. Akimov, Brendan Smith
#*
#* This file is distributed under the terms of the GNU General Public License
#* as published by the Free Software Foundation, either version 3 of
#* the License, or (at your option) any later version.
#* See the file LICENSE in the root directory of this distribution
#* or <http://www.gnu.org/licenses/>.
#*
#*********************************************************************************/
"""
.. module:: DFTB_methods
   :platform: Unix, Windows
   :synopsis: This module implements functions for dealing with the outputs from DFTB+ package

.. moduleauthor:: 
       Alexey V. Akimov 
  
"""


import os
import sys
import math
import re

if sys.platform=="cygwin":
    from cyglibra_core import *
elif sys.platform=="linux" or sys.platform=="linux2":
    from liblibra_core import *
import util.libutil as comn

from . import units
from . import scan
from . import regexlib as rgl

from libra_py import CP2K_methods
import numpy as np




[docs]def get_energy_forces(filename, nat): """Get forces from the input file Args: filename ( string ): the name of the file to read, usually this is a the file "detailed.out" produced with the input containing option "CalculateForces = Yes " nat ( int ): the number of atoms in the system Returns: tuple: ( E_ex, F, Flst ), where: * E_ex ( double ): excitation energy for the given state [ units: a.u. ] * F ( MATRIX(ndof, 1) ): the forces acting on all atoms [ units: a.u. of force ] * Flst ( list of [x, y, z] ): the forces acting on all atoms [ units: a.u. of force ] Warning: it is likely not gonna work for files other than "detailed.out" """ # Read the file f = open(filename, "r") A = f.readlines() f.close() # Returned properties initialized E_ex = 0.0 F = MATRIX(3 * nat, 1) Flst = [] # Lets look for what we need sz = len(A) for i in range(0,sz): tmp = A[i].split() #=========== Look for forces ============= if len(tmp)==2: if tmp[0]=="Total" and tmp[1]=="Forces": for j in range(i+1, i+1+nat): ind = j - i - 1 tmp1 = A[j].split() x = float(tmp1[0]) y = float(tmp1[1]) z = float(tmp1[2]) F.set(3*ind+0, 0, x); F.set(3*ind+1, 0, y); F.set(3*ind+2, 0, z) Flst.append([x, y, z]) #=========== Excitation energy ============= if len(tmp)>3: if tmp[0]=="Excitation" and tmp[1]=="Energy:": E_ex = float(tmp[2]) return E_ex, F, Flst
[docs]def get_dftb_matrices(filename, act_sp1=None, act_sp2=None): """Get the matrices printed out by the DFTB+ Args: filename ( string ): the name of the file to read, usually these are any of the files: "hamsqr1.dat", "hamsqr2.dat", etc. or "oversqrt.dat". Produced with the input containing option "WriteHS = Yes" act_sp1 ( list of ints or None): the row active space to extract from the original files Indices here start from 0. If set to None - the number of AOs will be determined automatically from the file. [default: None] act_sp2 ( list of ints or None): the cols active space to extract from the original files Indices here start from 0. If set to None - the number of AOs will be determined automatically from the file. [default: None] Returns: list of CMATRIX(N, M): X: where N = len(act_sp1) and M = len(act_sp2) These are the corresponding property matrices (converted to the complex type) for each k-point, such that X[ikpt] is a CMATRIX(N, M) containing the overlaps/Hamiltonians for the k-point ```ikpt```. Warning: So far tested only for a single k-point! """ # Copy the content of the file to a temporary location f = open(filename, "r") A = f.readlines() f.close() norbs = int(float(A[1].split()[1])) nkpts = int(float(A[1].split()[2])) # Determine the dimensions of the output matrices if act_sp1==None: nstates1 = norbs act_sp1 = list(range(0, nstates1)) else: nstates1 = len(act_sp1) if act_sp2==None: nstates2 = norbs act_sp2 = list(range(0, nstates2)) else: nstates2 = len(act_sp2) # Output variables X = [] # For each k-point for ikpt in range(0,nkpts): # Write just the matrix data into a temporary files # This procedure is made fault-resistant, to avoid wrong working # when some of the matrix elements printed out are nonsense. B = A[5+ikpt*norbs : 5+(1+ikpt)*norbs] f1 = open(filename+"_tmp", "w") for i in range(0,norbs): tmp = B[i].split() line = "" for j in range(0,norbs): z = 0.0 if tmp[j]=="NaN": z = 0.0 else: try: z = float(tmp[j]) except ValueError: z = 0.0 pass except TypeError: z = 0.0 pass line = line + "%10.8f " % (z) line = line + "\n" f1.write(line) f1.close() # Read in the temporary file - get the entire matrix x = MATRIX(norbs, norbs); x.Load_Matrix_From_File(filename+"_tmp") # Extract the sub-matrix of interest x_sub = MATRIX(nstates1, nstates2) pop_submatrix(x, x_sub, act_sp1, act_sp2) # Add the resulting matrices to the returned result X.append( CMATRIX(x_sub) ) return X
[docs]def xyz_traj2gen_sp(infile, outfile, md_iter, sys_type): """ This file converts the xyz trajectory file in the format produced by DFTB+ to a gen file containing the `md_iter`-th step geometry Args: infile ( string ): the name of the input xyz trajectory file outfile ( string ): the name of a output gen single point file md_iter ( int ): index of the timeframe to extract sys_type ( string ): "C" or "P" - the system type - cluster or periodic Returns: none: but creates a file """ f = open(infile, "r") A = f.readlines() f.close() sz = len(A) # Determine the key info nat = int( float(A[0].split()[0] ) ) at_types = [] for i in range(0,nat): at_typ = A[i+2].split()[0] # atom type if at_typ not in at_types: at_types.append(at_typ) # Make up the output file line = "%5i %s \n" % (nat, sys_type) for typ in at_types: line = line + " %s " % (typ) line = line + "\n" for i in range(0,nat): ln_indx = (nat+2)*md_iter + (i + 2) tmp = A[ln_indx].split() at_indx = i + 1 at = tmp[0] typ_indx = at_types.index(at) + 1 x = float(tmp[1]) y = float(tmp[2]) z = float(tmp[3]) line = line + " %5i %3i %15.8f %15.8f %15.8f\n" % (at_indx, typ_indx, x, y, z) # Write the file f1 = open(outfile, "w") f1.write(line) f1.close()
[docs]def xyz_traj2gen_ovlp(infile, outfile, md_iter1, md_iter2, sys_type): """ This file converts the xyz trajectory file in the format produced by DFTB+ to a gen file containing the superimposed `md_iter1`-th and `md_iter2`-th steps geometries Args: infile ( string ): the name of the input xyz trajectory file outfile ( string ): the name of a output gen single point file md_iter1 ( int ): index of the first timeframe to extract md_iter2 ( int ): index of the second timeframe to extract sys_type ( string ): "C" or "P" - the system type - cluster or periodic Returns: none: but creates a file """ f = open(infile, "r") A = f.readlines() f.close() sz = len(A) # Determine the key info nat = int( float(A[0].split()[0] ) ) at_types = [] for i in range(0,nat): at_typ = A[i+2].split()[0] # atom type if at_typ not in at_types: at_types.append(at_typ) # Make up the output file line = "%5i %s \n" % (2*nat, sys_type) for typ in at_types: line = line + " %s " % (typ) line = line + "\n" for i in range(0,nat): ln_indx = (nat+2)*md_iter1 + (i + 2) tmp = A[ln_indx].split() at_indx = i + 1 at = tmp[0] typ_indx = at_types.index(at) + 1 x = float(tmp[1]) y = float(tmp[2]) z = float(tmp[3]) line = line + " %5i %3i %15.8f %15.8f %15.8f\n" % (at_indx, typ_indx, x, y, z) for i in range(0,nat): ln_indx = (nat+2)*md_iter2 + (i + 2) tmp = A[ln_indx].split() at_indx = nat + i + 1 at = tmp[0] typ_indx = at_types.index(at) + 1 x = float(tmp[1]) y = float(tmp[2]) z = float(tmp[3]) line = line + " %5i %3i %15.8f %15.8f %15.8f\n" % (at_indx, typ_indx, x, y, z) # Write the file f1 = open(outfile, "w") f1.write(line) f1.close()
[docs]def make_dftb_input(dftb_input_template_filename, istate, timestep=-1): """ This function creates an input file for DFTB+ package from a template file, it changes several placeholder lines to ensure the calculations are done for required electronic states Args: dftb_input_template_filename ( strings ): the name of the generic input file (template) for DFTB+ calculations istate ( int ): the index of the state, for which the calculations will be carried out timestep ( int ): the index of a given timestep along a MD trajectory. If not provided (-1), then the defaul name of "tmp.gen" will be used as the coordinate file name. If a time index is given, the name will be "coord-<timestep>.gen". Returns: None : just creates the files """ # Read in the template file f = open(dftb_input_template_filename, 'r') dftb_input_template = f.readlines() f.close() nlines = len(dftb_input_template) # Create the actual output file - just replace few parameters in the template file dftb_input = open("dftb_in.hsd","w"); for i in range( nlines ): dftb_input_template_line = dftb_input_template[i].split() if len(dftb_input_template_line) > 0: if dftb_input_template_line[0] == "<<<" and timestep == -1: dftb_input.write( ' <<< "tmp.gen"\n ') elif dftb_input_template_line[0] == "<<<" and timestep != -1: dftb_input.write( ' <<< "coord-'+str(timestep)+'.gen"\n ') elif dftb_input_template_line[0] == "StateOfInterest": dftb_input.write( F" StateOfInterest = {istate}\n" ) elif istate == 0 and dftb_input_template_line[0] == "ExcitedStateForces": dftb_input.write( " ExcitedStateForces = no\n" ) else: dftb_input.write(dftb_input_template[i]) else: dftb_input.write(dftb_input_template[i]) dftb_input.close()
[docs]def read_dftb_output(natoms, istate): """ This file reads in the total energy (essentially the reference, ground state energy), the excitation energies, and the corresponding forces on all atoms and return them in a digital format for further processing. Args: natoms ( int ): the number of atoms in the systemm istate ( int ): the index of the calculation - just for archiving purposes Returns: double, MATRIX(ndof, 1): the total energy of a given electronic state, and the corresponding gradients """ # Check the successful completion of the calculations like this: if os.path.isfile('detailed.out'): #print("Found detailed.out") os.system(F"cp detailed.out detailed_{istate}.out") else: print("\nCannot find the file detailed.out") print(F"Hint: Current working directory is: {os.getcwd()}") print("Is this where you expect the file detailed.out to be found?") print("Exiting program...\n") sys.exit(0) ndof = 3 * natoms grad = MATRIX(ndof, 1) f = open("detailed.out") output = f.readlines() f.close() E = 0.0 nlines = len(output) for i in range( nlines ): output_line = output[i].split() if len( output_line ) >= 2: if output_line[0] == "Total" and output_line[1] == "Forces": for j in range( natoms ): next_lines = output[i+j+1].split() grad.set( 3*j+0, 0, -float(next_lines[1]) ) grad.set( 3*j+1, 0, -float(next_lines[2]) ) grad.set( 3*j+2, 0, -float(next_lines[3]) ) if output_line[0] == "Excitation" and output_line[1] == "Energy:" : E += float(output_line[2]) # energy in a.u. #print(output_line[2]) if output_line[0] == "Total" and output_line[1] == "energy:" : E += float(output_line[2]) # energy in a.u. #print(output_line[2]) #print(F"Final energy = {E}") return E, grad
class tmp: pass
[docs]def run_dftb_adi(q, params_, full_id): """ This function executes the DFTB+ quantum chemistry calculations and returns the key properties needed for dynamical calculations. Args: q ( MATRIX(ndof, ntraj) ): coordinates of the particle [ units: Bohr ] params ( dictionary ): model parameters * **params["labels"]** ( list of strings ): the labels of atomic symbolc - for all atoms, and in a order that is consistent with the coordinates (in triples) stored in `q`. The number of this labels is `natoms`, such that `ndof` = 3 * `natoms`. [ Required ] * **params["nstates"]** ( int ): the total number of electronic states in this model [ default: 1 - just the ground state] * **params["dftb_input_template_filename"]** ( string ): the name of the input file template [ default: "dftb_input_template.hsd" ] * **params["dftbp_exe"]** ( string ): the full path to the DFTB+ executable [ defaut: "dftb+" ] * **params["xyz2gen_exe"]** ( string ): the full path to the xyz2gen executable (part of the DFTB+ package) [ defaut: "xyz2gen" ] Note: sometimes, especially if you are using conda-installed Python, you may need to edit the "xyz2gen" file in your DFTB+ installation to change the topmost line to point to the correct python executable (e.g. #! /home/alexey/miniconda2/envs/py37/bin/python) Returns: PyObject: obj, with the members: * obj.ham_adi ( CMATRIX(nstates,nstates) ): adiabatic Hamiltonian * obj.d1ham_adi ( list of ndof CMATRIX(nstates, nstates) objects ): derivatives of the adiabatic Hamiltonian w.r.t. the nuclear coordinate """ params = dict(params_) critical_params = [ "labels" ] default_params = { "dftb_input_template_filename":"dftb_input_template.hsd", "nstates":1, "dftb_exe":"dftb+", "xyz2gen_exe":"xyz2gen" } comn.check_input(params, default_params, critical_params) labels = params["labels"] dftb_input_template_filename = params["dftb_input_template_filename"] nstates = params["nstates"] dftb_exe = params["dftb_exe"] xyz2gen_exe = params["xyz2gen_exe"] natoms = len(labels) ndof = 3 * natoms obj = tmp() obj.ham_adi = CMATRIX(nstates, nstates) obj.nac_adi = CMATRIX(nstates, nstates) obj.hvib_adi = CMATRIX(nstates, nstates) obj.basis_transform = CMATRIX(nstates, nstates) obj.d1ham_adi = CMATRIXList(); obj.dc1_adi = CMATRIXList(); for idof in range(ndof): obj.d1ham_adi.append( CMATRIX(nstates, nstates) ) obj.dc1_adi.append( CMATRIX(nstates, nstates) ) Id = Cpp2Py(full_id) indx = Id[-1] # Make an xyz file # since the DFTB+ expects the coordinates in Angstrom, but Libra # goes with atomic units (Bohrs), hence expecting the `q` variable be # in Bohrs, we need to convert the coordinates from Bohrs to Angstroms coords_str = scan.coords2xyz(labels, q, indx, 1.0/units.Angst) f = open("tmp.xyz", "w"); f.write( F"{natoms}\nTemporary xyz file\n{coords_str}" ) f.close() # Convert xyz to gen format: tmp.xyz -> tmp.gen # The temporary working file MUST be called "tmp.gen" since this is # what the DFTB+ input will list - see the `make_dftbp_input` os.system(F"{xyz2gen_exe} tmp.xyz") for istate in range(nstates): # Update the input file make_dftb_input(dftb_input_template_filename, istate) # We have written the dftb+ input file for a certain state in nstates. Now we must compute the # state energies and forces. os.system( dftb_exe ) # At this point, we should have the "detailed.out" file created, so lets read it E, grad = read_dftb_output(natoms, istate) # Now, populate the allocated matrices obj.ham_adi.set(istate, istate, E * (1.0+0.0j) ) obj.hvib_adi.set(istate, istate, E * (1.0+0.0j) ) obj.basis_transform.set(istate, istate, 1.0+0.0j ) for idof in range(ndof): obj.d1ham_adi[idof].set(istate, istate, grad.get(idof, 0) * (1.0+0.0j) ) return obj
[docs]def dftb_distribute( istep, fstep, nsteps_this_job, trajectory_xyz_file, dftb_input, waveplot_input, curr_job_number ): """ Distributes dftb jobs for trivial parallelization Make sure that your dftb input file has absolute paths to the following input parameters: SlaterKosterFiles = Type2FileNames { Prefix = "/panasas/scratch/grp-alexeyak/brendan/dftbp_development/" Separator = "-" Suffix = ".skf" } Args: istep (integer): The initial time step in the trajectory xyz file. fstep (integer): The final time step in the trajectory xyz file. nsteps_this_job (integer): The number of steps for this job. trajectory_xyz_file (string): The full path to trajectory xyz file. dftb_input (string): the dftb_input_template.hsd file waveplot_input (string): the input file for the waveplot subpackage of dftbplus for generating cubefiles curr_job_number (integer): The current job number. Returns: None """ # Now we need to distribute the jobs into job batches # First, make a working directory where the calculations will take place os.chdir("wd") nsteps = fstep - istep + 1 njobs = int( nsteps / nsteps_this_job ) # Initialize the curr_step to istep curr_step = istep # Make the job directory folders os.system("mkdir job"+str(curr_job_number)+"") os.chdir("job"+str(curr_job_number)+"") # Copy the trajectory file and input template there os.system("cp ../../"+trajectory_xyz_file+" .") os.system("cp ../../"+dftb_input+" .") os.system("cp ../../"+waveplot_input+" .") # Now, we need to edit the submit file # Now, in jobs folder njob, we should do only a certain number of steps for step in range( nsteps_this_job ): # extract the curr_step xyz coordianates from the trajectory file and write it to another xyz file CP2K_methods.read_trajectory_xyz_file( trajectory_xyz_file, curr_step ) curr_step += 1 # Go back to the main directory os.chdir("../../")
[docs]def get_dftb_ks_energies( params ): """ This function reads the band.out file generated from a dftb+ computations. The band.out file stores the ks energies and their occupation. Args: Returns: E (1D numpy array): The vector consisting of the KS energies from min_band to max_band. total_energy (float): The total energy obtained from the log file. """ # Critical parameters critical_params = [ "logfile_name", "min_band", "max_band" ] # Default parameters default_params = { "time":0, "spin": 1} # Check input comn.check_input(params, default_params, critical_params) dftb_outfile_name = params["logfile_name"] # Time step, for molecular dynamics it will read the energies # of time step 'time', but for static calculations the time is set to 0 time = params["time"] # The minimum state number min_band = params["min_band"] # ks_orbital_indicies[0] # The maximum state number max_band = params["max_band"] # ks_orbital_indicies[-1] spin = params["spin"] # First openning the file and reading its lines f = open( dftb_outfile_name, 'r' ) lines = f.readlines() f.close() # The lines containing the energies of the occupied states occ_energies = [] # The lines containing the energies of the unoccupied states unocc_energies = [] # Set the total energy to zero total_energy = 0.0 # For the dftb output file band.out, start from line 1 for i in range(1,len(lines)): if i >= min_band and i < max_band+1: b = lines[i].strip().split() if float(b[2]) > 0.0: occ_energies.append( float(b[1]) * units.ev2Ha ) else: unocc_energies.append( float(b[1]) * units.ev2Ha ) # Turn them into numpy arrays occ_energies = np.array(occ_energies) unocc_energies = np.array(unocc_energies) # Concatenate the occupied and unoccpied energies so we can choose from min_band to max_band ks_energies = np.concatenate( (occ_energies, unocc_energies) ) print("ks_energies", ks_energies) return ks_energies, total_energy
[docs]def cube_generator_dftbplus( project_name, time_step, min_band, max_band, waveplot_exe, isUKS ): """ This function generates the cube files by first forming the 'fchk' file from 'chk' file. Then it will generate the cube files from min_band to max_band the same as CP2K naming. This will helps us to use the read_cube and integrate_cube functions easier. Args: project_name (string): The project_name. time_step (integer): The time step. min_band (integer): The minimum state number. max_band (integer): The maximum state number. waveplot_exe (str): Location of the executable for the waveplot program of the dftb+ software package isUKS (integer): The unrestricted spin calculation flag. 1 is for spin unrestricted calculations. Other numbers are for spin restricted calculations Returns: None """ # Make the cubefiles. For dftb+, this simply means executing the waveplot program # The min and max band are already defind in the waveplot template. So, we just use # them here for renaming the cubefiles os.system(waveplot_exe) # For now, only spin-restricted for state in range(min_band,max_band+1): # Use cp2k names because the rest of the code expects this format state_name = CP2K_methods.state_num_cp2k(state) cube_name = '%s-%d-WFN_%s_1-1_0.cube'%(project_name, time_step, state_name) print('Renaming cube for state %d'%state) # Now, rename the cubefile from what waveplots calls it to the standard format os.system("mv *"+str(state)+"-real.cube "+cube_name)
[docs]def read_dftbplus_TRA_file( params ): """ This function reads TRA.dat files generated from TD-DFTB calculations using DFTB+ and returns the TD-DFTB excitation energies and CI-like coefficients Args: params ( dictionary ): parameters dictionary logfile_name ( string ): this is actualyl the TRA.dat file, but we keep it as logfile_name for ease, as many other similar type functions (for cp2k, gaussian) rely on this format internally. number_of_states ( int ): how many ci states to consider tolerance ( float ): the tolerance for accepting SDs are members of the CI wavefunctions isUKS ( Boolean ): flag for whether or not a spin-polarized Kohn-Sham basis is being used. TRUE means that a spin-polarized Kohn-Sham basis is being used. Returns: excitation_energies ( list ): The excitation energies in the log file. ci_basis ( list ): The CI-basis configuration. ci_coefficients ( list ): The coefficients of the CI-states. spin_components (list): The spin component of the excited states. """ # Critical parameters critical_params = [ "logfile_name", "number_of_states" ] # Default parameters default_params = { "tolerance":0.001, "isUKS": 0} # Check input comn.check_input(params, default_params, critical_params) logfile_name = params["logfile_name"] number_of_states = int(params["number_of_states"]) tolerance = float(params["tolerance"]) isUKS = int(params["isUKS"]) f = open( logfile_name, 'r' ) lines = f.readlines() f.close() # Make lists for excitation energies and the lines where the keyword "Energy" is excitation_energies = [] energy_lines = [] for i in range( 0, len(lines) ): tmp_line = lines[i].split() if 'Energy' in tmp_line: print("Energy") # When found the line in which contains 'Energy' excitation_energies.append( float(tmp_line[2]) ) energy_lines.append( i ) elif len( energy_lines ) == number_of_states: break #print( energy_lines ) #sys.exit(0) # Obtain CI-like coefficients # Spin-unpolarized only as of 11/6/2020 # Start from 4 lines after finding the line contaning 'Energy'. This is how it is in DFTB v. 19.1 nlines_to_skip = 4 ci_basis = [] ci_coefficients = [] spin_components = [] for i in energy_lines: tmp_spin = [] tmp_state = [] tmp_state_coefficients = [] for j in range( i + nlines_to_skip, len(lines) ): tmp_line = lines[j].split() if len( tmp_line ) == 0: break else: ci_coefficient = float( tmp_line[3] ) if ci_coefficient**2 > tolerance: tmp_spin.append( "alp" ) tmp_state.append( [ int( tmp_line[0] ), int( tmp_line[2] ) ] ) tmp_state_coefficients.append( ci_coefficient ) # Append the CI-basis and and their coefficients for # this state into the ci_basis and ci_coefficients lists ci_basis.append( tmp_state ) ci_coefficients.append( tmp_state_coefficients ) spin_components.append( tmp_spin ) return excitation_energies[0:number_of_states], ci_basis, ci_coefficients, spin_components