#*********************************************************************************
#* Copyright (C) 2020 Mohammad Shakiba, Brendan Smith, Alexey V. Akimov
#* This file is distributed under the terms of the GNU General Public License
#* as published by the Free Software Foundation, either version 3 of
#* the License, or (at your option) any later version.
#* See the file LICENSE in the root directory of this distribution
#* or <http://www.gnu.org/licenses/>.
#*
#*********************************************************************************/
"""
.. module:: CP2K_methods
:platform: Unix, Windows
:synopsis: This module implements functions for processing the CP2K inputs and outputs.
.. moduleauthors::
Mohammad Shakiba, Brendan Smith, Alexey V. Akimov
"""
import os
import sys
import math
import re
import numpy as np
if sys.platform=="cygwin":
from cyglibra_core import *
elif sys.platform=="linux" or sys.platform=="linux2":
from liblibra_core import *
import util.libutil as comn
[docs]def ndigits( integer_number: int ):
"""
This function calculates the number of digits for an integer number.
Args:
integer_number ( integer ): An integer number.
Returns:
digit_num ( integer ): The number of digits for 'num'.
"""
# Setting up the digit number counter
digit_num = 0
while( integer_number > 0 ):
digit_num = digit_num+1
integer_number = integer_number//10
return digit_num
[docs]def state_num_cp2k(state_num: int):
"""
This function turns an integer number to a string. This string
has the format in five digits and if the number of digits is less than
five it will append zeros before the number. This is necessary to read
the .cube files produced by CP2K.
Args:
state_num (integer): Integer number which is in fact the energy level.
Returns:
state_num_str (string): The number in five digits (or more) in string format.
"""
# Setting up an initial string
state_num_str = ''
# Now count the number of digits for the state sumber
# and then add 0 before the number in the string
if ndigits( state_num ) == 1:
state_num_str = '0000%d' % state_num
elif ndigits( state_num ) == 2:
state_num_str = '000%d' % state_num
elif ndigits( state_num ) == 3:
state_num_str = '00%d' % state_num
elif ndigits( state_num ) == 4:
state_num_str = '0%d' % state_num
else:
state_num_str = '%d' % state_num
return state_num_str
[docs]def cube_file_names_cp2k( params ):
"""
This function will produce the cube file names produced by CP2K both for energy calculations
and molecular dynamics.
Args:
params( dictionary ): contains dynamical paramters
project_name ( string ): The project name used in CP2K input file.
min_band ( integer ): The minimum state number to be considered.
max_band ( integer ): The maximum state number to be considered.
curr_step ( integer ): The time current step in CP2K calculations.
isUKS ( int ): 1 if spin-polarized calculations used
Returns:
names ( list ): The list of cube file names.
"""
# Critical parameters
critical_params = [ "project_name", "min_band", "max_band", "curr_step" ]
# Default parameters
default_params = { "isUKS":0 }
# Check input
comn.check_input(params, default_params, critical_params)
project_name = params["project_name"]
min_band = params["min_band"]
max_band = params["max_band"]
curr_step = params["curr_step"]
isUKS = params["isUKS"]
es_software = params["es_software"]
# isUKS needs to be epressed as a string, even though it is used as a Boolean in other parts of the workflows
spin = []
if isUKS == 1:
spin = [1,2]
else:
spin = [1]
# Define the names of the cube files for time t.
cube_file_names = []
for i in range( min_band, max_band+1 ):
# Using the function state_num_cp2k to obtain the names of the .cube files for state 'i'.
state_num = state_num_cp2k( i )
# For the current time step "curr_step"
if isUKS == 1:
if es_software == "cp2k" or es_software == "gaussian":
cube_file_name_of_band_i = str('cubefiles/%s-%d-WFN_%s_%d-1_0.cube' % ( project_name, curr_step, state_num, spin[0] ) )
cube_file_name_of_band_j = str('cubefiles/%s-%d-WFN_%s_%d-1_0.cube' % ( project_name, curr_step, state_num, spin[1] ) )
cube_file_names.append( cube_file_name_of_band_i )
cube_file_names.append( cube_file_name_of_band_j )
elif software == "dftb+":
print("\nReading cubefiles from spin-polarized calculations not currently available")
print("Exiting now")
sys.exit(0)
else:
# Using the function state_num_cp2k to obtain the names of the .cube files for state 'i'.
state_num = state_num_cp2k( i )
cube_file_name_of_band_i = str('cubefiles/%s-%d-WFN_%s_%d-1_0.cube' % ( project_name, curr_step, state_num, spin[0] ) )
cube_file_names.append( cube_file_name_of_band_i )
return cube_file_names
[docs]def read_cp2k_tddfpt_log_file( params ):
"""
This function reads log files generated from TD-DFPT calculations using CP2K and returns the TD-DFPT
excitation energies, Slater determinent states in terms of the Kohn-Sham orbital indicies,
and the Slater determinent coefficients that comprise the multi-configurational electronic states
Args:
params ( dictionary ): parameters dictionary
logfile_name ( string ): name of the log file for this particular timestep
number_of_states ( int ): how many ci states to consider
tolerance ( float ): the tolerance for accepting SDs are members of the CI wavefunctions
isUKS ( Boolean ): flag for whether or not a spin-polarized Kohn-Sham basis is being used. TRUE means
that a spin-polarized Kohn-Sham basis is being used.
Returns:
excitation_energies ( list ): The excitation energies in the log file.
ci_basis ( list ): The CI-basis configuration.
ci_coefficients ( list ): The coefficients of the CI-states.
spin_components (list): The spin component of the excited states.
"""
# Critical parameters
critical_params = [ "logfile_name", "number_of_states" ]
# Default parameters
default_params = { "tolerance":0.05, "isUKS": 0}
# Check input
comn.check_input(params, default_params, critical_params)
logfile_name = params["logfile_name"]
number_of_states = int(params["number_of_states"])
tolerance = float(params["tolerance"])
isUKS = int(params["isUKS"])
f = open( logfile_name, 'r' )
lines = f.readlines()
f.close()
for i in range( 0, len(lines) ):
tmp_line = lines[i].lower().split()
if 'excitation' in tmp_line:
if 'analysis' in tmp_line:
# When found the line in which contains 'Excitation analysis' in
# the log file, append it in the variable exc_anal_line
exc_anal_line = i
if 'states' in tmp_line:
if 'multiplicity' in tmp_line:
# Here we search for the line that contains 'R-TDDFPT states of multiplicity 1'
# or 'U-TDDFPT states of multiplicity 1' in the log file
r_tddfpt_line = i
excitation_energies = []
# Start from 5 lines after finding the line contaning 'R-TDDFPT states of multiplicity 1'
# This is because they contain the energies from that line.
for i in range( r_tddfpt_line+5, len( lines ) ):
tmp_line = lines[i].split()
if len( tmp_line ) == 0:
break
excitation_energies.append( float( tmp_line[2] ) )
# Start from 5 lines after finding the line contaning 'Excitation analysis'
# From that point we have the state numbers with their configurations.
# So, we append the lines which contain only 'State number' and stop
# whenever we reach to a blank line.
state_num_lines = []
for i in range( exc_anal_line+5, len( lines ) ):
tmp_line = lines[i].split()
if isUKS == 1:
if len( tmp_line ) == 0 or '----' in lines[i]:
state_num_lines.append( i )
break
if len( tmp_line )==1 or len( tmp_line )==3:
state_num_lines.append(i)
else:
if len( tmp_line ) == 0 or '----' in lines[i]:
state_num_lines.append( i )
break
if len(lines[i].split())==1:
state_num_lines.append(i)
elif len( tmp_line ) > 1:
if tmp_line[0].isdigit() and not( tmp_line[1].isdigit() ):
state_num_lines.append( i )
# Setting up the CI-basis list and their coefficients list
ci_basis = []
ci_coefficients = []
spin_components = []
for i in range( number_of_states ):
# states and their coefficients for each excited state
tmp_state = []
tmp_state_coefficients = []
tmp_spin = []
for j in range( state_num_lines[i]+1, state_num_lines[i+1] ):
# Splitting lines[j] into tmp_splitted_line
tmp_splitted_line = lines[j].split()
# If we are using the spin-polarized Kohn-Sham basis, then the size of the split line is different
# from the size of the split line when using the spin-unpolarized Kohn-Sham basis
if isUKS == 1:
ci_coefficient = float( tmp_splitted_line[4] )
if ci_coefficient**2 > tolerance:
# We need to remove the paranthesis from the 2nd element of the temporary splitted line
tmp_spin.append( tmp_splitted_line[1].replace('(','').replace(')','') )
tmp_state.append( [ int( tmp_splitted_line[0] ), int( tmp_splitted_line[2] ) ] )
tmp_state_coefficients.append( ci_coefficient )
# Here, we have the spin-unpolarize Kohn-Sham basis
# For this case, spin-components will just return all alpha
else:
ci_coefficient = float( tmp_splitted_line[2] )
if ci_coefficient**2 > tolerance:
tmp_spin.append( "alp" )
tmp_state.append( [ int( tmp_splitted_line[0] ), int( tmp_splitted_line[1] ) ] )
tmp_state_coefficients.append( ci_coefficient )
# Append the CI-basis and and their coefficients for
# this state into the ci_basis and ci_coefficients lists
ci_basis.append( tmp_state )
ci_coefficients.append( tmp_state_coefficients )
spin_components.append( tmp_spin )
return excitation_energies[0:number_of_states], ci_basis, ci_coefficients, spin_components
[docs]def cp2k_distribute( istep, fstep, nsteps_this_job, cp2k_positions, cp2k_input, curr_job_number ):
"""
Distributes cp2k jobs for trivial parallelization
*** Make sure that your cp2k input file has absolute paths to the following input parameters:
BASIS
POTENTIAL
< any other file dependent parameter (dftd3, etc) >
Args:
istep (int): The initial time step for molecular dynamics trajectory which
the user wants to start the calculations with. This paramter
is set in the input file for submitting the jobs.
fstep (int): The final time step for molecular dynamics trajectory which
the user wants to start the calculations with. This paramter
is set in the input file for submitting the jobs.
nsteps_this_job (int): The number of step for the job to be distributed.
cp2k_positions (str): The full path to the molecular dynamics trajectory xyz file.
curr_job_number (int): The current job number.
Returns:
None
"""
# Now we need to distribute the jobs into job batches
# First, make a working directory where the calculations will take place
os.chdir("wd")
# total number of steps
nsteps = fstep - istep + 1
# number of jobs obtained from nsteps/nsteps_this_job
njobs = int( nsteps / nsteps_this_job )
# The current step is set to istep
curr_step = istep
# Making the job directories in the wd folder like job0, job1, ...
os.system("mkdir job"+str(curr_job_number)+"")
# Chnage directory to job folder
os.chdir("job"+str(curr_job_number)+"")
# Copy the trajectory xyz file
os.system("cp ../../"+cp2k_positions+" .")
# Copy the CP2K sample input
os.system("cp ../../"+cp2k_input+" .")
# Go back to the main directory
os.chdir("../../")
[docs]def read_trajectory_xyz_file(file_name: str, step: int):
"""
This function reads the trajectory of a molecular dynamics .xyz file and
extract the 'step' th step then writes it to coord-step.xyz file. This function
is used in single point calculations for calculations of the NACs where one has
previously obtained the trajectory via any molecular dynamics packages.
Args:
file_name (string): The trajectory .xyz file name.
step (integer): The desired time to extract its .xyz
coordinates which starts from zero.
Returns:
None
"""
f = open(file_name,'r')
lines = f.readlines()
f.close()
# The number of atoms for each time step in the .xyz file of the trajectory.
number_of_atoms = int(lines[0].split()[0])
# Write the coordinates of the 't' th step in file coord-t.xyz
f = open('coord-%d'%step+'.xyz','w')
# This is used to skip the first two lines for each time step.
n = number_of_atoms+2
# Write the coordinates of the 'step'th time step into the file
for i in range( n * step, n * ( step + 1 ) ):
f.write( lines[i] )
f.close()
[docs]def read_energies_from_cp2k_log_file( params ):
"""
This function reads the energies from CP2K log file for a specific spin.
Args:
params (dictionary):
logfile_name (str): The output file produced by CP2K
time (int): The time step of the molecular dynamics. For single point calculations
it should be set to 0.
min_band (int): The minimum state number.
max_band (int): The maximum state number.
spin (int): This number can only accept two values: 1 for alpha and 2 for beta spin.
Returns:
E (1D numpy array): The vector consisting of the KS energies from min_band to max_band.
total_energy (float): The total energy obtained from the log file.
"""
# Critical parameters
critical_params = [ "logfile_name", "min_band", "max_band" ]
# Default parameters
default_params = { "time":0, "spin": 1}
# Check input
comn.check_input(params, default_params, critical_params)
cp2k_log_file_name = params["logfile_name"]
# Time step, for molecular dynamics it will read the energies
# of time step 'time', but for static calculations the time is set to 0
time = params["time"]
# Koh-Sham orbital indicies
# ks_orbital_indicies = params["ks_orbital_indicies"]
# The minimum state number
min_band = params["min_band"] # ks_orbital_indicies[0]
# The maximum state number
max_band = params["max_band"] # ks_orbital_indicies[-1]
spin = params["spin"]
# First openning the file and reading its lines
f = open( cp2k_log_file_name, 'r' )
lines = f.readlines()
f.close()
# The lines containing 'Eigenvalues of the occupied subspace'
lines_with_occupied = []
# The lines containing 'Eigenvalues of the unoccupied subspace'
lines_with_unoccupied = []
# The lines containing the energies of the occupied states
occ_energies_last_line = []
# The lines containing the energies of the unoccupied states
unocc_energies_last_line = []
# Set the total energy to zero
total_energy = 0.0
for i in range(0,len(lines)):
# Find the occupied states lines
if 'Eigenvalues of the occupied subspace'.lower() in lines[i].lower():
if str( spin ) in lines[i].lower().split():
lines_with_occupied.append( i )
for j in range( i, len( lines ) ):
# Find the last line number containing the energies for occupied states
if len( lines[j].split() ) == 0:
occ_energies_last_line.append( j )
break
# Find the unoccupied states lines
if 'Eigenvalues of the unoccupied subspace'.lower() in lines[i].lower():
if str( spin ) in lines[i].lower().split():
lines_with_unoccupied.append(i)
for j in range( i, len( lines ) ):
# Find the last line number containing the energies for unoccupied states
if len( lines[j].split() ) == 0:
unocc_energies_last_line.append( j )
break
if "Total energy:" in lines[i]:
total_energy = float( lines[i].split()[2] )
# The Kohn-Sham energies
ks_energies = []
# Start after two lines of the 'Eigenvalues of the occupied subspace'
for i in range( lines_with_occupied[time] + 2, occ_energies_last_line[time] - 1 ):
for j in range(0,len(lines[i].split())):
ks_energies.append(float(lines[i].split()[j]))
# Start after two lines of the 'Eigenvalues of the unoccupied subspace'
for i in range( lines_with_unoccupied[time] + 2, unocc_energies_last_line[time] ):
if not 'Reached'.lower() in lines[i].lower().split():
for j in range(0,len(lines[i].split())):
ks_energies.append(float(lines[i].split()[j]))
# Now appending all the energies into a numpy array
ks_energies = np.array(ks_energies)
# Returning the energeis from min_band to max_band
return ks_energies[min_band-1:max_band], total_energy
[docs]def read_energies_from_cp2k_md_log_file( params ):
"""
This function reads the energies from CP2K molecular dynamics log file for a specific spin. The difference between this function
and the function `read_energies_from_cp2k_log_file` is that it is more efficient and faster for molecular dynamics energy levels.
Args:
params (dictionary):
logfile_name (str): The output file produced by CP2K
init_time (int): The initial time step of the molecular dynamics.
final_time (int): The final time step of the molecular dynamics.
min_band (int): The minimum state number.
max_band (int): The maximum state number.
spin (int): This number can only accept two values: 1 for alpha and 2 for beta spin.
Returns:
E (1D numpy array): The vector consisting of the KS energies from min_band to max_band.
total_energy (float): The total energy obtained from the log file.
"""
# Critical parameters
critical_params = [ "logfile_name", "min_band", "max_band" ]
# Default parameters
default_params = { "spin": 1, "init_time": 0, "final_time": 1}
# Check input
comn.check_input(params, default_params, critical_params)
cp2k_log_file_name = params["logfile_name"]
# Time step, for molecular dynamics it will read the energies
# of time step 'time', but for static calculations the time is set to 0
init_time = params["init_time"]
final_time = params["final_time"]
# Koh-Sham orbital indicies
# ks_orbital_indicies = params["ks_orbital_indicies"]
# The minimum state number
min_band = params["min_band"] # ks_orbital_indicies[0]
# The maximum state number
max_band = params["max_band"] # ks_orbital_indicies[-1]
spin = params["spin"]
# First openning the file and reading its lines
f = open( cp2k_log_file_name, 'r' )
lines = f.readlines()
f.close()
# The lines containing 'Eigenvalues of the occupied subspace'
lines_with_occupied = []
# The lines containing 'Eigenvalues of the unoccupied subspace'
lines_with_unoccupied = []
# The lines containing the energies of the occupied states
occ_energies_last_line = []
# The lines containing the energies of the unoccupied states
unocc_energies_last_line = []
# Set the total energy to zero
total_energy = 0.0
for i in range(0,len(lines)):
# Find the occupied states lines
if 'Eigenvalues of the occupied subspace'.lower() in lines[i].lower():
if str( spin ) in lines[i].lower().split():
lines_with_occupied.append( i )
for j in range( i, len( lines ) ):
# Find the last line number containing the energies for occupied states
if len( lines[j].split() ) == 0:
occ_energies_last_line.append( j )
break
# Find the unoccupied states lines
if 'Eigenvalues of the unoccupied subspace'.lower() in lines[i].lower():
if str( spin ) in lines[i].lower().split():
lines_with_unoccupied.append(i)
for j in range( i, len( lines ) ):
# Find the last line number containing the energies for unoccupied states
if len( lines[j].split() ) == 0:
unocc_energies_last_line.append( j )
break
if "Total energy:" in lines[i]:
total_energy = float( lines[i].split()[2] )
# All KS energies
KS_energies = []
for time in range(init_time,final_time):
# The Kohn-Sham energies
ks_energies = []
# Start after two lines of the 'Eigenvalues of the occupied subspace'
for i in range( lines_with_occupied[time] + 2, occ_energies_last_line[time] - 1 ):
for j in range(0,len(lines[i].split())):
ks_energies.append(float(lines[i].split()[j]))
# Start after two lines of the 'Eigenvalues of the unoccupied subspace'
for i in range( lines_with_unoccupied[time] + 2, unocc_energies_last_line[time] ):
if not 'Reached'.lower() in lines[i].lower().split():
for j in range(0,len(lines[i].split())):
ks_energies.append(float(lines[i].split()[j]))
# Now appending all the energies into a numpy array
ks_energies = np.array(ks_energies)
KS_energies.append(ks_energies[min_band-1:max_band])
# Returning the energeis from min_band to max_band
return KS_energies, total_energy