Source code for libra_py.hpc_utils

#***********************************************************
# * Copyright (C) 2013-2019 Alexey V. Akimov
# * This file is distributed under the terms of the
# * GNU General Public License as published by the
# * Free Software Foundation; either version 3 of the
# * License, or (at your option) any later version.
# * http://www.gnu.org/copyleft/gpl.txt
#***********************************************************/
"""
.. module:: hpc_utils
   :platform: Unix, Windows
   :synopsis:
       This module implements the functionality to run distributed calculations
       on HPC resources

.. moduleauthor:: Alexey V. Akimov

"""


import os
import sys
import time
import re


def substitute(filename_template, out_filename, params):
    """
    Make a copy of a template file, but with some variables replaced

    Args:
        filename_template ( string ): the name of the file that serves as a template
        out_filename ( string ): the name of the output (resulting) file
        params ( dictionary ): the keys of this dictionary are the strings (more generally,
            regular expressions) to look for, the values are the strings with which the
            found regexes will be substituted

    Returns:
        None: but creates the out_filename file

    """

    f = open(filename_template, "r")
    A = f.readlines()
    f.close()

    f_out = open(out_filename, "w")
    for a in A:
        b = a
        for key in params.keys():
            b = re.sub(key, params[key], b)
        f_out.write(b)
    f_out.close()
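
# ------------------------------------------------------------------------------------
# Hedged usage sketch (not part of the original module): the helper below only
# illustrates how substitute() could be called. The template file name and the
# placeholder names ("PLACEHOLDER_STEP", "PLACEHOLDER_PREFIX") are hypothetical -
# adapt them to whatever strings your own template actually contains.
def _example_substitute():
    # Produce "x.scf.0.in" from a hypothetical template "x.scf.in.template", replacing
    # the two placeholder strings with concrete values
    substitute("x.scf.in.template", "x.scf.0.in",
               {"PLACEHOLDER_STEP": "0", "PLACEHOLDER_PREFIX": "x"})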
def make_submit(Nstart, Nend, job_dir, submit_templ):
    """
    This function makes a PBS/SLURM submit file in a given job directory, using a template
    file. The function looks for the placeholders specified by "param1=" and "param2=" and
    appends the actual values of these parameters as needed.

    Args:
        Nstart ( int ): index of the starting parameter
        Nend ( int ): index of the final parameter
        job_dir ( string ): name of the directory where this file will be printed out to
        submit_templ ( string ): the template file name

    Returns:
        None: but generates the actual submit files

    """

    f = open(submit_templ, "r")
    A = f.readlines()
    f.close()

    f_out = open("%s/%s" % (job_dir, submit_templ), "w")
    for a in A:
        if a.find("param1=") != -1:
            a = a[:-1] + str(Nstart) + "\n"
        elif a.find("param2=") != -1:
            a = a[:-1] + str(Nend) + "\n"
        f_out.write(a)
    f_out.write("\n")
    f_out.close()
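
# ------------------------------------------------------------------------------------
# Hedged usage sketch (not part of the original module): make_submit() is normally
# driven by distribute() below, but it can also be called directly. The file name
# "submit_templ.slm" is hypothetical; the template is expected to contain lines
# containing "param1=" and "param2=", and the directory "job0" must already exist.
def _example_make_submit():
    # Write job0/submit_templ.slm with "param1=0" and "param2=10" filled in
    make_submit(0, 10, "job0", "submit_templ.slm")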
def job(Nstart, Nend, job_dir, prefixes):
    """
    This function prepares a group of input files to be executed as a single job.
    Note that a <job> here refers to a single PBS/SLURM submission file and may be
    executed on several processors.

    Args:
        Nstart ( int ): index of the starting parameter
        Nend ( int ): index of the final parameter
        job_dir ( string ): name of the directory to which the files will be copied
        prefixes ( list of strings ): the prefixes of the file names [e.g. "x.scf"]

    Returns:
        None: just copies the files

    Example:
        >>> job(0, 5, "job0", ["x.scf"])

        Will:

        1. Create a directory "job0"
        2. Copy the files x.scf.0.in to x.scf.5.in into the directory "job0"

    """

    # Create the <job_dir> directory if it does not exist yet
    if not os.path.isdir(job_dir):
        os.system("mkdir %s" % job_dir)

    # Copy all the files
    i = Nstart
    while i <= Nend:
        for prefix in prefixes:
            os.system("cp %s.%d.in %s" % (prefix, i, job_dir))
        i = i + 1
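
# ------------------------------------------------------------------------------------
# Hedged usage sketch (not part of the original module): this complements the example
# in the job() docstring and shows that several file prefixes can be grouped into the
# same job directory. The prefixes "x0.scf" and "x1.scf" are hypothetical.
def _example_job():
    # Copy x0.scf.0.in ... x0.scf.4.in and x1.scf.0.in ... x1.scf.4.in into "job0"
    job(0, 4, "job0", ["x0.scf", "x1.scf"])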
def distribute(Nmin, Nmax, max_steps, submit_templ, exp_files, prefixes, do_submit):
    """
    This function creates a number of jobs from the pool of input files, which are indexed
    from ```Nmin``` to ```Nmax```, with the maximal number of input files in one job given
    by ```max_steps```. It also submits each job from its own directory.

    Args:
        Nmin ( int ): minimal index of the file (a part of the file name) in the pool
        Nmax ( int ): maximal index of the file (a part of the file name) in the pool
        max_steps ( int ): how many files to handle in one job
        submit_templ ( string ): the filename of the PBS/SLURM submit file that contains all
            the information on how to run the jobs involving a subset of files, but doesn't
            specify which particular files to handle. These parameters will be set up
            automatically by this function
        exp_files ( list of strings ): a list of the input files defining how to do an export
            of the QE wavefunctions
        prefixes ( list of strings ): a list of prefixes of the files to be distributed
        do_submit ( int ): a flag to choose whether to actually submit the jobs
            (do_submit == 1, 2, or 3) or only distribute the files (otherwise):

            - 0: only distribute the files, no actual execution
            - 1: distribute and submit using PBS
            - 2: distribute and submit using SLURM
            - 3: distribute and submit using Python, no scheduling system - good for runs
              on a local computer

    Returns:
        None: just organizes the execution of the calculations

    """

    j = 0  # job index
    Nstart = 0
    Nend = max_steps - 1
    njobs = int((Nmax - 1 - Nmin) / max_steps)

    while j < njobs:
        job(Nstart, Nend + 1, "job%d" % j, prefixes)  # add 1 to avoid merging adjacent runs
        if do_submit in [0, 1, 2]:
            make_submit(Nstart, Nend + 1, "job%d" % j, submit_templ)
        Nstart = Nstart + max_steps
        Nend = Nend + max_steps

        # Go into that directory and submit the job
        os.chdir("job%d" % j)
        for exp_file in exp_files:
            os.system("cp ../%s ." % exp_file)

        if do_submit == 1:
            os.system("qsub %s" % submit_templ)
            time.sleep(10)
        elif do_submit == 2:
            os.system("sbatch %s" % submit_templ)
            time.sleep(10)
        os.chdir("../")
        j = j + 1

    # Handle the remaining files in the last job
    job(Nstart, min(Nend, Nmax), "job%d" % j, prefixes)
    if do_submit in [0, 1, 2]:
        make_submit(Nstart, min(Nend, Nmax), "job%d" % j, submit_templ)

    os.chdir("job%d" % j)
    for exp_file in exp_files:
        os.system("cp ../%s ." % exp_file)

    if do_submit == 1:
        os.system("qsub %s" % submit_templ)
        time.sleep(10)  # we need to wait some time before submitting a new job - to make sure the memory is available
    elif do_submit == 2:
        os.system("sbatch %s" % submit_templ)
        time.sleep(10)
    os.chdir("../")
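
# ------------------------------------------------------------------------------------
# Hedged usage sketch (not part of the original module): a typical driver call for
# distribute(). All file names are hypothetical: "x.exp.in" stands for a QE
# wavefunction export input and "submit_templ.slm" for a SLURM submit template, both
# assumed to sit in the current working directory next to the x.scf.*.in files.
def _example_distribute():
    # Split the input files x.scf.0.in ... x.scf.99.in into jobs of at most 10 files
    # each and submit them with SLURM (do_submit = 2). Use do_submit = 0 to only
    # distribute the files without submitting anything.
    distribute(0, 99, 10, "submit_templ.slm", ["x.exp.in"], ["x.scf"], 2)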