Source code for libra_py.LoadMolecule

#*********************************************************************************
#* Copyright (C) 2015-2019 Alexey V. Akimov
#*
#* This file is distributed under the terms of the GNU General Public License
#* as published by the Free Software Foundation, either version 2 of
#* the License, or (at your option) any later version.
#* See the file LICENSE in the root directory of this distribution
#* or <http://www.gnu.org/licenses/>.
#*
#*********************************************************************************/
"""
.. module:: LoadMolecule
   :platform: Unix, Windows
   :synopsis: This module implements functions for loading data into a Chemobject 
       objects - by reading formatted data files. 
.. moduleauthor:: Alexey V. Akimov

"""
import re
import os
import sys
import math

if sys.platform=="cygwin":
    from cyglibra_core import *
elif sys.platform=="linux" or sys.platform=="linux2":
    from liblibra_core import *

from .regexlib import *
from . import units


[docs]def Load_Molecule(univ,syst,mol_file,format, verbosity=0): """Load molecular system from various formats Specify format manually (this gives flexibility) of format recognizable by program Args: univ ( Universe object ): Contains the basic information about chemical elements syst ( System ): The Chemobjects object that represents molecular system - this is what we construct mol_file ( string ): The name of the file containing the molecular structure format ( string ): The name of the format according to which the file "mol_file" is assumed to be formatted Available options are: * pdb * pdb_1 * true_pdb * true_pdb2 * xyz * iqmol_pdb Returns: None: but the `syst` object is modified to add the atoms and bonds, and to group atoms together Note: We assume that the coordinates in the files read are given in Angsrom. However, the syst stores this data in the atomic units (Borh), so conversion happens """ #------- Here we define a format of file ---------- # p - means 'Pattern' pAtom_keyword = '(?P<Atom_keyword>'+'HETATM'+')'+SP pAtom_keyword1= '(?P<Atom_keyword>'+'ATOM'+')'+SP pAtom_id = '(?P<Atom_id>'+DOUBLE+')'+SP pAtom_element = '(?P<Atom_element>'+ID+')'+SP pAtom_mol = '(?P<Atom_mol>'+WORD+')'+SP pAtom_chain = '(?P<Atom_chain>'+WORD+')'+SP pAtom_id1 = '(?P<Atom_id1>'+DOUBLE+')'+SP pAtom_type = '(?P<Atom_type>'+INT+')'+SP pAtom_occ = '(?P<Atom_occ>'+DOUBLE+')'+SP pAtom_charge = '(?P<Atom_charge>'+DOUBLE+')'+SP pAtom_C60 = '(?P<Atom_C60>'+NINT+')'+SP pAtom_name = '(?P<Atom_name>'+WORD+')'+SP if format=="pdb": Atom_Record = pAtom_keyword + pAtom_id + pElement_name + pAtom_id1 + pX_val + pY_val + pZ_val + pAtom_charge elif format=="pdb_1": Atom_Record = pAtom_keyword + pAtom_id + pElement_name + pAtom_id1 + pX_val + pY_val + pZ_val + pAtom_chain elif format=="true_pdb": Atom_Record = pAtom_keyword + pAtom_id + pElement_name + pAtom_mol + pAtom_chain + pAtom_id1 + pX_val + pY_val + pZ_val + pAtom_occ + pAtom_charge elif format=="true_pdb2": Atom_Record = pAtom_keyword1 + pAtom_id + pAtom_element + pAtom_mol + pAtom_chain + pAtom_id1 + pX_val + pY_val + pZ_val + pAtom_occ + pAtom_charge elif format=="xyz": Atom_Record = pElement_name + pX_val + pY_val + pZ_val elif format=="iqmol_pdb": Atom_Record = pAtom_keyword + pAtom_id + pElement_name + pAtom_chain + pAtom_id1 + pX_val + pY_val + pZ_val + pAtom_occ + pAtom_charge + pAtom_name pBond_keyword = '(?P<Bond_keyword>'+'CONECT'+')'+SP Bond_Record = pBond_keyword + '('+pAtom_id+')+' pFrag_keyword1 = '(?P<Frag_keyword1>'+'GROUP'+')'+SP pFrag_id = '(?P<Frag_id>'+INT+')'+SP pFrag_keyword2 = '(?P<Frag_keyword2>'+'FRAGNAME'+')'+SP pFrag_fragname = '(?P<Frag_fragname>'+ID+')'+SP pFrag_atom1name= '(?P<Frag_atom1name>'+ID+')'+SP pFrag_atom2name= '(?P<Frag_atom2name>'+ID+')'+SP pFrag_atom3name= '(?P<Frag_atom3name>'+ID+')'+SP Fragment_Record = pFrag_keyword1 + '('+pFrag_id+')+' + \ pFrag_keyword2 + pFrag_fragname + \ pFrag_atom1name + pFrag_atom2name + \ pFrag_atom3name f = open(mol_file,'r') A = f.readlines() D = C = B = A f.close() #---------- Create atoms ---------------- for a in A: m1 = re.search(Atom_Record,a) if m1!=None: atom_dict = {} if format=="true_pdb2": p1 = '(?P<p1>'+WORD+')' s = a[m1.start('Atom_element'):m1.end('Atom_element')] m2 = re.search(p1, s) if m2!=None: atom_dict["Atom_element"] = s[m2.start('p1'):m2.end('p1')] else: atom_dict["Atom_element"] = a[m1.start('Element_name'):m1.end('Element_name')] atom_dict["Atom_cm_x"] = float(a[m1.start('X_val'):m1.end('X_val')]) * units.Angst atom_dict["Atom_cm_y"] = float(a[m1.start('Y_val'):m1.end('Y_val')]) * units.Angst atom_dict["Atom_cm_z"] = float(a[m1.start('Z_val'):m1.end('Z_val')]) * units.Angst if format=="pdb" or format=="true_pdb": atom_dict["Atom_charge"] = float(a[m1.start('Atom_charge'):m1.end('Atom_charge')]) # atm.Atom_ff_int_type = int(float(a[m1.start('Atom_type'):m1.end('Atom_type')])) # atm.Atom_is_C60_CT = int(a[m1.start('Atom_C60'):m1.end('Atom_C60')]) if verbosity>0: print("CREATE_ATOM ") syst.CREATE_ATOM( Atom(univ,atom_dict) ) #--------- Create bonds ------------------ print(len(B)) for b in B: m2 = re.search(Bond_Record,b) lst = [] if m2!=None: lst = compINT.findall(m2.group(0)) # if (int(float(lst[0][0])) == 10): # exit(0) i = 1 while i < len(lst): if verbosity>0: print('LINK_ATOMS ',lst[0][0], ' and ', lst[i][0],'...') syst.LINK_ATOMS(int(float(lst[0][0])),int(float(lst[i][0]))) i = i + 1 #----------- Group atoms -------------------- j = 1 for fr in C: m3 = re.search(Fragment_Record,fr) lst = [] if m3!=None: print(m3.group()) lst = compINT.findall(m3.group()) gr_atoms = [] i = 0 while i<len(lst): gr_atoms.append(int(float(lst[i][0]))) i = i + 1 # if len(gr_atoms)>0: if verbosity>0: print("GROUP_ATOMS",gr_atoms," to form fragment with Group_id = ",j) syst.GROUP_ATOMS(gr_atoms,j) # syst.show_fragments() j = j + 1
#----------------------------------------------------------------------