Source code for libra_py.data_read

#*********************************************************************************                     
#* Copyright (C) 2019 Alexey V. Akimov                                                   
#*                                                                                                     
#* This file is distributed under the terms of the GNU General Public License                          
#* as published by the Free Software Foundation, either version 2 of                                   
#* the License, or (at your option) any later version.                                                 
#* See the file LICENSE in the root directory of this distribution   
#* or <http://www.gnu.org/licenses/>.          
#***********************************************************************************
"""
.. module:: data_read
   :platform: Unix, Windows
   :synopsis: 
       This module implements various functions for getting data from files

.. moduleauthor:: Alexey V. Akimov

"""

import os
import sys
import math
import copy
import numpy as np

if sys.platform=="cygwin":
    from cyglibra_core import *
elif sys.platform=="linux" or sys.platform=="linux2":
    from liblibra_core import *

#import common_utils as comn
import util.libutil as comn

    
[docs]def get_matrix(nrows, ncols, filename_re, filename_im, act_sp):
    """

    This file reads the real and imaginary components of a matrix of given original size,  
    takes its sub-matrix (as defined by the act_sp function) and returns the resulting 
    complex matrix

    Args:
        nrows ( int ): the number of rows in the original matrix (read from the files)
        ncols ( int ): the number of columns in the original matrix (read from the files)
        filename_re ( string ): the name of the file containing the real part of the matrix 
        filename_im ( string ): the name of the file containing the imaginary part of the matrix 
        act_sp ( list of N ints ): the indices of the columns and rows to be taken to construct the 
            resulting matrices. The indexing starts from 0. These numbers shold not be larger than 
            `nrows` or `ncols`

    Returns:
        CMATRIX(N, N): where N is the number of actively included rows/columns


    Example:

        The following snippet will create a 4 x 4 matrix from the files "Ham_0_re"
        and "Ham_0_im" from the "res" directory. Each of the files is expected to be 
        a matrix of 10 x 10 in size. The rezulting 4 x 4 matrix will contain entries on
        the intersection of columns and rows with indices 0, 1, 3, and 4.

        >>> X = get_matrix(10, 10, "res/Ham_0_re", "res/Ham_0_im", [0,1,3,4])


    """

    X_re = MATRIX(nrows, ncols); X_re.Load_Matrix_From_File(filename_re)
    X_im = MATRIX(nrows, ncols); X_im.Load_Matrix_From_File(filename_im)

    nstates = len(act_sp)
    x_re = MATRIX(nstates, nstates);
    x_im = MATRIX(nstates, nstates);

    pop_submatrix(X_re, x_re, list(act_sp), list(act_sp))
    pop_submatrix(X_im, x_im, list(act_sp), list(act_sp))

    return CMATRIX(x_re, x_im)




[docs]def get_data(params):
    """Read a single set of data files 

    Args:
        params ( dictionary ): parameters controlling the function execution

            Required parameter keys:

            * **params["data_dim"]** ( int ): matrix dimension how many lines/columns in the file [Required!]
            * **params["active_space"]** ( list of ints ): the indices of the states we care 
                about. These indices will be used to determine the size of the created CMATRIX objects
                and only these states will be extracted from the original files [ default: range(data_dim) ]
            * **params["isnap"]** ( int ): index of the first file to read [Required!]
            * **params["fsnap"]** ( int ): index of the final file to read [Required!]
            * **params["data_re_prefix"]** ( string ): prefixes of the files with real part of the data [Required!]
            * **params["data_im_prefix"]** ( string ): prefixes of the files with imaginary part of the data [Required!]
            * **params["data_re_suffix"]** ( string ): suffixes of the files with real part of the Hvib(t) [default: "_re"]
            * **params["data_im_suffix"]** ( string ): suffixes of the files with imaginary part of the Hvib(t) [default: "_im"]

    Returns:
        list of CMATRIX objects: data: 
            a time series of data matrices, such that data[time] is a data at time step `time`

    Example:
        This example will read 10 pairs of files: "Hvib_0_re", "Hvib_0_im", "Hvib_1_re", "Hvib_1_im", ...
        "Hvib_9_re", "Hvib_9_im". Each file should contain a 4 x 4 matrix of numbers. It will generate a 
        list of 4 x 4 complex-valued matrices.

        >>> hvib = get_data({"data_dim":4, "isnap":0, "fsnap":10, "data_re_prefix":"Hvib", "data_im_prefix":"Hvib"})

        The following example will do the same as the example above, however the intially-read 4 x 4 matrices will
        be partially discarded. Out of 16 values only 4 (the upper left block of 4 numbers)  will be stored in 
        the resulting list of 2 x 2 complex-valued matrices. 

        >>> hvib = get_data({"data_dim":4, "isnap":0, "fsnap":10, "data_re_prefix":"Hvib", "data_im_prefix":"Hvib", "active_space":[0,1]})


    """

    critical_params = ["data_dim", "isnap", "fsnap", "data_re_prefix", "data_im_prefix"]
    default_params = { "data_re_suffix":"_re", "data_im_suffix":"_im", "active_space":range(params["data_dim"])}
    comn.check_input(params, default_params, critical_params)

    ndim = params["data_dim"]  # the number of cols/row in the input files

    data = []
    for i in range(params["isnap"],params["fsnap"]):

        filename_re = params["data_re_prefix"]+str(i)+params["data_re_suffix"]
        filename_im = params["data_im_prefix"]+str(i)+params["data_im_suffix"]
        data_i = get_matrix(ndim, ndim, filename_re, filename_im, params["active_space"] ) 
        data.append(data_i)

    return data


[docs]def get_data_sets(params):
    """Reads several sets of data files 

    Args:
        params ( dictionary ): parameters controlling the function execution [Required!]

            Required parameter keys:

            * **params["data_set_paths"]** ( list of strings ):
                define the paths of the directories where the data files for
                different data sets (e.g. independent MD trajectories) are located. 
            .. note::
                In addition, requires parameters described in
                :func:`get_data`

    Returns:
        list of lists of CMATRIX: data: 
            the time series of Hvib matrices for several data sets, such that
            data[idata][time] is a CMATRIX for the data set indexed by `idata`
            at time `time`


    Example:
        The full name of the vibronic Hamiltonian files read by this module should be:
    
        params["data_set_paths"][idata]+params["data_re_prefix"]+integer(time step)+params["data_re_suffix"] - for real part

        params["data_set_paths"][idata]+params["data_im_prefix"]+integer(time step)+params["data_im_suffix"] - for imaginary part

        Say, the directory "/home/alexeyak/test/step3/res0" contains files:
        Hvib_0_re, Hvib_1_re, .... ,    Hvib_999_re
        Hvib_0_im, Hvib_1_im, .... ,    Hvib_999_im

        Then set:

        >>> params["data_set_paths"] = ["/home/alexeyak/test/step3/res0/"]
        >>> params["data_re_prefix"] = "Hvib_"
        >>> params["data_re_suffix"] = "_re"
        >>> params["data_im_prefix"] = "Hvib_"
        >>> params["data_im_suffix"] = "_im"

    """

    critical_params = [ "data_set_paths" ] 
    default_params = { }
    comn.check_input(params, default_params, critical_params)

    data = []

    for idata in params["data_set_paths"]:   # over all MD trajectories (data sets)
        prms = dict(params)    
        prms.update({"data_re_prefix": idata+params["data_re_prefix"] })
        prms.update({"data_im_prefix": idata+params["data_im_prefix"] })                

        data_i = get_data(prms)  
        data.append(data_i)

    return data






[docs]def get_data_from_file(filename, xindx, yindx, xminval=None, xmaxval=None, yminval=None, ymaxval=None):
    """Read in the numeric data stored in a file as columns into Python lists

    Args:
        filename ( string ): The name of the data file
        xindx ( int ): the index of the column read as X
        yindx ( int ): the index of the column read as Y
        xminval ( double ): the minimal X value allowed in the read data set, 
            the points with X values below it will not be included [ default: None ]
        xmaxval ( double ): the maximal X value allowed in the read data set, 
            the points with X values above it will not be included [ default: None ]
        yminval ( double ): the minimal Y value allowed in the read data set, 
            the points with Y values below it will not be included [ default: None ]
        ymaxval ( double ): the maximal Y value allowed in the read data set, 
            the points with Y values above it will not be included [ default: None ]

    Returns:
        (list, list): (X, Y), where: 

            * X ( list of doubles ): x values read from the file, cropped according the conditions
            * Y ( list of doubles ): y values read from the file, cropped according the conditions

    """

    f = open(filename,"r")
    A = f.readlines()
    f.close()

    X, Y = [], []

    for a in A:
        tmp = a.split()

        x = float(tmp[xindx])
        y = float(tmp[yindx])

        is_add = 1
        if xminval != None:
            if  x < xminval:
                is_add = 0
        if xmaxval != None:
            if x > xmaxval:
                is_add = 0
        if yminval != None:
            if  y < yminval:
                is_add = 0
        if ymaxval != None:
            if y > ymaxval:
                is_add = 0

        if is_add:
            X.append(x)  
            Y.append(y)

    return X, Y




[docs]def get_data_from_file2(filename, cols):
    """Read in the numeric data stored in a file as columns into Python lists

    Args:
        filename ( string ): The name of the data file
        cols ( list of ints ): the indices of the columns to read

    Returns:
        (list of lists): data

    """

    f = open(filename,"r")
    A = f.readlines()
    f.close()

    sz = len(cols)
    res = []
    for i in range(0,sz):
        res.append([])

    for a in A:
        tmp = a.split()

        for i in range(0,sz):

            x = float(tmp[ cols[i] ])
            res[i].append(x)

    return res

   
[docs]def read_2D_grid(filename):
    """
    This function reads the 2D map pyplot formatted data from a file.
    
    Args:
        filename (sting) : name of the file to read
        
    Returns:
        double list, double list, list of lists of doubles:
            X grid, Y grid, and the Z values of the grid points
    
    """
    x,y,z = [], [], []
    
    f = open(filename, "r")
    A = f.readlines()
    f.close()
    
    #========== Determine the numbers =====
    nlines = len(A)        
    ny = 0
    line_size = len(A[ny].split())    
    while line_size > 0:        
        ny += 1
        line_size = len(A[ny].split())
                        
    nx = int((nlines + 1)/(ny + 1))
        
    #========== Get the grids =====
    for ix in range(nx):
        x.append( float(A[ix*(ny+1)].split()[0]) )
        
    for iy in range(ny):
        y.append( float(A[iy].split()[1]) )
    
    for ix in range(nx):
        z_x = []
        for iy in range(ny):
            z_xy = float(A[ix*(ny+1)+iy].split()[2])
            z_x.append(z_xy)
        z.append(z_x)
        
    return x, y, z