# Source code for mango.application.io

__doc__ = \
"""
================================================================
Application Input/Output utilities (:mod:`mango.application.io`)
================================================================

.. currentmodule:: mango.application.io

Application specific input/output utilities.

Classes
=======

.. autosummary::
   :toctree: generated/
   
   HistPeaksData - 2D histogram peaks.
   HistData - 2D histogram data.

Functions
=========

.. autosummary::
   :toctree: generated/
   
   readCsvPeaksPerStdd - Reads 2D histogram peaks data (generated by the Histogram_2D filter) from a CSV file.
   readCsvHistData - Reads 2D histogram data (generated by the Histogram_2D filter) from a CSV file.
"""


from mango import mpi
haveMpi4py = mpi.haveMpi4py 

import numpy as np
import scipy as sp
import copy
import sys
import logging
import os
import os.path
import re
import math
logger, rootLogger = mpi.getLoggers(__name__)


class HistPeaksData:
    """
    Peak data for 2D histogram of neighbourhood-mean vs neighbourhood standard-deviation data.
    """
    def __init__(self):
        self.peaksPerStdd = None
        self.peakRgnLbl = None

    def getPeakRegionLabelImage(self, numMean=None):
        """
        Expands the (rgn-lbl, min-idx, max-idx) triples stored in
        :samp:`self.peakRgnLbl` into a 2D label image with one row per
        standard-deviation bin and one column per mean bin.
        """
        numStdd = self.peakRgnLbl.shape[0]
        numPeaks = self.peakRgnLbl.shape[1]/3
        maxMeanIdx = 0
        if (numMean == None):
            for i in range(numPeaks):
                maxMeanIdx = max([maxMeanIdx, np.max(self.peakRgnLbl[:, i*3+2])])
            numMean = maxMeanIdx+1
        lblImg = sp.zeros((numStdd, numMean))
        for i in range(numStdd):
            for j in range(numPeaks):
                idx = j*3
                lblImg[i, self.peakRgnLbl[i, idx+1]:self.peakRgnLbl[i, idx+2]+1] = self.peakRgnLbl[i, idx]
        return lblImg

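# A minimal illustrative sketch (not part of the original module, the array
# values below are hypothetical): each row of peakRgnLbl corresponds to one
# standard-deviation bin and holds (rgn-lbl, min-idx, max-idx) triples, one
# triple per peak; getPeakRegionLabelImage expands these index ranges into a
# (num-stdd-bins, num-mean-bins) label image.
def _examplePeakRegionLabelImage():
    hpd = HistPeaksData()
    # Two stdd bins and two peaks per bin: region label 1 spans mean-index
    # columns 0..2 (row 0) and 1..2 (row 1), region label 2 spans columns 4..5.
    hpd.peakRgnLbl = np.array(
        [
            [1, 0, 2, 2, 4, 5],
            [1, 1, 2, 2, 4, 5],
        ],
        dtype="int64"
    )
    lblImg = hpd.getPeakRegionLabelImage()
    # lblImg == [[1, 1, 1, 0, 2, 2],
    #            [0, 1, 1, 0, 2, 2]]
    return lblImg
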
def readCsvPeaksPerStdd(peaksPerStddCsvFileName, rgnLblsPerStddCsvFileName):
    """
    Reads histogram peak data from files generated by the Histogram2D filter.

    :rtype: :obj:`HistPeaksData`
    """
    hpd = HistPeaksData()
    if (peaksPerStddCsvFileName != None):
        # Parse the peaks-per-stdd CSV file.
        lines = file(peaksPerStddCsvFileName, 'r').readlines()
        lineIdx = 0
        peaksRegEx = re.compile('\\s*((mean-[0-9]*)(,\\s*stdd-[0-9]*)(,\\s*density-[0-9]*))+(.*)')
        datasetsRegEx = re.compile('.*data-file-[0-9]*="(.*)"\\s*,\\s*data-file-[0-9]*="(.*)".*\\s*,\\s*runId="(.*)"(.*)')
        foundPeaksLine = False
        peaksHeaderLine = None
        while ((not foundPeaksLine) and (lineIdx < len(lines))):
            line = lines[lineIdx].strip()
            lineIdx += 1
            foundPeaksLine = ((peaksRegEx.match(line)) != None)
            if (foundPeaksLine):
                peaksHeaderLine = line
            datasetsMtch = datasetsRegEx.match(line)
            if (datasetsMtch != None):
                fName0 = datasetsMtch.group(1)
                fName1 = datasetsMtch.group(2)
                runId = datasetsMtch.group(3)
                rootLogger.info("CSV file = %s" % peaksPerStddCsvFileName)
                rootLogger.info("CSV dataset0 = %s" % fName0)
                rootLogger.info("CSV dataset1 = %s" % fName1)
                rootLogger.info("CSV runId = %s\n" % runId)
        if (foundPeaksLine):
            # Read the (mean, stdd, density) peak rows following the header line.
            numPeaks = (len(peaksHeaderLine.split(","))-1)/3
            peakData = []
            line = lines[lineIdx].strip()
            while ((len(line) > 0) and (lineIdx < len(lines))):
                row = map(float, map(str.strip, line.split(",")))
                peakData.append(row)
                lineIdx += 1
                if (lineIdx < len(lines)):
                    line = lines[lineIdx].strip()
            hpd.peaksPerStdd = sp.array(peakData, dtype="float64")
        else:
            raise RuntimeError("Could not find stdd-peaks header line in file '%s'" % peaksPerStddCsvFileName)
    if (rgnLblsPerStddCsvFileName != None):
        # Parse the peak-region-label-per-stdd CSV file.
        lines = file(rgnLblsPerStddCsvFileName, 'r').readlines()
        lineIdx = 0
        rgnsRegEx = re.compile('\\s*((rgn-lbl)(,\\s*min-idx-[0-9]*)(,\\s*max-idx-[0-9]*))+(.*)')
        datasetsRegEx = re.compile('.*data-file-[0-9]*="(.*)"\\s*,\\s*data-file-[0-9]*="(.*)".*\\s*,\\s*runId="(.*)"(.*)')
        foundRgnLblLine = False
        rgnLblHeaderLine = None
        while ((not foundRgnLblLine) and (lineIdx < len(lines))):
            line = lines[lineIdx].strip()
            lineIdx += 1
            foundRgnLblLine = ((rgnsRegEx.match(line)) != None)
            if (foundRgnLblLine):
                rgnLblHeaderLine = line
            datasetsMtch = datasetsRegEx.match(line)
            if (datasetsMtch != None):
                fName0 = datasetsMtch.group(1)
                fName1 = datasetsMtch.group(2)
                runId = datasetsMtch.group(3)
                rootLogger.info("CSV file = %s" % rgnLblsPerStddCsvFileName)
                rootLogger.info("CSV dataset0 = %s" % fName0)
                rootLogger.info("CSV dataset1 = %s" % fName1)
                rootLogger.info("CSV runId = %s\n" % runId)
        if (foundRgnLblLine):
            # Read the (rgn-lbl, min-idx, max-idx) rows following the header line.
            numPeaks = (len(rgnLblHeaderLine.split(",")))/3
            rgnLblData = []
            line = lines[lineIdx].strip()
            while ((len(line) > 0) and (lineIdx < len(lines))):
                row = map(int, map(str.strip, line.split(",")))
                rgnLblData.append(row)
                lineIdx += 1
                if (lineIdx < len(lines)):
                    line = lines[lineIdx].strip()
            hpd.peakRgnLbl = sp.array(rgnLblData, dtype="int64")
        else:
            raise RuntimeError("Could not find rgn-lbl-header line in file '%s'" % rgnLblsPerStddCsvFileName)
    if ((hpd.peaksPerStdd == None) and (hpd.peakRgnLbl == None)):
        hpd = None
    return hpd

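# A hedged usage sketch (not part of the original module; the file names
# below are hypothetical examples of the hist2d_stdd_* naming convention):
# readCsvPeaksPerStdd accepts either or both CSV file names and returns None
# when neither file yields data.
def _exampleReadPeaks():
    peaksCsv = "hist2d_stdd_peaks_0000_0001_nlmeans.csv"       # hypothetical path
    rgnLblCsv = "hist2d_stdd_regn_lbl_0000_0001_nlmeans.csv"   # hypothetical path
    hpd = readCsvPeaksPerStdd(peaksCsv, rgnLblCsv)
    if (hpd is not None):
        if (hpd.peaksPerStdd is not None):
            rootLogger.info("peaksPerStdd shape = %s" % (hpd.peaksPerStdd.shape,))
        if (hpd.peakRgnLbl is not None):
            rootLogger.info("peak region label image shape = %s" % (hpd.getPeakRegionLabelImage().shape,))
    return hpd
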
class HistData:
    """
    2D histogram object.
    """
    def __init__(self):
        self.fName0 = "x"        # name of first dataset (parsed from the CSV header)
        self.fName1 = "y"        # name of second dataset (parsed from the CSV header)
        self.runId = ""          # run identifier (parsed from the CSV header)
        self.hist2dData = None   # 2D array of bin counts
        self.hist1dData0 = None  # 1D marginal histogram of hist2dData
        self.hist1dData1 = None  # 1D marginal histogram of hist2dData
        self.x = None            # bin centres for the first histogram axis
        self.y = None            # bin centres for the second histogram axis
        self.edges = None        # pair of bin-edge arrays [edges-for-x, edges-for-y]
        self.peaksData = None    # associated HistPeaksData object, or None

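# A minimal sketch (hypothetical data, not part of the original module) of how
# the HistData fields relate to one another in readCsvHistData: x and y are
# bin centres computed from the bin edges, and hist1dData0/hist1dData1 are
# sums of hist2dData along an axis.
def _exampleHistDataFields():
    h2dd = HistData()
    h2dd.edges = [sp.array([0.0, 1.0, 2.0]), sp.array([0.0, 0.5, 1.0])]
    h2dd.x = (h2dd.edges[0][1:] + h2dd.edges[0][0:-1])/2.0  # [0.5, 1.5]
    h2dd.y = (h2dd.edges[1][1:] + h2dd.edges[1][0:-1])/2.0  # [0.25, 0.75]
    h2dd.hist2dData = sp.array([[4.0, 1.0], [2.0, 3.0]])
    h2dd.hist1dData0 = sp.sum(h2dd.hist2dData, axis=0)  # [6.0, 4.0]
    h2dd.hist1dData1 = sp.sum(h2dd.hist2dData, axis=1)  # [5.0, 5.0]
    return h2dd
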
def getPerStddFileNames(csvFileName):
    """
    Returns the (peaks-per-stdd, region-label-per-stdd) companion CSV file
    names for a given 2D histogram CSV file name, with :obj:`None` entries
    for companion files which do not exist on disk.
    """
    peaksPerStddFileName = None
    rgnLblPerStddFileName = None
    csvDir, csvLeafFileName = os.path.split(csvFileName)
    fNameRegEx = re.compile('hist2d_([0-9]*_[0-9]*_.*)')
    mtch = fNameRegEx.match(csvLeafFileName)
    if (mtch != None):
        peaksPerStddFileName = os.path.join(csvDir, "hist2d_stdd_peaks_" + mtch.group(1))
        rgnLblPerStddFileName = os.path.join(csvDir, "hist2d_stdd_regn_lbl_" + mtch.group(1))
        if (not os.path.exists(peaksPerStddFileName)):
            peaksPerStddFileName = None
        if (not os.path.exists(rgnLblPerStddFileName)):
            rgnLblPerStddFileName = None
    return (peaksPerStddFileName, rgnLblPerStddFileName)

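# Illustration (hypothetical path, not part of the original module) of the
# file-name convention used by getPerStddFileNames: for a histogram CSV named
# hist2d_<i>_<j>_<suffix> the companion files are looked up in the same
# directory as hist2d_stdd_peaks_<i>_<j>_<suffix> and
# hist2d_stdd_regn_lbl_<i>_<j>_<suffix>; names not present on disk come back
# as None.
def _examplePerStddFileNames():
    csvFileName = "/tmp/hist2d_0000_0001_nlmeans.csv"  # hypothetical path
    peaksFileName, rgnLblFileName = getPerStddFileNames(csvFileName)
    # If the companion files exist, the returned names would be:
    #   /tmp/hist2d_stdd_peaks_0000_0001_nlmeans.csv
    #   /tmp/hist2d_stdd_regn_lbl_0000_0001_nlmeans.csv
    return (peaksFileName, rgnLblFileName)
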
def readCsvHistData(csvFileName):
    """
    Reads CSV 2D histogram data generated by the Histogram2d filter.

    :type csvFileName: :obj:`str`
    :param csvFileName: Name of file containing CSV histogram data.
    :rtype: :obj:`HistData`
    :return: A :obj:`HistData` object containing histogram data.
    """
    h2dd = HistData()
    peaksPerStddFileName, rgnLblPerStddFileName = getPerStddFileNames(csvFileName)
    hpd = readCsvPeaksPerStdd(peaksPerStddFileName, rgnLblPerStddFileName)
    h2dd.peaksData = hpd

    lines = file(csvFileName, 'r').readlines()
    lineIdx = 0
    edgesRegEx = re.compile('\\s*(bin-pts-[0-9]*)(,\\s*bin-pts-[0-9]*)*(.*)')
    datasetsRegEx = re.compile('.*data-file-[0-9]*="(.*)"\\s*,\\s*data-file-[0-9]*="(.*)".*\\s*,\\s*runId="(.*)"(.*)')
    foundEdgesLine = False
    # Scan for the bin-pts header line, picking up dataset names and runId on the way.
    while ((not foundEdgesLine) and (lineIdx < len(lines))):
        line = lines[lineIdx].strip()
        lineIdx += 1
        foundEdgesLine = ((edgesRegEx.match(line)) != None)
        datasetsMtch = datasetsRegEx.match(line)
        if (datasetsMtch != None):
            h2dd.fName0 = datasetsMtch.group(1)
            h2dd.fName1 = datasetsMtch.group(2)
            h2dd.runId = datasetsMtch.group(3)
            rootLogger.info("CSV file = %s" % csvFileName)
            rootLogger.info("CSV dataset0 = %s" % h2dd.fName0)
            rootLogger.info("CSV dataset1 = %s" % h2dd.fName1)
            rootLogger.info("CSV runId = %s\n" % h2dd.runId)
    if (foundEdgesLine):
        # Read the bin-edge coordinate pairs and convert them to bin centres.
        edges = [[], []]
        line = lines[lineIdx].strip()
        pairRegEx = re.compile("\\s*([^,]*)\\s*,\\s*([^,]*)((,.*)*)")
        while ((len(line) > 0) and (lineIdx < len(lines))):
            mtch = pairRegEx.match(line)
            if (mtch != None):
                g1 = mtch.group(1).strip()
                g2 = mtch.group(2).strip()
                if (len(g1) > 0):
                    edges[0].append(float(g1))
                if (len(g2) > 0):
                    edges[1].append(float(g2))
            lineIdx += 1
            if (lineIdx < len(lines)):
                line = lines[lineIdx].strip()
        h2dd.edges = [sp.array(edges[0], dtype="float64"), sp.array(edges[1], dtype="float64")]
        h2dd.x = (h2dd.edges[0][1:] + h2dd.edges[0][0:-1])/2.0
        h2dd.y = (h2dd.edges[1][1:] + h2dd.edges[1][0:-1])/2.0

        # Scan for the bin-counts header line, then fill the 2D count array.
        foundCountsLine = False
        countsRegEx = re.compile('\\s*bin-[0-9]*-idx\\s*,\\s*bin-[0-9]*-idx\\s*,\\s*count')
        while ((not foundCountsLine) and (lineIdx < len(lines))):
            line = lines[lineIdx].strip()
            lineIdx += 1
            foundCountsLine = ((countsRegEx.match(line)) != None)
        if (foundCountsLine):
            h2dd.hist2dData = sp.zeros((h2dd.x.size, h2dd.y.size), dtype="float64")
            if (lineIdx < len(lines)):
                line = lines[lineIdx].strip()
            tripleRegEx = re.compile("\\s*([^,]*)\\s*,\\s*([^,]*),\\s*([^,]*)")
            while (lineIdx < len(lines)):
                mtch = tripleRegEx.match(line)
                if (mtch != None):
                    triple = [int(mtch.group(1).strip()), int(mtch.group(2).strip()), int(mtch.group(3).strip())]
                    h2dd.hist2dData[triple[0], triple[1]] = triple[2]
                lineIdx += 1
                if (lineIdx < len(lines)):
                    line = lines[lineIdx].strip()
            h2dd.hist1dData0 = sp.sum(h2dd.hist2dData, axis=0)
            h2dd.hist1dData1 = sp.sum(h2dd.hist2dData, axis=1)
        else:
            raise RuntimeError("Could not find bin-counts header line in file '%s'" % csvFileName)
    else:
        raise RuntimeError("Could not find bin-pts header line in file '%s'" % csvFileName)

    # transpose everything for plotting
    tmp = h2dd
    h2dd = copy.copy(h2dd)
    h2dd.x = tmp.y
    h2dd.y = tmp.x
    h2dd.edges = [tmp.edges[1], tmp.edges[0]]
    h2dd.hist2dData = tmp.hist2dData.transpose()
    h2dd.hist1dData0 = tmp.hist1dData1
    h2dd.hist1dData1 = tmp.hist1dData0

    return h2dd

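# A hedged usage sketch (not part of the original module; the file name is
# hypothetical and matplotlib is assumed to be available, it is not a mango
# requirement): readCsvHistData returns a HistData object whose arrays are
# already transposed for plotting, so the bin edges and counts can be passed
# to pcolormesh directly.
def _exampleReadAndPlotHist(csvFileName="hist2d_0000_0001_nlmeans.csv"):
    h2dd = readCsvHistData(csvFileName)
    import matplotlib.pyplot as plt  # assumed optional dependency
    # pcolormesh expects counts of shape (len(Y)-1, len(X)-1).
    plt.pcolormesh(h2dd.edges[0], h2dd.edges[1], h2dd.hist2dData.transpose())
    plt.title("2D histogram, runId=%s" % h2dd.runId)
    plt.show()
    return h2dd
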
__all__ = [s for s in dir() if not s.startswith('_')]