# Source code for sas.sascalc.pr.invertor

# pylint: disable=invalid-name
"""
Module to perform P(r) inversion.
The module contains the Invertor class.

FIXME: The way the Invertor interacts with its C component should be cleaned up
"""

import numpy
import sys
import math
import time
import copy
import os
import re
import logging
from numpy.linalg import lstsq
from scipy import optimize
from sas.sascalc.pr.core.pr_inversion import Cinvertor

def help():
    """
    Provide general online help text.

    Future work: extend this function to allow topic selection.

    :return: a multi-paragraph string describing the inversion approach,
        the fit function that is minimized and the user inputs.
    """
    # Adjacent string literals are joined at compile time. The fragments are
    # kept exactly as they were, so the returned text is unchanged.
    return (
        "The inversion approach is based on Moore, J. Appl. Cryst. "
        "(1980) 13, 168-175.\n\n"
        "P(r) is set to be equal to an expansion of base functions "
        "of the type "
        "phi_n(r) = 2*r*sin(pi*n*r/D_max). The coefficient of each "
        "base functions "
        "in the expansion is found by performing a least square fit "
        "with the "
        "following fit function:\n\n"
        "chi**2 = sum_i[ I_meas(q_i) - I_th(q_i) ]**2/error**2 +"
        "Reg_term\n\n"
        "where I_meas(q) is the measured scattering intensity and "
        "I_th(q) is "
        "the prediction from the Fourier transform of the P(r) "
        "expansion. "
        "The Reg_term term is a regularization term set to the second"
        " derivative "
        "d**2P(r)/dr**2 integrated over r. It is used to produce "
        "a smooth P(r) output.\n\n"
        "The following are user inputs:\n\n"
        "   - Number of terms: the number of base functions in the P(r)"
        " expansion.\n\n"
        "   - Regularization constant: a multiplicative constant "
        "to set the size of "
        "the regularization term.\n\n"
        "   - Maximum distance: the maximum distance between any "
        "two points in the system.\n"
    )


class Invertor(Cinvertor):
    """
    Invertor class to perform P(r) inversion

    The problem is solved by posing the problem as  Ax = b,
    where x is the set of coefficients we are looking for.

    Npts is the number of points.

    In the following i refers to the ith base function coefficient.
    The matrix has its entries j in its first Npts rows set to ::

        A[j][i] = (Fourier transformed base function for point j)

    We then choose a number of r-points, n_r, to evaluate the second
    derivative of P(r) at. This is used as our regularization term.
    For a vector r of length n_r, the following n_r rows are set to ::

        A[j+Npts][i] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
        evaluated at r[j])

    The vector b has its first Npts entries set to ::

        b[j] = (I(q) observed for point j)

    The following n_r entries are set to zero.

    The result is found by using numpy.linalg.lstsq to solve
    the least-squares problem and find the coefficients x.

    Methods inherited from Cinvertor:

    * ``get_peaks(pars)``: returns the number of P(r) peaks
    * ``oscillations(pars)``: returns the oscillation parameters for the output P(r)
    * ``get_positive(pars)``: returns the fraction of P(r) that is above zero
    * ``get_pos_err(pars)``: returns the fraction of P(r) that is 1-sigma above zero
    """
    ## Chisqr of the last computation
    chi2 = 0
    ## Time elapsed for last computation
    elapsed = 0
    ## Alpha to get the reg term the same size as the signal
    suggested_alpha = 0
    ## Last number of base functions used
    nfunc = 10
    ## Last output values
    out = None
    ## Last errors on output values
    cov = None
    ## Background value
    background = 0
    ## Information dictionary for application use
    # NOTE: this is a class-level dict, so it is shared by every instance
    # that has not been assigned its own ``info``.
    info = {}

    def __init__(self):
        """
        Create an Invertor by running the Cinvertor initializer.
        """
        Cinvertor.__init__(self)

    def __setstate__(self, state):
        """
        Restore a pickled Invertor.

        :param state: 11-item sequence in the order produced by
            ``__reduce_ex__``: the instance ``__dict__`` followed by alpha,
            d_max, q_min, q_max, x, y, err, has_bck, slit_height and
            slit_width.
        """
        # Unpack everything first so a malformed state fails before any
        # attribute has been touched.
        (instance_dict, alpha, d_max, q_min, q_max,
         x_values, y_values, errors, has_bck,
         slit_height, slit_width) = state

        # Same assignment order as the pickled tuple.
        self.__dict__ = instance_dict
        self.alpha = alpha
        self.d_max = d_max
        self.q_min = q_min
        self.q_max = q_max
        self.x = x_values
        self.y = y_values
        self.err = errors
        self.has_bck = has_bck
        self.slit_height = slit_height
        self.slit_width = slit_width

    def __reduce_ex__(self, proto):
        """
        Describe how to pickle this instance.

        :param proto: pickle protocol number (not used here).
        :return: ``(Invertor, (), state, None, None)`` where ``state`` holds
            the instance ``__dict__`` followed by the values that are read
            through the attribute accessors, in the order expected by
            ``__setstate__``.
        """
        accessor_names = ('alpha', 'd_max',
                          'q_min', 'q_max',
                          'x', 'y',
                          'err', 'has_bck',
                          'slit_height', 'slit_width')

        state = [self.__dict__]
        state.extend(getattr(self, attr) for attr in accessor_names)
        return (Invertor, (), tuple(state), None, None)

    def __setattr__(self, name, value):
        """
        Set the value of an attribute.

        x, y, err, d_max, q_min, q_max, alpha, slit_height, slit_width and
        has_bck are forwarded to the matching parent-class ``set_*`` method;
        any other name is stored through ``Cinvertor.__setattr__``.

        :param name: attribute name
        :param value: value to store

        :raises ValueError: if ``x`` contains 0.0, if ``d_max`` is not
            greater than zero, or if ``has_bck`` is neither True nor False.
        """
        if name == 'x':
            if 0.0 in value:
                msg = "Invertor: one of your q-values is zero. "
                msg += "Delete that entry before proceeding"
                raise ValueError(msg)
            return self.set_x(value)
        elif name == 'y':
            return self.set_y(value)
        elif name == 'err':
            # Only the magnitude of the uncertainty is passed on
            value2 = abs(value)
            return self.set_err(value2)
        elif name == 'd_max':
            if value <= 0.0:
                msg = "Invertor: d_max must be greater than zero."
                msg += "Correct that entry before proceeding"
                raise ValueError(msg)
            return self.set_dmax(value)
        elif name == 'q_min':
            # None means "no lower bound" and is passed on as -1.0
            if value is None:
                return self.set_qmin(-1.0)
            return self.set_qmin(value)
        elif name == 'q_max':
            # None means "no upper bound" and is passed on as -1.0
            if value is None:
                return self.set_qmax(-1.0)
            return self.set_qmax(value)
        elif name == 'alpha':
            return self.set_alpha(value)
        elif name == 'slit_height':
            return self.set_slit_height(value)
        elif name == 'slit_width':
            return self.set_slit_width(value)
        elif name == 'has_bck':
            # Equality (not identity) is used on purpose so that 1 and 0
            # keep being accepted as before.
            if value == True:
                return self.set_has_bck(1)
            elif value == False:
                return self.set_has_bck(0)
            else:
                raise ValueError("Invertor: has_bck can only be True or False")

        return Cinvertor.__setattr__(self, name, value)

    def __getattr__(self, name):
        """
        Return the value of an attribute that normal lookup did not find.

        x, y and err are returned as freshly allocated numpy arrays filled
        by the parent class; the scalar settings are read through the
        matching ``get_*`` method. q_min and q_max are reported as None when
        the stored value is negative. Any other name yields the entry of the
        instance ``__dict__`` if there is one, and None otherwise.
        """
        # Array-valued data: allocate a buffer of the right length and let
        # the parent class fill it in place.
        if name in ('x', 'y', 'err'):
            buf = numpy.ones(getattr(self, 'get_n' + name)())
            getattr(self, 'get_' + name)(buf)
            return buf

        # Q-range bounds: a negative stored value stands for "not set".
        if name == 'q_min' or name == 'q_max':
            bound = self.get_qmin() if name == 'q_min' else self.get_qmax()
            return None if bound < 0 else bound

        # Plain scalar settings map one-to-one onto a getter.
        scalar_getters = {
            'd_max': 'get_dmax',
            'alpha': 'get_alpha',
            'slit_height': 'get_slit_height',
            'slit_width': 'get_slit_width',
        }
        if name in scalar_getters:
            return getattr(self, scalar_getters[name])()

        if name == 'has_bck':
            return bool(self.get_has_bck() == 1)

        # Unknown names fall back to the instance dictionary, else None.
        return self.__dict__.get(name)

[docs]    def clone(self):
        """
        Return a clone of this instance
        """
        #import copy

        invertor = Invertor()
        invertor.chi2 = self.chi2
        invertor.elapsed = self.elapsed
        invertor.nfunc = self.nfunc
        invertor.alpha = self.alpha
        invertor.d_max = self.d_max
        invertor.q_min = self.q_min
        invertor.q_max = self.q_max

        invertor.x = self.x
        invertor.y = self.y
        invertor.err = self.err
        invertor.has_bck = self.has_bck
        invertor.slit_height = self.slit_height
        invertor.slit_width = self.slit_width

        invertor.info = copy.deepcopy(self.info)

        return invertor

[docs]    def invert(self, nfunc=10, nr=20):
        """
        Perform inversion to P(r)

        The problem is solved by posing the problem as  Ax = b,
        where x is the set of coefficients we are looking for.

        Npts is the number of points.

        In the following i refers to the ith base function coefficient.
        The matrix has its entries j in its first Npts rows set to ::

            A[i][j] = (Fourier transformed base function for point j)

        We them choose a number of r-points, n_r, to evaluate the second
        derivative of P(r) at. This is used as our regularization term.
        For a vector r of length n_r, the following n_r rows are set to ::

            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2, evaluated at r[j])

        The vector b has its first Npts entries set to ::

            b[j] = (I(q) observed for point j)

        The following n_r entries are set to zero.

        The result is found by using scipy.linalg.basic.lstsq to invert
        the matrix and find the coefficients x.

        :param nfunc: number of base functions to use.
        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.
        :return: c_out, c_cov - the coefficients with covariance matrix
        """
        # Reset the background value before proceeding
        self.background = 0.0
        return self.lstsq(nfunc, nr=nr)

[docs]    def iq(self, out, q):
        """
        Function to call to evaluate the scattering intensity

        :param args: c-parameters, and q
        :return: I(q)

        """
        return Cinvertor.iq(self, out, q) + self.background

[docs]    def invert_optimize(self, nfunc=10, nr=20):
        """
        Slower version of the P(r) inversion that uses scipy.optimize.leastsq.

        This probably produce more reliable results, but is much slower.
        The minimization function is set to
        sum_i[ (I_obs(q_i) - I_theo(q_i))/err**2 ] + alpha * reg_term,
        where the reg_term is given by Svergun: it is the integral of
        the square of the first derivative
        of P(r), d(P(r))/dr, integrated over the full range of r.

        :param nfunc: number of base functions to use.
        :param nr: number of r points to evaluate the 2nd derivative at
            for the reg. term.

        :return: c_out, c_cov - the coefficients with covariance matrix

        """
        self.nfunc = nfunc
        # First, check that the current data is valid
        if self.is_valid() <= 0:
            msg = "Invertor.invert: Data array are of different length"
            raise RuntimeError, msg

        p = numpy.ones(nfunc)
        t_0 = time.time()
        out, cov_x, _, _, _ = optimize.leastsq(self.residuals, p, full_output=1)

        # Compute chi^2
        res = self.residuals(out)
        chisqr = 0
        for i in range(len(res)):
            chisqr += res[i]

        self.chi2 = chisqr

        # Store computation time
        self.elapsed = time.time() - t_0

        if cov_x is None:
            cov_x = numpy.ones([nfunc, nfunc])
            cov_x *= math.fabs(chisqr)
        return out, cov_x

[docs]    def pr_fit(self, nfunc=5):
        """
        This is a direct fit to a given P(r). It assumes that the y data
        is set to some P(r) distribution that we are trying to reproduce
        with a set of base functions.

        This method is provided as a test.
        """
        # First, check that the current data is valid
        if self.is_valid() <= 0:
            msg = "Invertor.invert: Data arrays are of different length"
            raise RuntimeError, msg

        p = numpy.ones(nfunc)
        t_0 = time.time()
        out, cov_x, _, _, _ = optimize.leastsq(self.pr_residuals, p, full_output=1)

        # Compute chi^2
        res = self.pr_residuals(out)
        chisqr = 0
        for i in range(len(res)):
            chisqr += res[i]

        self.chisqr = chisqr

        # Store computation time
        self.elapsed = time.time() - t_0

        return out, cov_x

[docs]    def pr_err(self, c, c_cov, r):
        """
        Returns the value of P(r) for a given r, and base function
        coefficients, with error.

        :param c: base function coefficients
        :param c_cov: covariance matrice of the base function coefficients
        :param r: r-value to evaluate P(r) at

        :return: P(r)

        """
        return self.get_pr_err(c, c_cov, r)

    def _accept_q(self, q):
        """
        Check q-value against user-defined range

        :param q: q-value to test
        :return: False if q lies below ``q_min`` or above ``q_max`` (a bound
            that is None is ignored), True otherwise
        """
        lower = self.q_min
        if lower is not None and q < lower:
            return False
        upper = self.q_max
        if upper is not None and q > upper:
            return False
        return True

[docs]    def lstsq(self, nfunc=5, nr=20):
        """
        The problem is solved by posing the problem as  Ax = b,
        where x is the set of coefficients we are looking for.

        Npts is the number of points.

        In the following i refers to the ith base function coefficient.
        The matrix has its entries j in its first Npts rows set to ::

            A[i][j] = (Fourier transformed base function for point j)

        We them choose a number of r-points, n_r, to evaluate the second
        derivative of P(r) at. This is used as our regularization term.
        For a vector r of length n_r, the following n_r rows are set to ::

            A[i+Npts][j] = (2nd derivative of P(r), d**2(P(r))/d(r)**2,
            evaluated at r[j])

        The vector b has its first Npts entries set to ::

            b[j] = (I(q) observed for point j)

        The following n_r entries are set to zero.

        The result is found by using scipy.linalg.basic.lstsq to invert
        the matrix and find the coefficients x.

        :param nfunc: number of base functions to use.
        :param nr: number of r points to evaluate the 2nd derivative at for the reg. term.

        If the result does not allow us to compute the covariance matrix,
        a matrix filled with zeros will be returned.

        """
        # Note: To make sure an array is contiguous:
        # blah = numpy.ascontiguousarray(blah_original)
        # ... before passing it to C

        if self.is_valid() < 0:
            msg = "Invertor: invalid data; incompatible data lengths."
            raise RuntimeError, msg

        self.nfunc = nfunc
        # a -- An M x N matrix.
        # b -- An M x nrhs matrix or M vector.
        npts = len(self.x)
        nq = nr
        sqrt_alpha = math.sqrt(math.fabs(self.alpha))
        if sqrt_alpha < 0.0:
            nq = 0

        # If we need to fit the background, add a term
        if self.has_bck == True:
            nfunc_0 = nfunc
            nfunc += 1

        a = numpy.zeros([npts + nq, nfunc])
        b = numpy.zeros(npts + nq)
        err = numpy.zeros([nfunc, nfunc])

        # Construct the a matrix and b vector that represent the problem
        t_0 = time.time()
        try:
            self._get_matrix(nfunc, nq, a, b)
        except:
            raise RuntimeError, "Invertor: could not invert I(Q)\n  %s" % sys.exc_value

        # Perform the inversion (least square fit)
        c, chi2, _, _ = lstsq(a, b)
        # Sanity check
        try:
            float(chi2)
        except:
            chi2 = -1.0
        self.chi2 = chi2

        inv_cov = numpy.zeros([nfunc, nfunc])
        # Get the covariance matrix, defined as inv_cov = a_transposed * a
        self._get_invcov_matrix(nfunc, nr, a, inv_cov)

        # Compute the reg term size for the output
        sum_sig, sum_reg = self._get_reg_size(nfunc, nr, a)

        if math.fabs(self.alpha) > 0:
            new_alpha = sum_sig / (sum_reg / self.alpha)
        else:
            new_alpha = 0.0
        self.suggested_alpha = new_alpha

        try:
            cov = numpy.linalg.pinv(inv_cov)
            err = math.fabs(chi2 / float(npts - nfunc)) * cov
        except:
            # We were not able to estimate the errors
            # Return an empty error matrix
            logging.error(sys.exc_value)

        # Keep a copy of the last output
        if self.has_bck == False:
            self.background = 0
            self.out = c
            self.cov = err
        else:
            self.background = c[0]

            err_0 = numpy.zeros([nfunc, nfunc])
            c_0 = numpy.zeros(nfunc)

            for i in range(nfunc_0):
                c_0[i] = c[i + 1]
                for j in range(nfunc_0):
                    err_0[i][j] = err[i + 1][j + 1]

            self.out = c_0
            self.cov = err_0

        # Store computation time
        self.elapsed = time.time() - t_0

        return self.out, self.cov

[docs]    def estimate_numterms(self, isquit_func=None):
        """
        Returns a reasonable guess for the
        number of terms

        :param isquit_func:
          reference to thread function to call to check whether the computation needs to
          be stopped.

        :return: number of terms, alpha, message

        """
        from num_term import NTermEstimator
        estimator = NTermEstimator(self.clone())
        try:
            return estimator.num_terms(isquit_func)
        except:
            # If we fail, estimate alpha and return the default
            # number of terms
            best_alpha, _, _ = self.estimate_alpha(self.nfunc)
            logging.warning("Invertor.estimate_numterms: %s" % sys.exc_value)
            return self.nfunc, best_alpha, "Could not estimate number of terms"

[docs]    def estimate_alpha(self, nfunc):
        """
        Returns a reasonable guess for the
        regularization constant alpha

        :param nfunc: number of terms to use in the expansion.

        :return: alpha, message, elapsed

        where alpha is the estimate for alpha,
        message is a message for the user,
        elapsed is the computation time
        """
        #import time
        try:
            pr = self.clone()

            # T_0 for computation time
            starttime = time.time()
            elapsed = 0

            # If the current alpha is zero, try
            # another value
            if pr.alpha <= 0:
                pr.alpha = 0.0001

            # Perform inversion to find the largest alpha
            out, _ = pr.invert(nfunc)
            elapsed = time.time() - starttime
            initial_alpha = pr.alpha
            initial_peaks = pr.get_peaks(out)

            # Try the inversion with the estimated alpha
            pr.alpha = pr.suggested_alpha
            out, _ = pr.invert(nfunc)

            npeaks = pr.get_peaks(out)
            # if more than one peak to start with
            # just return the estimate
            if npeaks > 1:
                #message = "Your P(r) is not smooth,
                #please check your inversion parameters"
                message = None
                return pr.suggested_alpha, message, elapsed
            else:

                # Look at smaller values
                # We assume that for the suggested alpha, we have 1 peak
                # if not, send a message to change parameters
                alpha = pr.suggested_alpha
                best_alpha = pr.suggested_alpha
                found = False
                for i in range(10):
                    pr.alpha = (0.33) ** (i + 1) * alpha
                    out, _ = pr.invert(nfunc)

                    peaks = pr.get_peaks(out)
                    if peaks > 1:
                        found = True
                        break
                    best_alpha = pr.alpha

                # If we didn't find a turning point for alpha and
                # the initial alpha already had only one peak,
                # just return that
                if not found and initial_peaks == 1 and \
                    initial_alpha < best_alpha:
                    best_alpha = initial_alpha

                # Check whether the size makes sense
                message = ''

                if not found:
                    message = None
                elif best_alpha >= 0.5 * pr.suggested_alpha:
                    # best alpha is too big, return a
                    # reasonable value
                    message = "The estimated alpha for your system is too "
                    message += "large. "
                    message += "Try increasing your maximum distance."

                return best_alpha, message, elapsed

        except:
            message = "Invertor.estimate_alpha: %s" % sys.exc_value
            return 0, message, elapsed

[docs]    def to_file(self, path, npts=100):
        """
        Save the state to a file that will be readable
        by SliceView.

        :param path: path of the file to write
        :param npts: number of P(r) points to be written

        """
        file = open(path, 'w')
        file.write("#d_max=%g\n" % self.d_max)
        file.write("#nfunc=%g\n" % self.nfunc)
        file.write("#alpha=%g\n" % self.alpha)
        file.write("#chi2=%g\n" % self.chi2)
        file.write("#elapsed=%g\n" % self.elapsed)
        file.write("#qmin=%s\n" % str(self.q_min))
        file.write("#qmax=%s\n" % str(self.q_max))
        file.write("#slit_height=%g\n" % self.slit_height)
        file.write("#slit_width=%g\n" % self.slit_width)
        file.write("#background=%g\n" % self.background)
        if self.has_bck == True:
            file.write("#has_bck=1\n")
        else:
            file.write("#has_bck=0\n")
        file.write("#alpha_estimate=%g\n" % self.suggested_alpha)
        if not self.out == None:
            if len(self.out) == len(self.cov):
                for i in range(len(self.out)):
                    file.write("#C_%i=%s+-%s\n" % (i, str(self.out[i]),
                                                   str(self.cov[i][i])))
        file.write("<r>  <Pr>  <dPr>\n")
        r = numpy.arange(0.0, self.d_max, self.d_max / npts)

        for r_i in r:
            (value, err) = self.pr_err(self.out, self.cov, r_i)
            file.write("%g  %g  %g\n" % (r_i, value, err))

        file.close()

[docs]    def from_file(self, path):
        """
        Load the state of the Invertor from a file,
        to be able to generate P(r) from a set of
        parameters.

        :param path: path of the file to load

        """
        #import os
        #import re
        if os.path.isfile(path):
            try:
                fd = open(path, 'r')

                buff = fd.read()
                lines = buff.split('\n')
                for line in lines:
                    if line.startswith('#d_max='):
                        toks = line.split('=')
                        self.d_max = float(toks[1])
                    elif line.startswith('#nfunc='):
                        toks = line.split('=')
                        self.nfunc = int(toks[1])
                        self.out = numpy.zeros(self.nfunc)
                        self.cov = numpy.zeros([self.nfunc, self.nfunc])
                    elif line.startswith('#alpha='):
                        toks = line.split('=')
                        self.alpha = float(toks[1])
                    elif line.startswith('#chi2='):
                        toks = line.split('=')
                        self.chi2 = float(toks[1])
                    elif line.startswith('#elapsed='):
                        toks = line.split('=')
                        self.elapsed = float(toks[1])
                    elif line.startswith('#alpha_estimate='):
                        toks = line.split('=')
                        self.suggested_alpha = float(toks[1])
                    elif line.startswith('#qmin='):
                        toks = line.split('=')
                        try:
                            self.q_min = float(toks[1])
                        except:
                            self.q_min = None
                    elif line.startswith('#qmax='):
                        toks = line.split('=')
                        try:
                            self.q_max = float(toks[1])
                        except:
                            self.q_max = None
                    elif line.startswith('#slit_height='):
                        toks = line.split('=')
                        self.slit_height = float(toks[1])
                    elif line.startswith('#slit_width='):
                        toks = line.split('=')
                        self.slit_width = float(toks[1])
                    elif line.startswith('#background='):
                        toks = line.split('=')
                        self.background = float(toks[1])
                    elif line.startswith('#has_bck='):
                        toks = line.split('=')
                        if int(toks[1]) == 1:
                            self.has_bck = True
                        else:
                            self.has_bck = False

                    # Now read in the parameters
                    elif line.startswith('#C_'):
                        toks = line.split('=')
                        p = re.compile('#C_([0-9]+)')
                        m = p.search(toks[0])
                        toks2 = toks[1].split('+-')
                        i = int(m.group(1))
                        self.out[i] = float(toks2[0])

                        self.cov[i][i] = float(toks2[1])

            except:
                msg = "Invertor.from_file: corrupted file\n%s" % sys.exc_value
                raise RuntimeError, msg
        else:
            msg = "Invertor.from_file: '%s' is not a file" % str(path)
            raise RuntimeError, msg