import numpy as np 
from scipy import io as spio


# Saving as matlab file
a = np.ones((3, 3))
spio.savemat('file.mat', {'a': a}) # savemat expects a dictionary


# Loading matlab file
data = spio.loadmat('file.mat')
data['a']

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])


# Python / Matlab mismatches, eg matlab does not represent 1D arrays
a = np.ones(3)
print(a)
spio.savemat('file.mat', {'a': a})
data = spio.loadmat('file.mat')['a']
print(data)

[1. 1. 1.]
[[1. 1. 1.]]


from scipy import linalg


arr = np.array([[1, 2],
                [3, 4]])


linalg.det(arr)

-2.0


arr = np.array([[3, 2],
                [6, 4]])


linalg.det(arr)

6.661338147750939e-16


linalg.det(np.ones((3, 4)))

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-11-06573727bd3f> in <module>
----> 1 linalg.det(np.ones((3, 4)))

C:\ProgramData\Anaconda3\lib\site-packages\scipy\linalg\basic.py in det(a, overwrite_a, check_finite)
   1018     a1 = _asarray_validated(a, check_finite=check_finite)
   1019     if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]:
-> 1020         raise ValueError('expected square matrix')
   1021     overwrite_a = overwrite_a or _datacopied(a1, a)
   1022     fdet, = get_flinalg_funcs(('det',), (a1,))

ValueError: expected square matrix


linalg.det(np.ones((3, 3)))

0.0


arr = np.array([[1, 2],
                [3, 4]])


iarr = linalg.inv(arr)
print(iarr)

[[-2.   1. ]
 [ 1.5 -0.5]]


arr = np.array([[3, 2],
                [6, 4]])


linalg.inv(arr)

---------------------------------------------------------------------------
LinAlgError                               Traceback (most recent call last)
<ipython-input-17-ed8389f90f54> in <module>
----> 1 linalg.inv(arr)

C:\ProgramData\Anaconda3\lib\site-packages\scipy\linalg\basic.py in inv(a, overwrite_a, check_finite)
    961         inv_a, info = getri(lu, piv, lwork=lwork, overwrite_lu=1)
    962     if info > 0:
--> 963         raise LinAlgError("singular matrix")
    964     if info < 0:
    965         raise ValueError('illegal value in %d-th argument of internal '

LinAlgError: singular matrix


arr = np.arange(9).reshape((3, 3)) + np.diag([1, 0, 1])
print(arr)

[[1 1 2]
 [3 4 5]
 [6 7 9]]


uarr, spec, vharr = linalg.svd(arr)


print(uarr)

[[-0.1617463  -0.98659196  0.02178164]
 [-0.47456365  0.09711667  0.87484724]
 [-0.86523261  0.13116653 -0.48390895]]


print(spec)

[14.88982544  0.45294236  0.29654967]


print(vharr)

[[-0.45513179 -0.54511245 -0.70406496]
 [ 0.20258033  0.70658087 -0.67801525]
 [-0.86707339  0.45121601  0.21115836]]


# The original matrix can be re-composed by matrix multiplication of the outputs of svd with np.dot:
sarr = np.diag(spec)
svd_mat = uarr.dot(sarr).dot(vharr)
np.allclose(svd_mat, arr) # Elementwise equal or not?

True


from scipy.interpolate import interp1d


# Generate data
np.random.seed(0)
measured_time = np.linspace(0, 1, 10)
noise = 1e-1 * (np.random.random(10)*2 - 1)
measures = np.sin(2 * np.pi * measured_time) + noise


# Interpolate it to new time points
linear_interp = interp1d(measured_time, measures)
interpolation_time = np.linspace(0, 1, 50)
linear_results = linear_interp(interpolation_time)
cubic_interp = interp1d(measured_time, measures, kind='cubic')
cubic_results = cubic_interp(interpolation_time)


# Plot the data and the interpolation
from matplotlib import pyplot as plt
plt.figure(figsize=(6, 4))
plt.plot(measured_time, measures, 'o', ms=6, label='measures')
plt.plot(interpolation_time, linear_results, label='linear interp')
plt.plot(interpolation_time, cubic_results, label='cubic interp')
plt.legend()
plt.show()


#Demos a simple curve fitting

#First generate some data

# Seed the random number generator for reproducibility
np.random.seed(0)

x_data = np.linspace(-5, 5, num=50)
y_data = 2.9 * np.sin(1.5 * x_data) + np.random.normal(size=50)

# And plot it
plt.figure(figsize=(6, 4))
plt.scatter(x_data, y_data)
plt.show()


# Now fit a simple sine function to the data

from scipy import optimize

def test_func(x, a, b):
    return a * np.sin(b * x)

params, params_covariance = optimize.curve_fit(test_func, x_data, y_data,
                                               p0=[2, 2])

print(params)

[3.05931973 1.45754553]


# And plot the resulting curve on the data
plt.figure(figsize=(6, 4))
plt.scatter(x_data, y_data, label='Data')
plt.plot(x_data, test_func(x_data, params[0], params[1]),
         label='Fitted function')

plt.legend(loc='best')

plt.show()


# Demos various methods to find the minimum of a function.
def f(x):
    return x**2 + 10*np.sin(x)


x = np.arange(-10, 10, 0.1)
plt.plot(x, f(x))
plt.show()


# Now find the minimum with a few methods
from scipy import optimize

# The default (Nelder Mead)
print(optimize.minimize(f, x0=0))

      fun: -7.945823375615215
 hess_inv: array([[0.08589237]])
      jac: array([-1.1920929e-06])
  message: 'Optimization terminated successfully.'
     nfev: 12
      nit: 5
     njev: 6
   status: 0
  success: True
        x: array([-1.30644012])


# Methods: As the function is a smooth function, gradient-descent based methods are good options. 
# The lBFGS algorithm is a good choice in general:
res = optimize.minimize(f, x0=0, method="L-BFGS-B")
print(res)

      fun: array([-7.94582338])
 hess_inv: <1x1 LbfgsInvHessProduct with dtype=float64>
      jac: array([-1.68753901e-06])
  message: 'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL'
     nfev: 12
      nit: 5
     njev: 6
   status: 0
  success: True
        x: array([-1.30644017])


xMin = res.x

plt.plot(x, f(x))
plt.plot(xMin, f(xMin), 'o')
plt.show()


# Global minimum: A possible issue with this approach is that, 
# if the function has local minima, the algorithm may find these 
# local minima instead of the global minimum depending on the initial point x0:
res = optimize.minimize(f, x0=3, method="L-BFGS-B")
xMin = res.x

plt.plot(x, f(x))
plt.plot(xMin, f(xMin), 'o')
plt.show()


# If we don’t know the neighborhood of the global minimum to choose
# the initial point, we need to resort to costlier global optimization.
# To find the global minimum, we use scipy.optimize.basinhopping()
# (added in version 0.12.0 of Scipy). 
# t combines a local optimizer with sampling of starting points:

res = optimize.basinhopping(f, 0)

xMin = res.x

plt.plot(x, f(x))
plt.plot(xMin, f(xMin), 'o')
plt.show()


# We can constrain the variable to the interval (0, 10) using the “bounds” argument:
res = optimize.minimize(f, x0=3, bounds=((0, 10), ))
xMin = res.x

plt.plot(x, f(x))
plt.plot(xMin, f(xMin), 'o')
plt.show()


def f(x):
    return x**2 + 10*np.sin(x)


x = np.arange(-10, 10, 0.1)
plt.plot(x, f(x))
plt.show()


# Root finding
root = optimize.root(f, 1)  # our initial guess is 1
print("First root found %s" % root.x)
root2 = optimize.root(f, -2.5)
print("Second root found %s" % root2.x)

First root found [0.]
Second root found [-2.47948183]


# Find minima
# Global optimization
grid = (-10, 10, 0.1)
xmin_global = optimize.brute(f, (grid, ))
print("Global minima found %s" % xmin_global)

# Constrain optimization
xmin_local = optimize.fminbound(f, 0, 10)
print("Local minimum found %s" % xmin_local)

Global minima found [-1.30641113]
Local minimum found 3.8374671194983834


# Plot function, minima, and roots
fig = plt.figure(figsize=(6, 4))
ax = fig.add_subplot(111)

# Plot the function
ax.plot(x, f(x), 'b-', label="f(x)")

# Plot the minima
xmins = np.array([xmin_global[0], xmin_local])
ax.plot(xmins, f(xmins), 'go', label="Minima")

# Plot the roots
roots = np.array([root.x, root2.x])
ax.plot(roots, f(roots), 'kv', label="Roots")

# Decorate the figure
ax.legend(loc='best')
ax.set_xlabel('x')
ax.set_ylabel('f(x)')
ax.axhline(0, color='gray')
plt.show()


# Sample from a normal distribution using numpy's random number generator
samples = np.random.normal(size=10000)

# Compute a histogram of the sample
bins = np.linspace(-5, 5, 30)
histogram, bins = np.histogram(samples, bins=bins, normed=True)

bin_centers = 0.5*(bins[1:] + bins[:-1])

# Compute the PDF on the bin centers from scipy distribution object
from scipy import stats
pdf = stats.norm.pdf(bin_centers)

plt.figure(figsize=(6, 4))
plt.plot(bin_centers, histogram, label="Histogram of samples")
plt.plot(bin_centers, pdf, label="PDF")
plt.legend()
plt.show()

<ipython-input-42-77231af28dea>:6: VisibleDeprecationWarning: Passing `normed=True` on non-uniform bins has always been broken, and computes neither the probability density function nor the probability mass function. The result is only correct if the bins are uniform, when density=True will produce the same result anyway. The argument will be removed in a future version of numpy.
  histogram, bins = np.histogram(samples, bins=bins, normed=True)


mean, std = stats.norm.fit(samples)
print(mean,std)

-0.01824647525837795 0.9870225650686327


np.mean(samples)

-0.01824647525837795


np.median(samples)

-0.025087317353292057


# The median is also the percentile 50, because 50% of the observation are below it:
stats.scoreatpercentile(samples, 50)

-0.025087317353292057


# Similarly, we can calculate the percentile 90:
stats.scoreatpercentile(samples, 90)

1.2510702078093916


help(np.random.normal)

Help on built-in function normal:

normal(...) method of numpy.random.mtrand.RandomState instance
    normal(loc=0.0, scale=1.0, size=None)
    
    Draw random samples from a normal (Gaussian) distribution.
    
    The probability density function of the normal distribution, first
    derived by De Moivre and 200 years later by both Gauss and Laplace
    independently [2]_, is often called the bell curve because of
    its characteristic shape (see the example below).
    
    The normal distributions occurs often in nature.  For example, it
    describes the commonly occurring distribution of samples influenced
    by a large number of tiny, random disturbances, each with its own
    unique distribution [2]_.
    
    .. note::
        New code should use the ``normal`` method of a ``default_rng()``
        instance instead; please see the :ref:`random-quick-start`.
    
    Parameters
    ----------
    loc : float or array_like of floats
        Mean ("centre") of the distribution.
    scale : float or array_like of floats
        Standard deviation (spread or "width") of the distribution. Must be
        non-negative.
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  If size is ``None`` (default),
        a single value is returned if ``loc`` and ``scale`` are both scalars.
        Otherwise, ``np.broadcast(loc, scale).size`` samples are drawn.
    
    Returns
    -------
    out : ndarray or scalar
        Drawn samples from the parameterized normal distribution.
    
    See Also
    --------
    scipy.stats.norm : probability density function, distribution or
        cumulative density function, etc.
    Generator.normal: which should be used for new code.
    
    Notes
    -----
    The probability density for the Gaussian distribution is
    
    .. math:: p(x) = \frac{1}{\sqrt{ 2 \pi \sigma^2 }}
                     e^{ - \frac{ (x - \mu)^2 } {2 \sigma^2} },
    
    where :math:`\mu` is the mean and :math:`\sigma` the standard
    deviation. The square of the standard deviation, :math:`\sigma^2`,
    is called the variance.
    
    The function has its peak at the mean, and its "spread" increases with
    the standard deviation (the function reaches 0.607 times its maximum at
    :math:`x + \sigma` and :math:`x - \sigma` [2]_).  This implies that
    normal is more likely to return samples lying close to the mean, rather
    than those far away.
    
    References
    ----------
    .. [1] Wikipedia, "Normal distribution",
           https://en.wikipedia.org/wiki/Normal_distribution
    .. [2] P. R. Peebles Jr., "Central Limit Theorem" in "Probability,
           Random Variables and Random Signal Principles", 4th ed., 2001,
           pp. 51, 51, 125.
    
    Examples
    --------
    Draw samples from the distribution:
    
    >>> mu, sigma = 0, 0.1 # mean and standard deviation
    >>> s = np.random.normal(mu, sigma, 1000)
    
    Verify the mean and the variance:
    
    >>> abs(mu - np.mean(s))
    0.0  # may vary
    
    >>> abs(sigma - np.std(s, ddof=1))
    0.1  # may vary
    
    Display the histogram of the samples, along with
    the probability density function:
    
    >>> import matplotlib.pyplot as plt
    >>> count, bins, ignored = plt.hist(s, 30, density=True)
    >>> plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) *
    ...                np.exp( - (bins - mu)**2 / (2 * sigma**2) ),
    ...          linewidth=2, color='r')
    >>> plt.show()
    
    Two-by-four array of samples from N(3, 6.25):
    
    >>> np.random.normal(3, 2.5, size=(2, 4))
    array([[-4.49401501,  4.00950034, -1.81814867,  7.29718677],   # random
           [ 0.39924804,  4.68456316,  4.99394529,  4.84057254]])  # random


# Generates 2 sets of observations
samples1 = np.random.normal(0, size=1000)
samples2 = np.random.normal(1, size=1000)

# Compute a histogram of the sample
bins = np.linspace(-4, 4, 30)
histogram1, bins = np.histogram(samples1, bins=bins, normed=True)
histogram2, bins = np.histogram(samples2, bins=bins, normed=True)

plt.figure(figsize=(6, 4))
plt.hist(samples1, bins=bins, normed=True, label="Samples 1")
plt.hist(samples2, bins=bins, normed=True, label="Samples 2")
plt.legend(loc='best')
plt.show()

<ipython-input-49-ef98de6e23f7>:7: VisibleDeprecationWarning: Passing `normed=True` on non-uniform bins has always been broken, and computes neither the probability density function nor the probability mass function. The result is only correct if the bins are uniform, when density=True will produce the same result anyway. The argument will be removed in a future version of numpy.
  histogram1, bins = np.histogram(samples1, bins=bins, normed=True)
<ipython-input-49-ef98de6e23f7>:8: VisibleDeprecationWarning: Passing `normed=True` on non-uniform bins has always been broken, and computes neither the probability density function nor the probability mass function. The result is only correct if the bins are uniform, when density=True will produce the same result anyway. The argument will be removed in a future version of numpy.
  histogram2, bins = np.histogram(samples2, bins=bins, normed=True)

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-49-ef98de6e23f7> in <module>
      9 
     10 plt.figure(figsize=(6, 4))
---> 11 plt.hist(samples1, bins=bins, normed=True, label="Samples 1")
     12 plt.hist(samples2, bins=bins, normed=True, label="Samples 2")
     13 plt.legend(loc='best')

C:\ProgramData\Anaconda3\lib\site-packages\matplotlib\pyplot.py in hist(x, bins, range, density, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, data, **kwargs)
   2683         orientation='vertical', rwidth=None, log=False, color=None,
   2684         label=None, stacked=False, *, data=None, **kwargs):
-> 2685     return gca().hist(
   2686         x, bins=bins, range=range, density=density, weights=weights,
   2687         cumulative=cumulative, bottom=bottom, histtype=histtype,

C:\ProgramData\Anaconda3\lib\site-packages\matplotlib\__init__.py in inner(ax, data, *args, **kwargs)
   1445     def inner(ax, *args, data=None, **kwargs):
   1446         if data is None:
-> 1447             return func(ax, *map(sanitize_sequence, args), **kwargs)
   1448 
   1449         bound = new_sig.bind(ax, *args, **kwargs)

C:\ProgramData\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in hist(self, x, bins, range, density, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, **kwargs)
   6813             if patch:
   6814                 p = patch[0]
-> 6815                 p.update(kwargs)
   6816                 if lbl is not None:
   6817                     p.set_label(lbl)

C:\ProgramData\Anaconda3\lib\site-packages\matplotlib\artist.py in update(self, props)
    994                     func = getattr(self, f"set_{k}", None)
    995                     if not callable(func):
--> 996                         raise AttributeError(f"{type(self).__name__!r} object "
    997                                              f"has no property {k!r}")
    998                     ret.append(func(v))

AttributeError: 'Rectangle' object has no property 'normed'

High-level Scientific Computing: SciPy

SciPy¶

File input/output: scipy.io¶

Linear algebra operations: scipy.linalg¶

Determinant of matrices¶

Matrix inverse¶

Singular-value decomposition (SVD)¶

Interpolation: scipy.interpolate¶

Optimization and fit: scipy.optimize¶

Curve fitting¶

Finding the minimum of a smooth function¶

Finding the roots of a scalar function¶

Statistics and random numbers: scipy.stats¶

Statistical Analysis using Python¶

Distributions: histogram and probability density function¶

Mean, median and percentiles¶

Comparing 2 sets of samples from Gaussians¶

References¶