import numpy as np
# Toy non-negative data matrix: six rows (samples) by two columns (features).
X = np.array([
    [1, 1],
    [2, 1],
    [3, 1.2],
    [4, 1],
    [5, 0.8],
    [6, 1],
])

from sklearn.decomposition import NMF
# NMF estimator with two components, random initialisation and a fixed
# random_state (seed 0).
model = NMF(
    n_components=2,
    init='random',
    random_state=0,
)


$ ipython
In [9]: model.fit??
Signature: model.fit(X, y=None, **params)
    def fit(self, X, y=None, **params):
        """Learn a NMF model for the data X.

        Parameters
        ----------
        X: {array-like, sparse matrix}, shape (n_samples, n_features)
            Data matrix to be decomposed

        Attributes
        ----------
        components_ : array-like, shape (n_components, n_features)
            Factorization matrix, sometimes called 'dictionary'.

        n_iter_ : int
            Actual number of iterations for the transform.

        Returns
        -------
        self
        """
        self.fit_transform(X, **params)
        return self

File:      ~/.pyenv/versions/anaconda3-4.0.0/lib/python3.5/site-packages/sklearn/decomposition/nmf.py
Type:      method


In [12]: model.fit_transform??
Signature: model.fit_transform(X, y=None, W=None, H=None)
    def fit_transform(self, X, y=None, W=None, H=None):
        """Learn a NMF model for the data X and returns the transformed data.

        This is more efficient than calling fit followed by transform.

        Parameters
        ----------
        X: {array-like, sparse matrix}, shape (n_samples, n_features)
            Data matrix to be decomposed

        W : array-like, shape (n_samples, n_components)
            If init='custom', it is used as initial guess for the solution.

        H : array-like, shape (n_components, n_features)
            If init='custom', it is used as initial guess for the solution.

        Attributes
        ----------
        components_ : array-like, shape (n_components, n_features)
            Factorization matrix, sometimes called 'dictionary'.

        n_iter_ : int
            Actual number of iterations for the transform.

        Returns
        -------
        W: array, shape (n_samples, n_components)
            Transformed data.
        """
        X = check_array(X, accept_sparse=('csr', 'csc'))

        W, H, n_iter_ = non_negative_factorization(
            X=X, W=W, H=H, n_components=self.n_components,
            init=self.init, update_H=True, solver=self.solver,
            tol=self.tol, max_iter=self.max_iter, alpha=self.alpha,
            l1_ratio=self.l1_ratio, regularization='both',
            random_state=self.random_state, verbose=self.verbose,
            shuffle=self.shuffle,
            nls_max_iter=self.nls_max_iter, sparseness=self.sparseness,
            beta=self.beta, eta=self.eta)

        if self.solver == 'pg':
            self.comp_sparseness_ = _sparseness(H.ravel())
            self.data_sparseness_ = _sparseness(W.ravel())

        self.reconstruction_err_ = _safe_compute_error(X, W, H)

        self.n_components_ = H.shape[0]
        self.components_ = H
        self.n_iter_ = n_iter_

        return W

File:      ~/.pyenv/versions/anaconda3-4.0.0/lib/python3.5/site-packages/sklearn/decomposition/nmf.py
Type:      method


In [13]: model.non_negative_factorization??
Object `model.non_negative_factorization` not found.


File:      ~/.pyenv/versions/anaconda3-4.0.0/lib/python3.5/site-packages/sklearn/decomposition/nmf.py


In [14]: import sklearn.decomposition
In [15]: sklearn.decomposition.nmf.non_negative_factorization??
Signature: sklearn.decomposition.nmf.non_negative_factorization(X, W=None, H=None, n_components=None, init='random', update_H=True, sol
def non_negative_factorization(X, W=None, H=None, n_components=None,
                               init='random', update_H=True, solver='cd',
                               tol=1e-4, max_iter=200, alpha=0., l1_ratio=0.,
                               regularization=None, random_state=None,
                               verbose=0, shuffle=False, nls_max_iter=2000,
                               sparseness=None, beta=1, eta=0.1):
    """Compute Non-negative Matrix Factorization (NMF)

    Find two non-negative matrices (W, H) whose product approximates the non-
    negative matrix X. This factorization can be used for example for
    dimensionality reduction, source separation or topic extraction.

    The objective function is::

        0.5 * ||X - WH||_Fro^2
        + alpha * l1_ratio * ||vec(W)||_1
        + alpha * l1_ratio * ||vec(H)||_1
        + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2
        + 0.5 * alpha * (1 - l1_ratio) * ||H||_Fro^2

    Where::

        ||A||_Fro^2 = \sum_{i,j} A_{ij}^2 (Frobenius norm)
        ||vec(A)||_1 = \sum_{i,j} abs(A_{ij}) (Elementwise L1 norm)

    The objective function is minimized with an alternating minimization of W
    and H. If H is given and update_H=False, it solves for W only.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Constant matrix.

    W : array-like, shape (n_samples, n_components)
        If init='custom', it is used as initial guess for the solution.

    H : array-like, shape (n_components, n_features)
        If init='custom', it is used as initial guess for the solution.
        If update_H=False, it is used as a constant, to solve for W only.

    n_components : integer
        Number of components, if n_components is not set all features
        are kept.

    init :  None | 'random' | 'nndsvd' | 'nndsvda' | 'nndsvdar' | 'custom'
        Method used to initialize the procedure.
        Default: 'nndsvd' if n_components < n_features, otherwise random.
        Valid options:

        - 'random': non-negative random matrices, scaled with:
            sqrt(X.mean() / n_components)

        - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)
            initialization (better for sparseness)

        - 'nndsvda': NNDSVD with zeros filled with the average of X
            (better when sparsity is not desired)

        - 'nndsvdar': NNDSVD with zeros filled with small random values
            (generally faster, less accurate alternative to NNDSVDa
            for when sparsity is not desired)

        - 'custom': use custom matrices W and H

    update_H : boolean, default: True
        Set to True, both W and H will be estimated from initial guesses.
        Set to False, only W will be estimated.

    solver : 'pg' | 'cd'
        Numerical solver to use:
        'pg' is a (deprecated) Projected Gradient solver.
        'cd' is a Coordinate Descent solver.

    tol : float, default: 1e-4
        Tolerance of the stopping condition.

    max_iter : integer, default: 200
        Maximum number of iterations before timing out.

    alpha : double, default: 0.
        Constant that multiplies the regularization terms.

    l1_ratio : double, default: 0.
        The regularization mixing parameter, with 0 <= l1_ratio <= 1.
        For l1_ratio = 0 the penalty is an elementwise L2 penalty
        (aka Frobenius Norm).
        For l1_ratio = 1 it is an elementwise L1 penalty.
        For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.

    regularization : 'both' | 'components' | 'transformation' | None
        Select whether the regularization affects the components (H), the
        transformation (W), both or none of them.

    random_state : integer seed, RandomState instance, or None (default)
        Random number generator seed control.

    verbose : integer, default: 0
        The verbosity level.

    shuffle : boolean, default: False
        If true, randomize the order of coordinates in the CD solver.

    nls_max_iter : integer, default: 2000
        Number of iterations in NLS subproblem.
        Used only in the deprecated 'pg' solver.

    sparseness : 'data' | 'components' | None, default: None
        Where to enforce sparsity in the model.
        Used only in the deprecated 'pg' solver.

    beta : double, default: 1
        Degree of sparseness, if sparseness is not None. Larger values mean
        more sparseness. Used only in the deprecated 'pg' solver.

    eta : double, default: 0.1
        Degree of correctness to maintain, if sparsity is not None. Smaller
        values mean larger error. Used only in the deprecated 'pg' solver.

    Returns
    -------
    W : array-like, shape (n_samples, n_components)
        Solution to the non-negative least squares problem.

    H : array-like, shape (n_components, n_features)
        Solution to the non-negative least squares problem.

    n_iter : int
        Actual number of iterations.

    References
    ----------
    C.-J. Lin. Projected gradient methods for non-negative matrix
    factorization. Neural Computation, 19(2007), 2756-2779.

    Cichocki, Andrzej, and P. H. A. N. Anh-Huy. "Fast local algorithms for
    large scale nonnegative matrix and tensor factorizations."
    IEICE transactions on fundamentals of electronics, communications and
    computer sciences 92.3: 708-721, 2009.
    """

    X = check_array(X, accept_sparse=('csr', 'csc'))
    check_non_negative(X, "NMF (input X)")
    _check_string_param(sparseness, solver)

    n_samples, n_features = X.shape
    if n_components is None:
        n_components = n_features

    if not isinstance(n_components, INTEGER_TYPES) or n_components <= 0:
        raise ValueError("Number of components must be a positive integer;"
                         " got (n_components=%r)" % n_components)
    if not isinstance(max_iter, INTEGER_TYPES) or max_iter < 0:
        raise ValueError("Maximum number of iterations must be a positive integer;"
                         " got (max_iter=%r)" % max_iter)
    if not isinstance(tol, numbers.Number) or tol < 0:
        raise ValueError("Tolerance for stopping criteria must be "
                         "positive; got (tol=%r)" % tol)

    # check W and H, or initialize them
    if init == 'custom' and update_H:
        _check_init(H, (n_components, n_features), "NMF (input H)")
        _check_init(W, (n_samples, n_components), "NMF (input W)")
    elif not update_H:
        _check_init(H, (n_components, n_features), "NMF (input H)")
        W = np.zeros((n_samples, n_components))
    else:
        W, H = _initialize_nmf(X, n_components, init=init,
                               random_state=random_state)

    if solver == 'pg':
        warnings.warn("'pg' solver will be removed in release 0.19."
                      " Use 'cd' solver instead.", DeprecationWarning)
        if update_H:  # fit_transform
            W, H, n_iter = _fit_projected_gradient(X, W, H, tol,
                                                   max_iter,
                                                   nls_max_iter,
                                                   alpha, l1_ratio,
                                                   sparseness,
                                                   beta, eta)
        else:  # transform
            W, H, n_iter = _update_projected_gradient_w(X, W, H,
                                                        tol, nls_max_iter,
                                                        alpha, l1_ratio,
                                                        sparseness, beta,
                                                        eta)
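    elif solver == 'cd':
        W, H, n_iter = _fit_coordinate_descent(X, W, H, tol,
                                               max_iter,
                                               alpha, l1_ratio,
                                               regularization,
                                               update_H=update_H,
                                               verbose=verbose,
                                               shuffle=shuffle,
                                               random_state=random_state)
    else:
        raise ValueError("Invalid solver parameter '%s'." % solver)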

    if n_iter == max_iter:
        warnings.warn("Maximum number of iteration %d reached. Increase it to"
                      " improve convergence." % max_iter, ConvergenceWarning)

    return W, H, n_iter
File:      ~/.pyenv/versions/anaconda3-4.0.0/lib/python3.5/site-packages/sklearn/decomposition/nmf.py
Type:      function


In [19]: sklearn.decomposition.nmf._update_projected_gradient_w??
Signature: sklearn.decomposition.nmf._update_projected_gradient_w(X, W, H, tolW, nls_max_iter, alpha, l1_ratio, sparse
def _update_projected_gradient_w(X, W, H, tolW, nls_max_iter, alpha, l1_ratio,
                                 sparseness, beta, eta):
    """Helper function for _fit_projected_gradient"""
    n_samples, n_features = X.shape
    n_components_ = H.shape[0]

    if sparseness is None:
        Wt, gradW, iterW = _nls_subproblem(X.T, H.T, W.T, tolW, nls_max_iter,
                                           alpha=alpha, l1_ratio=l1_ratio)
    elif sparseness == 'data':
        Wt, gradW, iterW = _nls_subproblem(
            safe_vstack([X.T, np.zeros((1, n_samples))]),
            safe_vstack([H.T, np.sqrt(beta) * np.ones((1,
                         n_components_))]),
            W.T, tolW, nls_max_iter, alpha=alpha, l1_ratio=l1_ratio)
    elif sparseness == 'components':
        Wt, gradW, iterW = _nls_subproblem(
            safe_vstack([X.T,
                         np.zeros((n_components_, n_samples))]),
            safe_vstack([H.T,
                         np.sqrt(eta) * np.eye(n_components_)]),
            W.T, tolW, nls_max_iter, alpha=alpha, l1_ratio=l1_ratio)

    return Wt.T, gradW.T, iterW

File:      ~/.pyenv/versions/anaconda3-4.0.0/lib/python3.5/site-packages/sklearn/decomposition/nmf.py
Type:      function


In [20]: sklearn.decomposition.nmf._nls_subproblem??
Signature: sklearn.decomposition.nmf._nls_subproblem(V, W, H, tol, max_iter, alpha=0.0, l1_ratio=0.0, sigma=0.01, beta
def _nls_subproblem(V, W, H, tol, max_iter, alpha=0., l1_ratio=0.,
                    sigma=0.01, beta=0.1):
    """Non-negative least square solver

    Solves a non-negative least squares subproblem using the projected
    gradient descent algorithm.

    Parameters
    ----------
    V : array-like, shape (n_samples, n_features)
        Constant matrix.

    W : array-like, shape (n_samples, n_components)
        Constant matrix.

    H : array-like, shape (n_components, n_features)
        Initial guess for the solution.

    tol : float
        Tolerance of the stopping condition.

    max_iter : int
        Maximum number of iterations before timing out.

    alpha : double, default: 0.
        Constant that multiplies the regularization terms. Set it to zero to
        have no regularization.

    l1_ratio : double, default: 0.
        The regularization mixing parameter, with 0 <= l1_ratio <= 1.
        For l1_ratio = 0 the penalty is an L2 penalty.
        For l1_ratio = 1 it is an L1 penalty.
        For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.

    sigma : float
        Constant used in the sufficient decrease condition checked by the line
        search.  Smaller values lead to a looser sufficient decrease condition,
        thus reducing the time taken by the line search, but potentially
        increasing the number of iterations of the projected gradient
        procedure. 0.01 is a commonly used value in the optimization
        literature.

    beta : float
        Factor by which the step size is decreased (resp. increased) until
        (resp. as long as) the sufficient decrease condition is satisfied.
        Larger values allow to find a better step size but lead to longer line
        search. 0.1 is a commonly used value in the optimization literature.

    Returns
    -------
    H : array-like, shape (n_components, n_features)
        Solution to the non-negative least squares problem.

    grad : array-like, shape (n_components, n_features)
        The gradient.

    n_iter : int
        The number of iterations done by the algorithm.

    References
    ----------
    C.-J. Lin. Projected gradient methods for non-negative matrix
    factorization. Neural Computation, 19(2007), 2756-2779.
    """
    WtV = safe_sparse_dot(W.T, V)
    WtW = fast_dot(W.T, W)

    # values justified in the paper (alpha is renamed gamma)
    gamma = 1
    for n_iter in range(1, max_iter + 1):
        grad = np.dot(WtW, H) - WtV
        if alpha > 0 and l1_ratio == 1.:
            grad += alpha
        elif alpha > 0:
            grad += alpha * (l1_ratio + (1 - l1_ratio) * H)

        # The following multiplication with a boolean array is more than twice
        # as fast as indexing into grad.
        if norm(grad * np.logical_or(grad < 0, H > 0)) < tol:
            break

        Hp = H

        for inner_iter in range(20):
            # Gradient step.
            Hn = H - gamma * grad
            # Projection step.
            Hn *= Hn > 0
            d = Hn - H
            gradd = np.dot(grad.ravel(), d.ravel())
            dQd = np.dot(np.dot(WtW, d).ravel(), d.ravel())
            suff_decr = (1 - sigma) * gradd + 0.5 * dQd < 0
            if inner_iter == 0:
                decr_gamma = not suff_decr

            if decr_gamma:
                if suff_decr:
                    H = Hn
                    break
                else:
                    gamma *= beta
            elif not suff_decr or (Hp == Hn).all():
                H = Hp
                break
            else:
                gamma /= beta
                Hp = Hn

    if n_iter == max_iter:
        warnings.warn("Iteration limit reached in nls subproblem.")

    return H, grad, n_iter

File:      ~/.pyenv/versions/anaconda3-4.0.0/lib/python3.5/site-packages/sklearn/decomposition/nmf.py
Type:      function





