Source code for imml.impute.dfmf_imputer

# License: BSD-3-Clause

import pandas as pd

from ..decomposition import DFMF
from ..utils import check_Xs_y



[docs]
class DFMFImputer(DFMF):
    r"""
    Impute missing data in a dataset using the `DFMF` method. [#dfmfpaper]_ [#dfmfcode]_

    This class extends the `DFMF` class to provide functionality for filling in incomplete samples by
    addressing both block-wise and feature-wise missing data. As a subclass of DFMF, `DFMFImputer` inherits all
    input parameters and attributes from `DFMF`. Consequently, it uses the same `fit` method as DFMF for
    training the model.

    References
    ----------
    .. [#dfmfpaper] M. Žitnik and B. Zupan, "Data Fusion by Matrix Factorization," in IEEE Transactions on Pattern
                    Analysis and Machine Intelligence, vol. 37, no. 1, pp. 41-53, 1 Jan. 2015,
                    doi: 10.1109/TPAMI.2014.2343973.
    .. [#dfmfcode] https://github.com/mims-harvard/scikit-fusion/tree/master

    See Also
    --------
    :class:`~imml.decomposition.DFMF`

    Example
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from imml.impute import DFMFImputer
    >>> Xs = [pd.DataFrame(np.random.default_rng(42).random((20, 10))) for i in range(3)]
    >>> transformer = DFMFImputer(n_components = 5)
    >>> labels = transformer.fit_transform(Xs)
    """


    def __init__(self, **kwargs):
        super().__init__(**kwargs)



[docs]
    def fit_transform(self, Xs, y = None, **fit_params):
        r"""
        Fit to data, then impute them.

        Parameters
        ----------
        Xs : list of array-likes objects
            - Xs length: n_mods
            - Xs[i] shape: (n_samples_i, n_features_i)

            A list of different mods.
        y : Ignored
            Not used, present here for API consistency by convention.
        fit_params : Ignored
            Not used, present here for API consistency by convention.

        Returns
        -------
        transformed_X : array-likes objects of shape (n_samples, n_components)
            The transformed data with filled missing samples.
        """

        self.fit(Xs)
        imputed_Xs = [self.fuser_.complete(relation) for relation in self.fuser_.fusion_graph.relations]
        if not isinstance(Xs[0], pd.DataFrame):
            Xs = [pd.DataFrame(X) for X in Xs]
        transformed_Xs = []
        for X, transformed_X in zip(Xs, imputed_Xs):
            transformed_X = pd.DataFrame(transformed_X, columns=X.columns)
            transformed_Xs.append(X.fillna(transformed_X))

        if self.transform_ == "pandas":
            transformed_Xs = [pd.DataFrame(transformed_X, index=X.index, columns=X.columns)
                              for transformed_X, X in zip(transformed_Xs, Xs)]
        return transformed_Xs