Source code for imml.impute.dfmf_imputer
# License: BSD-3-Clause
import pandas as pd
from ..decomposition import DFMF
from ..utils import check_Xs_y
[docs]
class DFMFImputer(DFMF):
    r"""
    Impute missing data in a dataset using the `DFMF` method. [#dfmfpaper]_ [#dfmfcode]_

    This class extends the `DFMF` class to provide functionality for filling in incomplete samples by
    addressing both block-wise and feature-wise missing data. As a subclass of DFMF, `DFMFImputer` inherits all
    input parameters and attributes from `DFMF`. Consequently, it uses the same `fit` method as DFMF for
    training the model.

    References
    ----------
    .. [#dfmfpaper] M. Žitnik and B. Zupan, "Data Fusion by Matrix Factorization," in IEEE Transactions on Pattern
                    Analysis and Machine Intelligence, vol. 37, no. 1, pp. 41-53, 1 Jan. 2015,
                    doi: 10.1109/TPAMI.2014.2343973.
    .. [#dfmfcode] https://github.com/mims-harvard/scikit-fusion/tree/master

    See Also
    --------
    :class:`~imml.decomposition.DFMF`

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from imml.impute import DFMFImputer
    >>> Xs = [pd.DataFrame(np.random.default_rng(42).random((20, 10))) for i in range(3)]
    >>> transformer = DFMFImputer(n_components = 5)
    >>> imputed_Xs = transformer.fit_transform(Xs)
    """

    def __init__(self, **kwargs):
        # All keyword arguments are forwarded unchanged to the parent `DFMF` constructor.
        super().__init__(**kwargs)

    def fit_transform(self, Xs, y = None, **fit_params):
        r"""
        Fit to data, then impute them.

        Parameters
        ----------
        Xs : list of array-likes objects
            - Xs length: n_mods
            - Xs[i] shape: (n_samples_i, n_features_i)

            A list of different mods.
        y : Ignored
            Not used, present here for API consistency by convention.
        fit_params : Ignored
            Not used, present here for API consistency by convention.

        Returns
        -------
        transformed_Xs : list of pandas.DataFrame
            - transformed_Xs length: n_mods
            - transformed_Xs[i] shape: (n_samples_i, n_features_i)

            The input mods with their missing entries replaced by the values completed by the fitted model.
            Observed (non-missing) entries are returned unchanged, and each DataFrame keeps the index and
            columns of the corresponding input.
        """
        self.fit(Xs)
        # NOTE(review): assumes `fusion_graph.relations` yields one relation per modality, in the same
        # order as `Xs`, and that `complete` returns an array shaped like that modality -- confirm in DFMF.
        imputed_Xs = [self.fuser_.complete(relation) for relation in self.fuser_.fusion_graph.relations]

        # Convert each modality on its own, so a list mixing DataFrames and plain arrays is handled too.
        Xs = [X if isinstance(X, pd.DataFrame) else pd.DataFrame(X) for X in Xs]

        transformed_Xs = []
        for X, imputed_X in zip(Xs, imputed_Xs):
            # `DataFrame.fillna` aligns on both row and column labels, so the completed matrix must carry
            # the index of `X`; with a default RangeIndex, inputs indexed by sample ids were never filled.
            imputed_X = pd.DataFrame(imputed_X, index=X.index, columns=X.columns)
            transformed_Xs.append(X.fillna(imputed_X))

        # NOTE(review): the frames above already carry the index/columns of `X`, so this re-wrap does not
        # change them, and DataFrames are returned for any other `transform_` value too -- confirm intent.
        if self.transform_ == "pandas":
            transformed_Xs = [pd.DataFrame(transformed_X, index=X.index, columns=X.columns)
                              for transformed_X, X in zip(transformed_Xs, Xs)]
        return transformed_Xs