Source code for imml.impute.mofa_imputer
# License: BSD-3-Clause
import numpy as np
import pandas as pd
from ..decomposition.mofa import MOFA
from ..utils import check_Xs_y
[docs]
class MOFAImputer(MOFA):
    r"""
    Impute missing data in a dataset using the `MOFA` method. [#mofapaper1]_ [#mofapaper2]_ [#mofacode]_

    This class extends the `MOFA` class to provide functionality for filling in incomplete samples by
    addressing both block-wise and feature-wise missing data. As a subclass of MOFA, `MOFAImputer` inherits all
    input parameters and attributes from `MOFA`. Consequently, it uses the same `fit` method as MOFA for
    training the model.

    Missing entries are replaced by the corresponding entries of the low-rank reconstruction
    ``factors_ @ weights_[i].T`` of each modality; observed entries are left untouched.

    References
    ----------
    .. [#mofapaper1] Argelaguet R, Velten B, Arnol D, Dietrich S, Zenz T, Marioni JC, Buettner F, Huber W, Stegle O
                     (2018). “Multi‐Omics Factor Analysis—a framework for unsupervised integration of multi‐omics data
                     sets.” Molecular Systems Biology, 14. doi:10.15252/msb.20178124.
    .. [#mofapaper2] Argelaguet R, Arnol D, Bredikhin D, Deloro Y, Velten B, Marioni JC, Stegle O (2020). “MOFA+: a
                     statistical framework for comprehensive integration of multi-modal single-cell data.” Genome
                     Biology, 21. doi:10.1186/s13059-020-02015-1.
    .. [#mofacode] https://biofam.github.io/MOFA2/index.html

    See Also
    --------
    :class:`~imml.decomposition.MOFA`

    Example
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from imml.impute import MOFAImputer
    >>> Xs = [pd.DataFrame(np.random.default_rng(42).random((20, 10))) for i in range(3)]
    >>> transformer = MOFAImputer(n_components = 5)
    >>> imputed_Xs = transformer.fit_transform(Xs)
    """

    def fit_transform(self, Xs, y=None, **fit_params):
        r"""
        Fit to data, then impute them.

        Parameters
        ----------
        Xs : list of array-likes objects
            - Xs length: n_mods
            - Xs[i] shape: (n_samples_i, n_features_i)

            A list of different mods.
        y : Ignored
            Not used, present here for API consistency by convention.
        fit_params : Ignored
            Not used, present here for API consistency by convention.

        Returns
        -------
        transformed_Xs : list of array-likes objects
            - transformed_Xs length: n_mods
            - transformed_Xs[i] shape: (n_samples_i, n_features_i)

            The input modalities with their missing entries filled in. Elements are
            ``numpy`` arrays when ``self.transform_ == "numpy"`` and ``pandas``
            DataFrames otherwise.
        """
        Xs = check_Xs_y(Xs, ensure_all_finite='allow-nan')
        # Convert each modality on its own so a mixed list (DataFrame first, arrays
        # after) does not reach the `.columns` / `.fillna` calls below unconverted.
        Xs = [X if isinstance(X, pd.DataFrame) else pd.DataFrame(X) for X in Xs]
        self.fit(Xs)

        # The factor matrix is shared by all modalities; clean it once. NaN factors
        # are zeroed so they contribute nothing to the reconstruction.
        factors = np.nan_to_num(self.factors_, nan=0.0)

        transformed_Xs = []
        for X, w in zip(Xs, self.weights_):
            # `fillna` with a DataFrame aligns on BOTH row and column labels, so the
            # reconstruction must carry X's index. With a default RangeIndex, inputs
            # labelled by sample IDs would silently keep their NaNs.
            reconstruction = pd.DataFrame(np.dot(factors, w.T), index=X.index, columns=X.columns)
            transformed_Xs.append(X.fillna(reconstruction))

        # The imputed frames already carry each modality's index and columns, so only
        # the "numpy" output mode needs a conversion.
        if self.transform_ == "numpy":
            transformed_Xs = [transformed_X.values for transformed_X in transformed_Xs]
        return transformed_Xs