# License: BSD-3-Clause
import numpy as np
import pandas as pd
from sklearn.preprocessing import FunctionTransformer
from ..utils import check_Xs_y
from ..explore import get_samples
[docs]
class DropMod(FunctionTransformer):
    r"""
    A transformer that drops a specified modality from a multi-modal dataset.

    Apply `FunctionTransformer <https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.FunctionTransformer.html>`_
    (from `Scikit-learn`) with `drop_mod` as a function.

    Parameters
    ----------
    X_idx : int, default=0
        The index of the modality to drop from the input data.

    Example
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from imml.preprocessing import DropMod
    >>> Xs = [pd.DataFrame(np.random.default_rng(42).random((20, 10))) for i in range(3)]
    >>> transformer = DropMod(X_idx = 1)
    >>> transformer.fit_transform(Xs)
    """

    def __init__(self, X_idx: int = 0):
        self.X_idx = X_idx
        super().__init__(drop_mod, kw_args={"X_idx": X_idx})

    def set_params(self, **params):
        """
        Set the parameters of this transformer and refresh ``kw_args``.

        ``kw_args`` is the dictionary that ``FunctionTransformer`` forwards to
        ``drop_mod``. It is built once in ``__init__``, so without this
        override a later ``set_params(X_idx=...)`` (e.g. from a grid search)
        would update ``X_idx`` but keep dropping the previous modality.

        Returns
        -------
        self : DropMod
            The transformer itself.
        """
        super().set_params(**params)
        self.kw_args = {"X_idx": self.X_idx}
        return self
[docs]
class ConcatenateMods(FunctionTransformer):
    r"""
    Transformer that joins every modality of a multi-modal dataset into a single one.

    It is a `FunctionTransformer <https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.FunctionTransformer.html>`_
    (from `Scikit-learn`) whose wrapped function is `concatenate_mods`.

    Example
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from imml.preprocessing import ConcatenateMods
    >>> Xs = [pd.DataFrame(np.random.default_rng(42).random((20, 10))) for i in range(3)]
    >>> transformer = ConcatenateMods()
    >>> transformer.fit_transform(Xs)
    """

    def __init__(self):
        # No hyper-parameters: only the wrapped function is handed to the parent.
        super().__init__(func=concatenate_mods)
[docs]
class SingleMod(FunctionTransformer):
    r"""
    Transformer that selects a single modality from multi-modal data.

    Apply `FunctionTransformer <https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.FunctionTransformer.html>`_
    (from `Scikit-learn`) with `single_mod` as a function.

    Parameters
    ----------
    X_idx : int, default=0
        The index of the modality to select from the input data.

    Example
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from imml.preprocessing import SingleMod
    >>> Xs = [pd.DataFrame(np.random.default_rng(42).random((20, 10))) for i in range(3)]
    >>> transformer = SingleMod(X_idx = 1)
    >>> transformer.fit_transform(Xs)
    """

    def __init__(self, X_idx: int = 0):
        self.X_idx = X_idx
        super().__init__(single_mod, kw_args={"X_idx": X_idx})

    def set_params(self, **params):
        """
        Set the parameters of this transformer and refresh ``kw_args``.

        ``kw_args`` is the dictionary that ``FunctionTransformer`` forwards to
        ``single_mod``. It is built once in ``__init__``, so without this
        override a later ``set_params(X_idx=...)`` (e.g. from a grid search)
        would update ``X_idx`` but keep selecting the previous modality.

        Returns
        -------
        self : SingleMod
            The transformer itself.
        """
        super().set_params(**params)
        self.kw_args = {"X_idx": self.X_idx}
        return self
[docs]
class AddMissingMods(FunctionTransformer):
    r"""
    Transformer to add missing samples in each modality, in a way that all the modalities will have the same samples.

    Apply `FunctionTransformer <https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.FunctionTransformer.html>`_
    (from `Scikit-learn`) with `add_missing_mods` as a function.

    This transformer is applied on individual modalities, so for applying in a multi-modal dataset, we recommend
    to use it with `MMTransformer`.

    Parameters
    ----------
    samples : array-like (n_samples,)
        pd.Index with all samples

    Example
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from imml.preprocessing import AddMissingMods
    >>> from imml.explore import get_samples
    >>> Xs = [pd.DataFrame(np.random.default_rng(42).random((20, 10))) for i in range(3)]
    >>> samples = get_samples(Xs= Xs)
    >>> transformer = AddMissingMods(samples= samples)
    >>> transformer.fit_transform(Xs)
    """

    def __init__(self, samples: pd.Index):
        self.samples = samples
        super().__init__(add_missing_mods, kw_args={"samples": samples})

    def set_params(self, **params):
        """
        Set the parameters of this transformer and refresh ``kw_args``.

        ``kw_args`` is the dictionary that ``FunctionTransformer`` forwards to
        ``add_missing_mods``. It is built once in ``__init__``, so without
        this override a later ``set_params(samples=...)`` would update
        ``samples`` but keep transforming with the previous value.

        Returns
        -------
        self : AddMissingMods
            The transformer itself.
        """
        super().set_params(**params)
        self.kw_args = {"samples": self.samples}
        return self
[docs]
class SortData(FunctionTransformer):
    r"""
    Transformer that establishes and assesses the order of an incomplete multi-modal dataset.

    It is a `FunctionTransformer <https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.FunctionTransformer.html>`_
    (from `Scikit-learn`) whose wrapped function is `sort_data`.

    Example
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from imml.preprocessing import SortData
    >>> Xs = [pd.DataFrame(np.random.default_rng(42).random((20, 10))) for i in range(3)]
    >>> transformer = SortData()
    >>> transformer.fit_transform(Xs)
    """

    def __init__(self):
        # No hyper-parameters: only the wrapped function is handed to the parent.
        super().__init__(func=sort_data)
[docs]
def concatenate_mods(Xs: list):
    r"""
    A function that concatenates all features from a multi-modal dataset.

    Parameters
    ----------
    Xs : list of array-likes objects
        - Xs length: n_mods
        - Xs[i] shape: (n_samples, n_features_i)

        A list of different mods.

    Returns
    -------
    transformed_Xs : array-like, shape (n_samples, n_features)
        The transformed dataset. A pandas DataFrame when the first modality is
        a DataFrame, otherwise a NumPy array.

    Example
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from imml.preprocessing import concatenate_mods
    >>> Xs = [pd.DataFrame(np.random.default_rng(42).random((20, 10))) for i in range(3)]
    >>> concatenate_mods(Xs=Xs)
    """
    # The container type is decided by the first modality only.
    if isinstance(Xs[0], pd.DataFrame):
        return pd.concat(Xs, axis=1)
    # Any other array-like (ndarray, nested lists, ...) goes through NumPy, so
    # the result is always defined instead of failing on an unbound variable.
    return np.concatenate(Xs, axis=1)
[docs]
def drop_mod(Xs, X_idx: int = 0):
    r"""
    A function that drops a specified modality from a multi-modal dataset.

    Parameters
    ----------
    Xs : list of array-likes objects
        - Xs length: n_mods
        - Xs[i] shape: (n_samples, n_features_i)

        A list of different mods.
    X_idx : int, default=0
        The index of the mod to drop from the input data. Must satisfy
        ``0 <= X_idx < n_mods``.

    Returns
    -------
    transformed_X : list of array-likes objects (n_samples, n_features_i)
        The transformed multi-modal dataset.

    Raises
    ------
    ValueError
        If ``X_idx`` is negative or not smaller than the number of modalities.

    Example
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from imml.preprocessing import drop_mod
    >>> Xs = [pd.DataFrame(np.random.default_rng(42).random((20, 10))) for i in range(3)]
    >>> drop_mod(Xs=Xs, X_idx = 1)
    """
    # Negative indices must be rejected too: the slicing below would otherwise
    # silently return a wrong list (e.g. X_idx=-1 gives Xs[:-1] + Xs[0:]).
    if not 0 <= X_idx < len(Xs):
        raise ValueError("X_idx out of range. Should be between 0 and n_mods - 1")
    Xs = check_Xs_y(Xs, ensure_all_finite='allow-nan')
    # Keep everything before and after the dropped modality.
    return Xs[:X_idx] + Xs[X_idx + 1:]
[docs]
def single_mod(Xs, X_idx: int = 0):
    r"""
    Pick one modality out of a multi-modal dataset.

    Parameters
    ----------
    Xs : list of array-likes objects
        - Xs length: n_mods
        - Xs[i] shape: (n_samples, n_features_i)

        A list of different mods.
    X_idx : int, default=0
        Position of the mod to return.

    Returns
    -------
    transformed_Xs : array-like, shape (n_samples, n_features)
        The selected modality, taken from the output of ``check_Xs_y``.

    Raises
    ------
    ValueError
        If ``X_idx`` is greater than or equal to the number of modalities.

    Example
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from imml.preprocessing import single_mod
    >>> Xs = [pd.DataFrame(np.random.default_rng(42).random((20, 10))) for i in range(3)]
    >>> single_mod(Xs=Xs, X_idx = 1)
    """
    n_mods = len(Xs)
    # Guard clause: the upper bound is checked before any validation work.
    if n_mods <= X_idx:
        raise ValueError("X_idx out of range. Should be between 0 and n_mods - 1")
    checked = check_Xs_y(Xs, ensure_all_finite="allow-nan")
    return checked[X_idx]
[docs]
def add_missing_mods(Xs, samples):
    r"""
    Add missing samples in each modality, in a way that all the modalities will have the same samples.

    Every modality is re-indexed on ``samples``: rows already present keep
    their values, rows absent from a modality are added filled with NaN, and
    the rows of the result follow the order of ``samples``.

    Parameters
    ----------
    Xs : list of array-likes objects
        - Xs length: n_mods
        - Xs[i] shape: (n_samples, n_features_i)

        A list of different mods. The first element decides the format: if it
        is a pandas DataFrame all elements are treated as DataFrames, otherwise
        each element is wrapped in a DataFrame (default integer row labels).
    samples : array-like (n_samples,)
        Labels of all samples (e.g. a pd.Index).

    Returns
    -------
    transformed_X : list of array-likes objects (n_samples, n_features_i)
        The transformed multi-modal dataset. DataFrames when the input was in
        pandas format, NumPy arrays otherwise.

    Example
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from imml.preprocessing import add_missing_mods
    >>> from imml.explore import get_samples
    >>> Xs = [pd.DataFrame(np.random.default_rng(42).random((20, 10))) for i in range(3)]
    >>> samples = get_samples(Xs= Xs)
    >>> add_missing_mods(Xs, samples= samples)
    """
    pandas_format = isinstance(Xs[0], pd.DataFrame)
    frames = Xs if pandas_format else [pd.DataFrame(X) for X in Xs]
    # reindex compares `samples` against the row labels (the samples), never
    # against the feature labels, so existing data is not overwritten; it
    # returns new objects and leaves the inputs untouched.
    transformed_Xs = [X.reindex(samples) for X in frames]
    if not pandas_format:
        # Hand back plain arrays when plain arrays were given.
        transformed_Xs = [transformed_X.values for transformed_X in transformed_Xs]
    return transformed_Xs
[docs]
def sort_data(Xs: list):
    r"""
    Establish and assess the order of an incomplete multi-modal dataset.

    The modalities are validated with ``check_Xs_y``, wrapped in pandas
    DataFrames when the first one is not already a DataFrame, and each one is
    then selected with the labels of ``get_samples(Xs=Xs)`` that appear in its
    own index.

    Parameters
    ----------
    Xs : list of array-likes objects
        - Xs length: n_mods
        - Xs[i] shape: (n_samples, n_features_i)

        A list of different modalities.

    Returns
    -------
    transformed_X : list of array-likes objects (n_samples, n_features_i)
        The transformed multi-modal dataset.

    Example
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from imml.preprocessing import sort_data
    >>> Xs = [pd.DataFrame(np.random.default_rng(42).random((20, 10))) for i in range(3)]
    >>> sort_data(Xs=Xs)
    """
    Xs = check_Xs_y(Xs, ensure_all_finite="allow-nan")
    first_is_frame = isinstance(Xs[0], pd.DataFrame)
    if not first_is_frame:
        # Label-based selection below needs an index, so wrap raw arrays.
        Xs = [pd.DataFrame(X) for X in Xs]
    samples = get_samples(Xs=Xs)
    transformed_X = []
    for X in Xs:
        # Only the labels this modality actually contains can be selected.
        present = samples.intersection(X.index)
        transformed_X.append(X.loc[present])
    return transformed_X