Source code for imml.preprocessing.multi_mod_transformer
from copy import deepcopy
from sklearn.base import BaseEstimator, TransformerMixin
from ..utils import check_Xs
[docs]
class MultiModTransformer(BaseEstimator, TransformerMixin):
    r"""
    A transformer that applies a scikit-learn style transformer to each modality of a multi-modal dataset.

    Parameters
    ----------
    transformer : scikit-learn transformer object or list of scikit-learn transformer objects
        If a single transformer is provided, an independent deep copy of it is fitted on each modality.
        If a list is provided, the i-th transformer is applied to the i-th modality; in that case the
        supplied objects themselves are fitted (they are not copied).

    Attributes
    ----------
    transformer_list_ : list of transformers (n_mods,)
        The transformers used for each modality. When a single transformer was given, this list is
        empty until ``fit`` is called and is rebuilt on every call to ``fit``.
    same_transformer_ : bool
        Whether a single transformer (rather than a list) was provided.

    Raises
    ------
    ValueError
        If ``transformer`` (or any element of the list) has no callable ``fit`` method.

    Example
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from imml.preprocessing import MultiModTransformer
    >>> from sklearn.impute import SimpleImputer
    >>> Xs = [pd.DataFrame(np.random.default_rng(42).random((20, 10))) for i in range(3)]
    >>> transformer = MultiModTransformer(transformer = SimpleImputer().set_output(transform = 'pandas'))
    >>> transformer.fit_transform(Xs)
    """

    def __init__(self, transformer):
        self.same_transformer_ = not isinstance(transformer, list)
        candidates = [transformer] if self.same_transformer_ else transformer
        for candidate in candidates:
            self._check_has_fit(candidate)
        self.transformer = transformer
        # In list mode the user-supplied list is used directly (no copy).
        self.transformer_list_ = [] if self.same_transformer_ else transformer

    @staticmethod
    def _check_has_fit(candidate):
        """Raise ``ValueError`` unless ``candidate`` exposes a callable ``fit``."""
        # Explicit check instead of ``assert`` so validation survives ``python -O``.
        if not callable(getattr(candidate, "fit", None)):
            raise ValueError("transformer must be a scikit-learn transformer like object")

    def fit(self, Xs, y = None):
        r"""
        Fit one transformer per modality.

        Parameters
        ----------
        Xs : list of array-likes objects
            - Xs length: n_mods
            - Xs[i] shape: (n_samples, n_features_i)

            A list of different modalities.
        y : array-like, shape (n_samples,)
            Labels for each sample. Passed through to each transformer's ``fit``.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        ValueError
            If ``transformer`` is not transformer-like, or if a list of transformers was given
            and it has fewer elements than ``Xs`` has modalities.
        """
        Xs = check_Xs(Xs, ensure_all_finite='allow-nan')
        # Re-derive the mode from the current parameter so that a ``transformer`` replaced
        # after construction (e.g. through ``set_params``) is honoured.
        self.same_transformer_ = not isinstance(self.transformer, list)
        if self.same_transformer_:
            self._check_has_fit(self.transformer)
            # Build a fresh list on every call; appending to the previous one would make
            # the list grow on each refit.
            self.transformer_list_ = [deepcopy(self.transformer) for _ in Xs]
        else:
            for candidate in self.transformer:
                self._check_has_fit(candidate)
            if len(self.transformer) < len(Xs):
                raise ValueError(
                    f"Received {len(Xs)} modalities but only "
                    f"{len(self.transformer)} transformers were provided."
                )
            self.transformer_list_ = self.transformer
        for mod_transformer, X in zip(self.transformer_list_, Xs):
            mod_transformer.fit(X, y)
        return self

    def transform(self, Xs):
        r"""
        Transform each modality with its corresponding transformer.

        Parameters
        ----------
        Xs : list of array-likes objects
            - Xs length: n_mods
            - Xs[i] shape: (n_samples, n_features_i)

            A list of different modalities.

        Returns
        -------
        transformed_Xs : list of array-likes objects, shape (n_samples, n_features_i)
            A list of transformed mods of data, one for each input modality.

        Raises
        ------
        ValueError
            If ``Xs`` has more modalities than there are transformers available, which is
            also the case when a single transformer was given and ``fit`` has not been called.
        """
        Xs = check_Xs(Xs, ensure_all_finite='allow-nan')
        if len(Xs) > len(self.transformer_list_):
            raise ValueError(
                f"Received {len(Xs)} modalities but only "
                f"{len(self.transformer_list_)} transformers are available; "
                "call fit with the same number of modalities first."
            )
        return [
            mod_transformer.transform(X)
            for mod_transformer, X in zip(self.transformer_list_, Xs)
        ]