Skip to content

Regression

Functions

cross_validation_ridge_regression(X_data_list, y_data_list, n_splits, score_fct, alphas=np.logspace(-3, 3, 7))

Cross validate ridge regression

Parameters:

Name Type Description Default
X_data_list List[ndarray]

List of X data as np array for each story

required
y_data_list List[ndarray]

List of fMRI data as np arrays, one per story. Must be in the same order as X_data_list.

required
n_splits int

Number of cross-validation splits (folds)

required
score_fct fct(np.ndarray, np.ndarray) -> np.ndarray

A function taking y_test (shape = (number_trs, n_voxels)) and y_predict (same shape as y_test) and returning an array with an entry for each voxel (shape = (n_voxels))

required
alphas ndarray

Array of alpha values to optimize over

logspace(-3, 3, 7)
Source code in src/regression.py
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def cross_validation_ridge_regression(
    X_data_list: List[np.ndarray],
    y_data_list: List[np.ndarray],
    n_splits: int,
    score_fct: Callable[[np.ndarray, np.ndarray], np.ndarray],
    alphas: np.ndarray = np.logspace(-3, 3, 7),
) -> Tuple[np.ndarray, List[np.ndarray], List[np.ndarray], Union[float, np.ndarray]]:
    """Cross validate ridge regression

    The folds are built over the *stories* (the entries of ``X_data_list``),
    so every story is entirely in either the training or the test set of a
    fold. Normalization statistics are computed on the training data of each
    fold only and then applied to both the training and the test data.

    Parameters
    ----------
    X_data_list : List[np.ndarray]
        List of X data as np array for each story
    y_data_list : List[np.ndarray]
        List of fmri data as np array for each story.
        Must be in same order as X_data_list.
    n_splits : int
        Cross validation splits
    score_fct : fct(np.ndarray, np.ndarray) -> np.ndarray
        A function taking y_test (shape = (number_trs, n_voxels))
        and y_predict (same shape as y_test) and returning an
        array with an entry for each voxel (shape = (n_voxels))
    alphas : np.ndarray
        Array of alpha values to optimize over

    Returns
    -------
    mean_scores : np.ndarray
        The per-fold scores averaged over the folds.
    all_scores : List[np.ndarray]
        The output of ``score_fct`` for each fold.
    all_weights : List[np.ndarray]
        The fitted ``coef_`` of the ridge model for each fold.
    best_alpha : Union[float, np.ndarray]
        The ``alpha_`` selected by the model of the LAST fold only;
        the values chosen in earlier folds are not returned.
    """

    kf = KFold(n_splits=n_splits)

    all_scores = []
    all_weights = []
    for fold, (train_indices, test_indices) in enumerate(kf.split(X_data_list)):  # type: ignore
        log.info(f"Fold {fold}")
        X_train_list = [X_data_list[i] for i in train_indices]
        y_train_list = [y_data_list[i] for i in train_indices]
        X_test_list = [X_data_list[i] for i in test_indices]
        y_test_list = [y_data_list[i] for i in test_indices]

        # Stack the stories of each split along the first axis.
        X_train_unnormalized = np.concatenate(X_train_list, axis=0)
        y_train_unnormalized = np.concatenate(y_train_list, axis=0)
        X_test_unnormalized = np.concatenate(X_test_list, axis=0)
        y_test_unnormalized = np.concatenate(y_test_list, axis=0)

        # Statistics come from the training split only, so that no
        # information about the test split leaks into the normalization.
        X_means = X_train_unnormalized.mean(axis=0)
        y_means = y_train_unnormalized.mean(axis=0)
        # Fix: the scale must be the standard deviation. The previous code
        # used .mean() here, so the data was divided by its mean.
        # NOTE(review): a constant column has std 0; whether z_score guards
        # against a zero scale is not visible from here - confirm.
        X_stds = X_train_unnormalized.std(axis=0)
        y_stds = y_train_unnormalized.std(axis=0)

        X_train = z_score(X_train_unnormalized, X_means, X_stds)
        y_train = z_score(y_train_unnormalized, y_means, y_stds)
        X_test = z_score(X_test_unnormalized, X_means, X_stds)
        y_test = z_score(y_test_unnormalized, y_means, y_stds)

        clf = RidgeCV(alphas=alphas, alpha_per_target=True)
        clf.fit(X_train, y_train)
        # Overwritten on every iteration: only the last fold's value survives.
        best_alpha = clf.alpha_

        y_predict = clf.predict(X_test)
        fold_scores = score_fct(y_test, y_predict)

        all_scores.append(fold_scores)
        all_weights.append(clf.coef_)

    mean_scores = np.mean(all_scores, axis=0)

    return mean_scores, all_scores, all_weights, best_alpha

score_correlation(y_test, y_predict)

Returns the correlations for each voxel given predicted and true data.

Parameters:

Name Type Description Default
y_test ndarray

shape = (number_trs, n_voxels)

required
y_predict ndarray

shape = (number_trs, n_voxels)

required

Returns:

Type Description
ndarray

shape = (n_voxels)

Source code in src/regression.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
def score_correlation(y_test, y_predict) -> np.ndarray:
    """Compute, voxel by voxel, the correlation between true and predicted data.

    Parameters
    ----------
    y_test : np.ndarray
        shape = (number_trs, n_voxels)
    y_predict : np.ndarray
        shape = (number_trs, n_voxels)

    Returns
    -------
    np.ndarray
        shape = (n_voxels)
    """
    voxel_correlations = []
    # Transposing lets us walk over the columns (one time course per voxel).
    for observed, predicted in zip(y_test.T, y_predict.T):
        correlation_matrix = np.corrcoef(observed, predicted)
        # The off-diagonal entry is the correlation between the two series.
        voxel_correlations.append(correlation_matrix[0, 1])
    return np.array(voxel_correlations)