Skip to content

Data

Module for loading fMRI data, features, and related stimulus files (e.g. TextGrids and wav audio).

load_fmri(story, subject)

Load fMRI data. Return ndarray with shape [time, voxels].

Source code in src/encoders/data.py
249
250
251
252
253
254
255
256
257
258
259
260
261
def load_fmri(story: str, subject: str) -> np.ndarray:
    """Load fMRI data. Return ndarray with shape [time, voxels].

    Reads the ``data`` dataset from
    ``DATADIR/derivative/preprocessed_data/{subject}/{story}.hf5``.

    Parameters
    ----------
    story: str
        Story identifier; used as the stem of the ``.hf5`` file name.
    subject: str
        Subject identifier; used as the name of the subject directory.

    Returns
    -------
    np.ndarray
        In-memory copy of the ``data`` dataset, shape [time, voxels].

    """

    subject_dir = Path(DATADIR, f"derivative/preprocessed_data/{subject}")
    resp_path = Path(subject_dir, f"{story}.hf5")

    # Open the file in a context manager so the HDF5 handle is always closed;
    # the dataset is copied into memory before the file goes out of scope.
    with h5py.File(resp_path, "r") as hf:
        data = np.array(hf["data"][:])  # type: ignore

    log.info(
        f"{story}.hf5"
        f" | {subject}"
        f" | time: {data.shape[0]}"
        f" | voxels: {data.shape[1]}"
    )
    return data

load_textgrid(story)

Loads {story}.TextGrid from 'ds003020/derivative/TextGrids' folder.

Parameters:

Name Type Description Default

story

str

Story to load

required

Returns:

Type Description
dict

Dictionary with keys 'phone' and 'word', each containing a dataframe with phone and word onset times, respectively.

Source code in src/encoders/data.py
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
def load_textgrid(story: str) -> dict[str, pd.DataFrame]:
    """
    Read and parse the TextGrid annotation file of a story.

    The file is looked up as {story}.TextGrid in the
    'derivative/TextGrids' folder under DATADIR.

    Parameters
    ----------
    story: str
        Name of the story whose TextGrid should be loaded

    Returns
    -------
    dict
        Result of `parse_textgrid` on the file's lines: a dictionary with
        keys 'phone' and 'word', each containing a dataframe with phone and
        word onset times, respectively.

    """
    textgrid_path = DATADIR / "derivative" / "TextGrids" / f"{story}.TextGrid"

    # The parser works on the raw list of lines, newlines included
    with open(textgrid_path, "r") as textgrid_file:
        textgrid_lines = textgrid_file.readlines()

    return parse_textgrid(textgrid_lines)  # type: ignore

load_wav(story)

Load wav file. Return the sample rate and the audio as an ndarray with shape [samples, channels] (a non-2-D array is treated as a single channel).

Source code in src/encoders/data.py
30
31
32
33
34
35
36
37
38
39
40
41
def load_wav(story: str) -> Tuple[int, np.ndarray]:
    """Load the wav file of a story.

    The file is read from ``DATADIR/WAV_DIR/{story}.wav``.

    Returns
    -------
    tuple
        ``(sample_rate, wav)`` exactly as returned by ``wavfile.read``.
        A 2-D ``wav`` has shape [samples, channels]; any other shape is
        logged as a single channel.

    """
    sample_rate, wav = wavfile.read(Path(DATADIR, WAV_DIR, f"{story}.wav"))

    # Only a 2-D array carries an explicit channel axis
    if len(wav.shape) == 2:
        n_chans = wav.shape[1]
    else:
        n_chans = 1

    duration = wav.shape[0] / sample_rate
    log.info(f"{story}.wav | channels: {n_chans} | length {duration}s")

    return sample_rate, wav