Skip to content

Features

Functions

get_embeddings(story)

Load per-word embeddings and word onset/offset times for a story from its textgrid file. Words missing from the embedding vocabulary get an all-zero vector.

Source code in src/features.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def get_embeddings(story: str) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Build per-word embeddings and word onset/offset times for a story.

    The "word" tier of the story's textgrid is read, rows whose text is in
    SKIP_TOKENS are dropped, and the remaining words are lower-cased and
    looked up in the embedding vocabulary.

    Args:
        story: Story identifier, passed on to ``load_textgrid``.

    Returns:
        A tuple ``(embs, starts, stops)`` with one entry per kept word:

        * ``embs`` -- the embedding of each word; words that are missing
          from the vocabulary get an all-zero vector.
        * ``starts`` -- the ``start`` value of each word.
        * ``stops`` -- the ``stop`` value of each word.
    """
    vecs, vocab = load_embeddings()

    word_grid = load_textgrid(story)["word"]

    # Walk the tier once and keep text/start/stop together, so the three
    # outputs are filtered identically and cannot drift out of alignment.
    kept = [
        (row.text.lower(), row.start, row.stop)
        for _, row in word_grid.iterrows()
        if row.text not in SKIP_TOKENS
    ]
    tokens = [token for token, _, _ in kept]
    starts = np.array([start for _, start, _ in kept])
    stops = np.array([stop for _, _, stop in kept])

    n_found = sum(token in vocab for token in tokens)

    log.info(
        f"{n_found}/{len(tokens)} (missing {len(tokens)-n_found}) story tokens found in vocab."
    )

    dim = vecs.shape[0]

    # With no kept words np.array([]) would be 1-D; return an empty array
    # that still carries the embedding dimension.
    if not tokens:
        return np.zeros((0, dim)), starts, stops

    embs = np.array(
        [vecs[:, vocab[t]] if t in vocab else np.zeros(dim) for t in tokens]
    )

    return embs, starts, stops

get_envelope(signal)

Compute the audio envelope: the absolute value of the signal's Hilbert transform output.

Source code in src/features.py
13
14
15
16
def get_envelope(signal: np.ndarray) -> np.ndarray:
    """
    Return the envelope of an audio signal.

    The envelope is taken as the element-wise absolute value of
    ``hilbert(signal)`` (presumably ``scipy.signal.hilbert`` -- confirm
    against the module imports).

    Args:
        signal: The audio samples to process.

    Returns:
        The absolute value of the Hilbert-transformed signal.
    """
    log.info("Computing envelope.")
    return np.absolute(hilbert(signal))  # type: ignore

load_embeddings()

Load the embedding vectors and vocabulary from the EMBEDDINGS_FILE (h5py).

Source code in src/features.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def load_embeddings() -> Tuple[np.ndarray, Dict]:
    """
    Read the embedding matrix and its vocabulary from EMBEDDINGS_FILE.

    The file is opened read-only with h5py and its "data" and "vocab"
    datasets are copied into memory.

    Returns:
        A tuple ``(data, vocab)``: ``data`` is the "data" dataset as a numpy
        array, and ``vocab`` maps each UTF-8 decoded entry of the "vocab"
        dataset to its position in that dataset.
    """
    with h5py.File(EMBEDDINGS_FILE, "r") as h5_file:
        log.info(f"Loading: {EMBEDDINGS_FILE}")

        # Copy both datasets into memory while the file is still open.
        matrix = np.array(h5_file["data"])
        raw_words = np.array(h5_file["vocab"])

    # Entries are stored as bytes; decode them and remember their position.
    word_to_index: Dict = {}
    for position, raw_word in enumerate(raw_words):
        word_to_index[raw_word.decode("utf-8")] = position

    log.info(f"data shape: {matrix.shape}")
    log.info(f"vocab len: {len(word_to_index)}")

    return matrix, word_to_index