senselab.audio.tasks.features_extraction.api

This module provides functions to describe audio files by extracting features.

The extracted descriptors represent dimensions within a multi-dimensional space, allowing for a detailed analysis of each file's characteristics. These dimensions can be updated or refined in the future as part of ongoing development efforts. This module is currently a work in progress.

  1"""This module provides functions to describe audio files by extracting features.
  2
  3The extracted descriptors represent dimensions within a multi-dimensional space,
  4allowing for a detailed analysis of each file's characteristics.
  5These dimensions can be updated or refined in the future as part of ongoing development efforts.
  6This module is currently a work in progress.
  7"""
  8
  9from typing import Any, Dict, List, Union
 10
 11from senselab.audio.data_structures import Audio
 12
 13from .opensmile import extract_opensmile_features_from_audios
 14from .praat_parselmouth import extract_praat_parselmouth_features_from_audios
 15from .torchaudio import extract_torchaudio_features_from_audios
 16from .torchaudio_squim import extract_objective_quality_features_from_audios
 17
 18
 19def extract_features_from_audios(
 20    audios: List[Audio],
 21    opensmile: Union[Dict[str, str], bool] = True,
 22    parselmouth: Union[Dict[str, str], bool] = True,
 23    torchaudio: Union[Dict[str, str], bool] = True,
 24    torchaudio_squim: bool = True,
 25) -> List[Dict[str, Any]]:
 26    """Extract features from a list of audio objects.
 27
 28    Args:
 29        audios (List[Audio]): The list of audio objects to extract features from.
 30        opensmile (Union[Dict[str, str], bool]): Parameters for OpenSMILE feature extraction.
 31            If False, OpenSMILE features will not be extracted. If True, uses default OpenSMILE parameters.
 32            If a dictionary, should contain "feature_set" and "feature_level" keys.
 33        parselmouth (Union[Dict[str, str], bool]): Parameters for Praat Parselmouth feature extraction.
 34            If False, Praat Parselmouth features will not be extracted.
 35            If True, uses default Praat Parselmouth parameters.
 36            If a dictionary, should contain "time_step", "window_length", "pitch_unit", "cache_dir",
 37            "speech_rate", "intensity_descriptors", "harmonicity_descriptors", "formants", "spectral_moments", "pitch",
 38            "slope_tilt", "cpp_descriptors", "duration", "jitter", "shimmer" keys.
 39        torchaudio (Union[Dict[str, str], bool]): Parameters for torchaudio feature extraction.
 40            If False, torchaudio features will not be extracted. If True, uses default torchaudio parameters.
 41            If a dictionary, should contain "freq_low", "freq_high", "n_fft", "n_mels", "n_mfcc",
 42            "win_length" and "hop_length" keys.
 43        torchaudio_squim (bool): Parameters for torchaudio_squim feature extraction.
 44            If False, torchaudio_squim features will not be extracted.
 45
 46    Returns:
 47        List[Dict[str, Any]]: The list of feature dictionaries for each audio.
 48
 49    Examples:
 50        >>> extract_features_from_audios(audios)
 51        [{'opensmile': {'F0semitoneFrom27.5Hz_sma3nz_amean': 25.710796356201172,
 52        'F0semitoneFrom27.5Hz_sma3nz_stddevNorm': 0.1605353206396103,
 53        'F0semitoneFrom27.5Hz_sma3nz_percentile20.0': 21.095951080322266,
 54        'F0semitoneFrom27.5Hz_sma3nz_percentile50.0': 25.9762020111084,
 55        'F0semitoneFrom27.5Hz_sma3nz_percentile80.0': 29.512413024902344,
 56        'F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2': 8.416461944580078,
 57        'F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope': 82.34796905517578,
 58        'F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope': 99.20043182373047,
 59        'F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope': 22.002275466918945,
 60        'F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope': 9.043970108032227,
 61        'loudness_sma3_amean': 0.86087566614151,
 62        'loudness_sma3_stddevNorm': 0.43875235319137573,
 63        'loudness_sma3_percentile20.0': 0.5877408981323242,
 64        'loudness_sma3_percentile50.0': 0.8352401852607727,
 65        'loudness_sma3_percentile80.0': 1.1747918128967285,
 66        'loudness_sma3_pctlrange0-2': 0.5870509147644043,
 67        'loudness_sma3_meanRisingSlope': 10.285204887390137,
 68        'loudness_sma3_stddevRisingSlope': 7.544795513153076,
 69        'loudness_sma3_meanFallingSlope': 7.612527370452881,
 70        'loudness_sma3_stddevFallingSlope': 4.15903902053833,
 71        'spectralFlux_sma3_amean': 0.3213598430156708,
 72        'spectralFlux_sma3_stddevNorm': 0.6921582818031311,
 73        'mfcc1_sma3_amean': 10.274803161621094,
 74        'mfcc1_sma3_stddevNorm': 1.1581648588180542,
 75        'mfcc2_sma3_amean': 4.262020111083984,
 76        'mfcc2_sma3_stddevNorm': 2.052302837371826,
 77        'mfcc3_sma3_amean': 7.624598026275635,
 78        'mfcc3_sma3_stddevNorm': 1.4570358991622925,
 79        'mfcc4_sma3_amean': 3.6676177978515625,
 80        'mfcc4_sma3_stddevNorm': 2.6902272701263428,
 81        'jitterLocal_sma3nz_amean': 0.019597552716732025,
 82        'jitterLocal_sma3nz_stddevNorm': 0.9063860177993774,
 83        'shimmerLocaldB_sma3nz_amean': 1.264746069908142,
 84        'shimmerLocaldB_sma3nz_stddevNorm': 0.4629262685775757,
 85        'HNRdBACF_sma3nz_amean': 3.6400067806243896,
 86        'HNRdBACF_sma3nz_stddevNorm': 0.5911334753036499,
 87        'logRelF0-H1-H2_sma3nz_amean': 1.215877652168274,
 88        'logRelF0-H1-H2_sma3nz_stddevNorm': 3.883843183517456,
 89        'logRelF0-H1-A3_sma3nz_amean': 18.830764770507812,
 90        'logRelF0-H1-A3_sma3nz_stddevNorm': 0.30870768427848816,
 91        'F1frequency_sma3nz_amean': 665.1713256835938,
 92        'F1frequency_sma3nz_stddevNorm': 0.41958823800086975,
 93        'F1bandwidth_sma3nz_amean': 1300.2757568359375,
 94        'F1bandwidth_sma3nz_stddevNorm': 0.16334553062915802,
 95        'F1amplitudeLogRelF0_sma3nz_amean': -132.1533660888672,
 96        'F1amplitudeLogRelF0_sma3nz_stddevNorm': -0.6691396832466125,
 97        'F2frequency_sma3nz_amean': 1657.013916015625,
 98        'F2frequency_sma3nz_stddevNorm': 0.17019854485988617,
 99        'F2bandwidth_sma3nz_amean': 1105.7457275390625,
100        'F2bandwidth_sma3nz_stddevNorm': 0.24520403146743774,
101        'F2amplitudeLogRelF0_sma3nz_amean': -132.76707458496094,
102        'F2amplitudeLogRelF0_sma3nz_stddevNorm': -0.6468541026115417,
103        'F3frequency_sma3nz_amean': 2601.6630859375,
104        'F3frequency_sma3nz_stddevNorm': 0.11457356810569763,
105        'F3bandwidth_sma3nz_amean': 1091.15087890625,
106        'F3bandwidth_sma3nz_stddevNorm': 0.3787318468093872,
107        'F3amplitudeLogRelF0_sma3nz_amean': -134.52105712890625,
108        'F3amplitudeLogRelF0_sma3nz_stddevNorm': -0.620308518409729,
109        'alphaRatioV_sma3nz_amean': -8.626543045043945,
110        'alphaRatioV_sma3nz_stddevNorm': -0.4953792095184326,
111        'hammarbergIndexV_sma3nz_amean': 16.796842575073242,
112        'hammarbergIndexV_sma3nz_stddevNorm': 0.3567312955856323,
113        'slopeV0-500_sma3nz_amean': 0.021949246525764465,
114        'slopeV0-500_sma3nz_stddevNorm': 1.0097224712371826,
115        'slopeV500-1500_sma3nz_amean': -0.008139753714203835,
116        'slopeV500-1500_sma3nz_stddevNorm': -1.6243411302566528,
117        'spectralFluxV_sma3nz_amean': 0.4831695556640625,
118        'spectralFluxV_sma3nz_stddevNorm': 0.48576226830482483,
119        'mfcc1V_sma3nz_amean': 20.25444793701172,
120        'mfcc1V_sma3nz_stddevNorm': 0.44413772225379944,
121        'mfcc2V_sma3nz_amean': 3.619405746459961,
122        'mfcc2V_sma3nz_stddevNorm': 2.1765975952148438,
123        'mfcc3V_sma3nz_amean': 7.736487865447998,
124        'mfcc3V_sma3nz_stddevNorm': 1.8630998134613037,
125        'mfcc4V_sma3nz_amean': 4.60503625869751,
126        'mfcc4V_sma3nz_stddevNorm': 2.864668846130371,
127        'alphaRatioUV_sma3nz_amean': -2.5990121364593506,
128        'hammarbergIndexUV_sma3nz_amean': 8.862899780273438,
129        'slopeUV0-500_sma3nz_amean': 0.002166695659980178,
130        'slopeUV500-1500_sma3nz_amean': 0.006735736038535833,
131        'spectralFluxUV_sma3nz_amean': 0.24703539907932281,
132        'loudnessPeaksPerSec': 3.8834950923919678,
133        'VoicedSegmentsPerSec': 2.745098114013672,
134        'MeanVoicedSegmentLengthSec': 0.12214285880327225,
135        'StddevVoicedSegmentLengthSec': 0.09025190770626068,
136        'MeanUnvoicedSegmentLength': 0.20666664838790894,
137        'StddevUnvoicedSegmentLength': 0.17666037380695343,
138        'equivalentSoundLevel_dBp': -24.297256469726562},
139        'torchaudio': {'pitch': tensor([484.8485, 484.8485, 470.5882, 372.0930, 340.4255, 320.0000, 296.2963,
140                140.3509, 135.5932, 126.9841, 124.0310, 124.0310, 113.4752, 110.3448,
141                110.3448, 108.8435, 105.9603, 108.8435, 110.3448, 113.4752, 113.4752,
142                124.0310, 113.4752, 113.4752, 108.8435, 105.9603, 105.9603, 105.9603,
143                106.6667, 105.9603, 105.9603, 104.5752, 104.5752, 104.5752, 104.5752,
144                101.2658, 101.2658, 100.6289, 100.6289, 100.0000, 100.0000,  98.1595,
145                    98.1595,  98.1595,  95.8084,  95.8084,  95.8084,  95.2381,  95.2381,
146                    94.6746,  91.9540,  91.9540,  91.9540,  91.9540,  91.9540,  91.4286,
147                    91.4286,  91.4286,  91.4286,  91.4286,  91.4286,  91.4286,  90.9091,
148                    90.9091,  90.9091,  91.4286,  91.4286,  91.4286,  91.4286,  91.4286,
149                    91.4286,  91.4286,  91.4286,  91.4286,  91.4286,  91.4286,  91.4286,
150                    91.4286,  91.9540,  91.9540,  93.0233,  93.5673,  93.5673,  94.1176,
151                    94.6746,  94.6746,  94.6746,  95.8084,  96.3855,  96.9697, 100.0000,
152                100.0000, 100.0000, 100.0000, 100.0000, 100.0000, 103.8961, 104.5752,
153                104.5752, 106.6667, 106.6667, 106.6667, 111.1111, 116.7883, 116.7883,
154                116.7883, 118.5185, 121.2121, 121.2121, 121.2121, 121.2121, 121.2121,
155                121.2121, 121.2121, 121.2121, 121.2121, 121.2121, 121.2121, 122.1374,
156                123.0769, 123.0769, 125.9843, 125.9843, 125.9843, 123.0769, 123.0769,
157                122.1374, 122.1374, 121.2121, 121.2121, 121.2121, 120.3008, 117.6471,
158                117.6471, 117.6471, 108.8435, 108.1081, 106.6667, 105.2632, 105.2632,
159                105.2632, 105.2632, 105.2632, 105.2632, 105.2632, 105.2632, 105.2632,
160                108.8435, 105.2632, 105.2632, 105.2632, 105.2632, 105.2632, 105.2632,
161                105.2632, 119.4030, 119.4030, 120.3008, 120.3008, 121.2121, 121.2121,
162                121.2121, 121.2121, 121.2121, 121.2121, 121.2121, 121.2121, 122.1374,
163                122.1374, 122.1374, 122.1374, 123.0769, 123.0769, 123.0769, 123.0769,
164                123.0769, 123.0769, 120.3008, 120.3008, 120.3008, 119.4030, 113.4752,
165                106.6667, 103.2258, 103.2258,  96.9697,  96.9697,  96.9697,  96.9697,
166                    96.9697,  96.9697,  96.9697,  96.9697,  96.9697,  96.9697,  96.9697,
167                    96.9697,  96.9697,  96.9697,  96.9697,  96.9697,  96.9697,  96.9697,
168                    96.9697,  96.9697,  96.9697,  96.9697,  96.9697,  97.5610,  97.5610,
169                    97.5610,  97.5610,  97.5610,  98.1595, 100.0000, 100.6289, 100.6289,
170                100.6289, 100.6289, 101.2658, 101.2658, 101.2658, 101.2658, 101.2658,
171                101.2658, 101.2658, 101.2658, 101.2658, 101.2658, 101.2658,  97.5610,
172                    90.9091,  89.8876,  88.8889,  88.8889,  88.3978,  87.4317,  86.0215,
173                    86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,
174                    86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,
175                    86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,
176                    86.0215,  86.0215,  86.0215,  86.0215,  86.4865,  86.4865,  86.4865,
177                    86.4865,  86.4865,  87.4317,  87.9121,  87.9121,  87.9121,  89.8876,
178                    90.9091,  90.9091,  90.9091,  90.9091,  90.9091,  91.4286,  91.4286,
179                    91.4286,  92.4855,  92.4855,  93.0233,  93.0233,  93.0233,  93.5673,
180                    93.5673,  95.2381,  95.2381, 100.0000, 101.9108, 112.6761, 112.6761,
181                112.6761, 122.1374, 122.1374, 122.1374, 130.0813, 126.9841, 126.9841,
182                130.0813, 130.0813, 130.0813, 130.0813, 137.9310, 130.0813, 130.0813,
183                130.0813, 126.9841, 125.9843, 126.9841, 125.9843, 125.9843, 125.9843,
184                125.9843, 125.9843, 126.9841, 126.9841, 130.0813, 130.0813, 126.9841,
185                130.0813, 130.0813, 132.2314, 130.0813, 130.0813, 132.2314, 134.4538,
186                134.4538, 135.5932, 135.5932, 137.9310, 135.5932, 135.5932, 135.5932,
187                135.5932, 137.9310, 137.9310, 140.3509, 141.5929, 141.5929, 141.5929,
188                144.1441, 144.1441, 149.5327, 149.5327, 149.5327, 141.5929, 141.5929,
189                141.5929, 149.5327, 149.5327, 153.8462, 160.0000, 160.0000, 160.0000,
190                160.0000, 160.0000, 163.2653, 164.9485, 164.9485, 164.9485, 164.9485,
191                164.9485, 164.9485, 164.9485, 164.9485, 164.9485, 164.9485, 164.9485,
192                164.9485, 164.9485, 164.9485, 164.9485, 156.8627, 155.3398, 155.3398,
193                155.3398, 153.8462, 153.8462, 152.3810, 152.3810, 149.5327, 148.1481,
194                148.1481, 148.1481, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890,
195                148.1481, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890,
196                146.7890, 146.7890, 145.4545, 145.4545, 152.3810, 153.8462, 153.8462,
197                153.8462, 153.8462, 153.8462, 153.8462, 153.8462, 153.8462, 153.8462,
198                153.8462, 153.8462, 153.8462, 153.8462, 153.8462, 153.8462, 153.8462,
199                153.8462, 153.8462, 153.8462, 153.8462, 152.3810, 152.3810, 152.3810,
200                152.3810, 149.5327, 148.1481, 148.1481, 148.1481, 148.1481, 148.1481,
201                148.1481, 146.7890, 148.1481, 148.1481, 145.4545, 145.4545, 145.4545,
202                145.4545, 145.4545, 144.1441, 144.1441, 144.1441, 142.8571, 142.8571,
203                142.8571, 142.8571, 142.8571, 142.8571, 142.8571, 144.1441, 144.1441,
204                145.4545, 145.4545, 145.4545, 145.4545, 146.7890, 146.7890, 146.7890,
205                146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890,
206                146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 145.4545,
207                145.4545, 145.4545, 145.4545, 145.4545, 145.4545, 145.4545, 145.4545,
208                146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890,
209                400.0000, 400.0000, 484.8485, 484.8485, 484.8485, 484.8485, 484.8485,
210                484.8485, 484.8485, 484.8485, 484.8485, 484.8485]),
211        'mel_filter_bank': tensor([[4.4167e-04, 1.0165e-02, 1.3079e-02,  ..., 0.0000e+00, 0.0000e+00,
212                    0.0000e+00],
213                [3.0977e-04, 1.5698e-02, 1.5785e-02,  ..., 0.0000e+00, 0.0000e+00,
214                    0.0000e+00],
215                [8.2318e-05, 1.4367e-02, 2.8095e-01,  ..., 0.0000e+00, 0.0000e+00,
216                    0.0000e+00],
217                ...,
218                [3.6322e-05, 9.7330e-03, 5.4812e-02,  ..., 0.0000e+00, 0.0000e+00,
219                    0.0000e+00],
220                [2.2802e-05, 1.2481e-02, 5.8374e-02,  ..., 0.0000e+00, 0.0000e+00,
221                    0.0000e+00],
222                [5.3029e-05, 3.1305e-02, 7.9842e-02,  ..., 0.0000e+00, 0.0000e+00,
223                    0.0000e+00]]),
224        'mfcc': tensor([[-6.2570e+02, -4.7505e+02, -3.1078e+02,  ..., -6.3893e+02,
225                    -6.3893e+02, -6.3893e+02],
226                [ 1.3593e+01,  1.9928e+01,  2.6022e+01,  ...,  3.9824e-05,
227                    3.9824e-05,  3.9824e-05],
228                [ 7.3933e+00, -2.1680e+01, -1.4259e+01,  ..., -1.3440e-05,
229                    -1.3440e-05, -1.3440e-05],
230                ...,
231                [ 1.8122e+00, -3.1072e+00, -3.7336e+00,  ...,  7.0669e-05,
232                    7.0669e-05,  7.0669e-05],
233                [-2.7518e-01, -9.4738e+00, -2.3157e+00,  ..., -1.7963e-04,
234                    -1.7963e-04, -1.7963e-04],
235                [ 2.3144e-01, -6.4129e+00, -8.4420e+00,  ..., -1.5891e-04,
236                    -1.5891e-04, -1.5891e-04]]),
237        'mel_spectrogram': tensor([[4.4167e-04, 1.0165e-02, 1.3079e-02,  ..., 0.0000e+00, 0.0000e+00,
238                    0.0000e+00],
239                [3.0977e-04, 1.5698e-02, 1.5785e-02,  ..., 0.0000e+00, 0.0000e+00,
240                    0.0000e+00],
241                [8.2318e-05, 1.4367e-02, 2.8095e-01,  ..., 0.0000e+00, 0.0000e+00,
242                    0.0000e+00],
243                ...,
244                [3.6322e-05, 9.7330e-03, 5.4812e-02,  ..., 0.0000e+00, 0.0000e+00,
245                    0.0000e+00],
246                [2.2802e-05, 1.2481e-02, 5.8374e-02,  ..., 0.0000e+00, 0.0000e+00,
247                    0.0000e+00],
248                [5.3029e-05, 3.1305e-02, 7.9842e-02,  ..., 0.0000e+00, 0.0000e+00,
249                    0.0000e+00]]),
250        'spectrogram': tensor([[3.5553e-06, 5.9962e-03, 2.7176e-02,  ..., 0.0000e+00, 0.0000e+00,
251                    0.0000e+00],
252                [5.0707e-04, 1.1670e-02, 1.5016e-02,  ..., 0.0000e+00, 0.0000e+00,
253                    0.0000e+00],
254                [3.1901e-04, 1.8529e-02, 1.8078e-02,  ..., 0.0000e+00, 0.0000e+00,
255                    0.0000e+00],
256                ...,
257                [1.0302e-05, 3.5917e-03, 2.7169e-03,  ..., 0.0000e+00, 0.0000e+00,
258                    0.0000e+00],
259                [9.6637e-08, 1.3364e-03, 1.8495e-02,  ..., 0.0000e+00, 0.0000e+00,
260                    0.0000e+00],
261                [1.4414e-05, 1.0598e-04, 2.8004e-02,  ..., 0.0000e+00, 0.0000e+00,
262                    0.0000e+00]])},
263        'parselmouth': ({'duration': 5.1613125,
264            'speaking_rate': 3.874983349680919,
265            'articulation_rate': 3.874983349680919,
266            'phonation_ratio': 1.0,
267            'pause_rate': 0.0,
268            'mean_pause_duration': 0.0,
269            'mean_f0_hertz': 118.59917806814313,
270            'std_f0_hertz': 30.232960797931817,
271            'mean_intensity_db': 69.76277128148347,
272            'std_intensity_db': 58.54414165935646,
273            'range_ratio_intensity_db': -0.25736445047981316,
274            'pitch_floor': 60.0,
275            'pitch_ceiling': 250.0,
276            'mean_hnr_db': 3.3285614070654375,
277            'std_hnr_db': 3.36490968797237,
278            'spectral_slope': -13.982306776816046,
279            'spectral_tilt': -0.004414961849917737,
280            'cepstral_peak_prominence_mean': 7.0388038514346825,
281            'cepstral_peak_prominence_std': 1.5672438573255245,
282            'mean_f1_loc': 613.4664268420964,
283            'std_f1_loc': 303.98235579059883,
284            'mean_b1_loc': 401.96960219300837,
285            'std_b1_loc': 400.9001719378358,
286            'mean_f2_loc': 1701.7755281579418,
287            'std_f2_loc': 325.4405394017738,
288            'mean_b2_loc': 434.542188503193,
289            'std_b2_loc': 380.8914612651878,
290            'spectral_gravity': 579.587511962247,
291            'spectral_std_dev': 651.3025011919739,
292            'spectral_skewness': 3.5879707548251045,
293            'spectral_kurtosis': 19.991495997865282,
294            'local_jitter': 0.02553484151620524,
295            'localabsolute_jitter': 0.00021392842618599855,
296            'rap_jitter': 0.012174051087556429,
297            'ppq5_jitter': 0.01597797849248675,
298            'ddp_jitter': 0.03652215326266929,
299            'local_shimmer': 0.1530474665829716,
300            'localDB_shimmer': 1.3511061323188314,
301            'apq3_shimmer': 0.0702984931637734,
302            'apq5_shimmer': 0.09680154282272849,
303            'apq11_shimmer': 0.19065409516266155,
304            'dda_shimmer': 0.2108954794913202},),
305        'torchaudio_squim': {'stoi': 0.9247563481330872,
306        'pesq': 1.3702949285507202,
307        'si_sdr': 11.71167278289795}}]
308    """
309    if opensmile:
310        default_opensmile = {
311            "feature_set": "eGeMAPSv02",
312            "feature_level": "Functionals",
313            "plugin": "serial",
314            "plugin_args": {},
315            "cache_dir": None,
316        }
317        if isinstance(opensmile, dict):
318            my_opensmile = {**default_opensmile, **opensmile}
319        else:
320            my_opensmile = default_opensmile
321        opensmile_features = extract_opensmile_features_from_audios(audios, **my_opensmile)  # type: ignore
322    if parselmouth:
323        default_parselmouth = {
324            "time_step": 0.005,
325            "window_length": 0.025,
326            "pitch_unit": "Hertz",
327            "cache_dir": None,
328            "speech_rate": True,
329            "intensity_descriptors": True,
330            "harmonicity_descriptors": True,
331            "formants": True,
332            "spectral_moments": True,
333            "pitch": True,
334            "slope_tilt": True,
335            "cpp_descriptors": True,
336            "duration": True,
337            "jitter": True,
338            "shimmer": True,
339            "plugin": "serial",
340            "plugin_args": {},
341        }
342        # Update default_parselmouth with provided parselmouth dictionary
343        if isinstance(parselmouth, dict):
344            my_parselmouth = {**default_parselmouth, **parselmouth}
345        else:
346            my_parselmouth = default_parselmouth
347
348        parselmouth_features = extract_praat_parselmouth_features_from_audios(audios=audios, **my_parselmouth)  # type: ignore
349
350    if torchaudio:
351        default_torchaudio: Dict[str, Any] = {
352            "freq_low": 80,
353            "freq_high": 500,
354            "n_fft": 1024,
355            "n_mels": 128,
356            "n_mfcc": 40,
357            "win_length": None,
358            "hop_length": None,
359            "plugin": "serial",
360            "plugin_args": {},
361            "cache_dir": None,
362        }
363        if isinstance(torchaudio, dict):
364            my_torchaudio = {**default_torchaudio, **torchaudio}
365        else:
366            my_torchaudio = default_torchaudio
367
368        torchaudio_features = extract_torchaudio_features_from_audios(audios=audios, **my_torchaudio)  # type: ignore
369    if torchaudio_squim:
370        torchaudio_squim_features = extract_objective_quality_features_from_audios(audios=audios)
371
372    results = []
373    for i in range(len(audios)):
374        result = {}
375        if opensmile:
376            result["opensmile"] = opensmile_features[i]
377        if parselmouth:
378            result["praat_parselmouth"] = parselmouth_features[i]
379        if torchaudio:
380            result["torchaudio"] = torchaudio_features[i]
381        if torchaudio_squim:
382            result["torchaudio_squim"] = torchaudio_squim_features[i]
383        results.append(result)
384
385    return results
def extract_features_from_audios( audios: List[senselab.audio.data_structures.audio.Audio], opensmile: Union[Dict[str, str], bool] = True, parselmouth: Union[Dict[str, str], bool] = True, torchaudio: Union[Dict[str, str], bool] = True, torchaudio_squim: bool = True) -> List[Dict[str, Any]]:
 20def extract_features_from_audios(
 21    audios: List[Audio],
 22    opensmile: Union[Dict[str, str], bool] = True,
 23    parselmouth: Union[Dict[str, str], bool] = True,
 24    torchaudio: Union[Dict[str, str], bool] = True,
 25    torchaudio_squim: bool = True,
 26) -> List[Dict[str, Any]]:
 27    """Extract features from a list of audio objects.
 28
 29    Args:
 30        audios (List[Audio]): The list of audio objects to extract features from.
 31        opensmile (Union[Dict[str, str], bool]): Parameters for OpenSMILE feature extraction.
 32            If False, OpenSMILE features will not be extracted. If True, uses default OpenSMILE parameters.
 33            If a dictionary, should contain "feature_set" and "feature_level" keys.
 34        parselmouth (Union[Dict[str, str], bool]): Parameters for Praat Parselmouth feature extraction.
 35            If False, Praat Parselmouth features will not be extracted.
 36            If True, uses default Praat Parselmouth parameters.
 37            If a dictionary, should contain "time_step", "window_length", "pitch_unit", "cache_dir",
 38            "speech_rate", "intensity_descriptors", "harmonicity_descriptors", "formants", "spectral_moments", "pitch",
 39            "slope_tilt", "cpp_descriptors", "duration", "jitter", "shimmer" keys.
 40        torchaudio (Union[Dict[str, str], bool]): Parameters for torchaudio feature extraction.
 41            If False, torchaudio features will not be extracted. If True, uses default torchaudio parameters.
 42            If a dictionary, should contain "freq_low", "freq_high", "n_fft", "n_mels", "n_mfcc",
 43            "win_length" and "hop_length" keys.
 44        torchaudio_squim (bool): Parameters for torchaudio_squim feature extraction.
 45            If False, torchaudio_squim features will not be extracted.
 46
 47    Returns:
 48        List[Dict[str, Any]]: The list of feature dictionaries for each audio.
 49
 50    Examples:
 51        >>> extract_features_from_audios(audios)
 52        [{'opensmile': {'F0semitoneFrom27.5Hz_sma3nz_amean': 25.710796356201172,
 53        'F0semitoneFrom27.5Hz_sma3nz_stddevNorm': 0.1605353206396103,
 54        'F0semitoneFrom27.5Hz_sma3nz_percentile20.0': 21.095951080322266,
 55        'F0semitoneFrom27.5Hz_sma3nz_percentile50.0': 25.9762020111084,
 56        'F0semitoneFrom27.5Hz_sma3nz_percentile80.0': 29.512413024902344,
 57        'F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2': 8.416461944580078,
 58        'F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope': 82.34796905517578,
 59        'F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope': 99.20043182373047,
 60        'F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope': 22.002275466918945,
 61        'F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope': 9.043970108032227,
 62        'loudness_sma3_amean': 0.86087566614151,
 63        'loudness_sma3_stddevNorm': 0.43875235319137573,
 64        'loudness_sma3_percentile20.0': 0.5877408981323242,
 65        'loudness_sma3_percentile50.0': 0.8352401852607727,
 66        'loudness_sma3_percentile80.0': 1.1747918128967285,
 67        'loudness_sma3_pctlrange0-2': 0.5870509147644043,
 68        'loudness_sma3_meanRisingSlope': 10.285204887390137,
 69        'loudness_sma3_stddevRisingSlope': 7.544795513153076,
 70        'loudness_sma3_meanFallingSlope': 7.612527370452881,
 71        'loudness_sma3_stddevFallingSlope': 4.15903902053833,
 72        'spectralFlux_sma3_amean': 0.3213598430156708,
 73        'spectralFlux_sma3_stddevNorm': 0.6921582818031311,
 74        'mfcc1_sma3_amean': 10.274803161621094,
 75        'mfcc1_sma3_stddevNorm': 1.1581648588180542,
 76        'mfcc2_sma3_amean': 4.262020111083984,
 77        'mfcc2_sma3_stddevNorm': 2.052302837371826,
 78        'mfcc3_sma3_amean': 7.624598026275635,
 79        'mfcc3_sma3_stddevNorm': 1.4570358991622925,
 80        'mfcc4_sma3_amean': 3.6676177978515625,
 81        'mfcc4_sma3_stddevNorm': 2.6902272701263428,
 82        'jitterLocal_sma3nz_amean': 0.019597552716732025,
 83        'jitterLocal_sma3nz_stddevNorm': 0.9063860177993774,
 84        'shimmerLocaldB_sma3nz_amean': 1.264746069908142,
 85        'shimmerLocaldB_sma3nz_stddevNorm': 0.4629262685775757,
 86        'HNRdBACF_sma3nz_amean': 3.6400067806243896,
 87        'HNRdBACF_sma3nz_stddevNorm': 0.5911334753036499,
 88        'logRelF0-H1-H2_sma3nz_amean': 1.215877652168274,
 89        'logRelF0-H1-H2_sma3nz_stddevNorm': 3.883843183517456,
 90        'logRelF0-H1-A3_sma3nz_amean': 18.830764770507812,
 91        'logRelF0-H1-A3_sma3nz_stddevNorm': 0.30870768427848816,
 92        'F1frequency_sma3nz_amean': 665.1713256835938,
 93        'F1frequency_sma3nz_stddevNorm': 0.41958823800086975,
 94        'F1bandwidth_sma3nz_amean': 1300.2757568359375,
 95        'F1bandwidth_sma3nz_stddevNorm': 0.16334553062915802,
 96        'F1amplitudeLogRelF0_sma3nz_amean': -132.1533660888672,
 97        'F1amplitudeLogRelF0_sma3nz_stddevNorm': -0.6691396832466125,
 98        'F2frequency_sma3nz_amean': 1657.013916015625,
 99        'F2frequency_sma3nz_stddevNorm': 0.17019854485988617,
100        'F2bandwidth_sma3nz_amean': 1105.7457275390625,
101        'F2bandwidth_sma3nz_stddevNorm': 0.24520403146743774,
102        'F2amplitudeLogRelF0_sma3nz_amean': -132.76707458496094,
103        'F2amplitudeLogRelF0_sma3nz_stddevNorm': -0.6468541026115417,
104        'F3frequency_sma3nz_amean': 2601.6630859375,
105        'F3frequency_sma3nz_stddevNorm': 0.11457356810569763,
106        'F3bandwidth_sma3nz_amean': 1091.15087890625,
107        'F3bandwidth_sma3nz_stddevNorm': 0.3787318468093872,
108        'F3amplitudeLogRelF0_sma3nz_amean': -134.52105712890625,
109        'F3amplitudeLogRelF0_sma3nz_stddevNorm': -0.620308518409729,
110        'alphaRatioV_sma3nz_amean': -8.626543045043945,
111        'alphaRatioV_sma3nz_stddevNorm': -0.4953792095184326,
112        'hammarbergIndexV_sma3nz_amean': 16.796842575073242,
113        'hammarbergIndexV_sma3nz_stddevNorm': 0.3567312955856323,
114        'slopeV0-500_sma3nz_amean': 0.021949246525764465,
115        'slopeV0-500_sma3nz_stddevNorm': 1.0097224712371826,
116        'slopeV500-1500_sma3nz_amean': -0.008139753714203835,
117        'slopeV500-1500_sma3nz_stddevNorm': -1.6243411302566528,
118        'spectralFluxV_sma3nz_amean': 0.4831695556640625,
119        'spectralFluxV_sma3nz_stddevNorm': 0.48576226830482483,
120        'mfcc1V_sma3nz_amean': 20.25444793701172,
121        'mfcc1V_sma3nz_stddevNorm': 0.44413772225379944,
122        'mfcc2V_sma3nz_amean': 3.619405746459961,
123        'mfcc2V_sma3nz_stddevNorm': 2.1765975952148438,
124        'mfcc3V_sma3nz_amean': 7.736487865447998,
125        'mfcc3V_sma3nz_stddevNorm': 1.8630998134613037,
126        'mfcc4V_sma3nz_amean': 4.60503625869751,
127        'mfcc4V_sma3nz_stddevNorm': 2.864668846130371,
128        'alphaRatioUV_sma3nz_amean': -2.5990121364593506,
129        'hammarbergIndexUV_sma3nz_amean': 8.862899780273438,
130        'slopeUV0-500_sma3nz_amean': 0.002166695659980178,
131        'slopeUV500-1500_sma3nz_amean': 0.006735736038535833,
132        'spectralFluxUV_sma3nz_amean': 0.24703539907932281,
133        'loudnessPeaksPerSec': 3.8834950923919678,
134        'VoicedSegmentsPerSec': 2.745098114013672,
135        'MeanVoicedSegmentLengthSec': 0.12214285880327225,
136        'StddevVoicedSegmentLengthSec': 0.09025190770626068,
137        'MeanUnvoicedSegmentLength': 0.20666664838790894,
138        'StddevUnvoicedSegmentLength': 0.17666037380695343,
139        'equivalentSoundLevel_dBp': -24.297256469726562},
140        'torchaudio': {'pitch': tensor([484.8485, 484.8485, 470.5882, 372.0930, 340.4255, 320.0000, 296.2963,
141                140.3509, 135.5932, 126.9841, 124.0310, 124.0310, 113.4752, 110.3448,
142                110.3448, 108.8435, 105.9603, 108.8435, 110.3448, 113.4752, 113.4752,
143                124.0310, 113.4752, 113.4752, 108.8435, 105.9603, 105.9603, 105.9603,
144                106.6667, 105.9603, 105.9603, 104.5752, 104.5752, 104.5752, 104.5752,
145                101.2658, 101.2658, 100.6289, 100.6289, 100.0000, 100.0000,  98.1595,
146                    98.1595,  98.1595,  95.8084,  95.8084,  95.8084,  95.2381,  95.2381,
147                    94.6746,  91.9540,  91.9540,  91.9540,  91.9540,  91.9540,  91.4286,
148                    91.4286,  91.4286,  91.4286,  91.4286,  91.4286,  91.4286,  90.9091,
149                    90.9091,  90.9091,  91.4286,  91.4286,  91.4286,  91.4286,  91.4286,
150                    91.4286,  91.4286,  91.4286,  91.4286,  91.4286,  91.4286,  91.4286,
151                    91.4286,  91.9540,  91.9540,  93.0233,  93.5673,  93.5673,  94.1176,
152                    94.6746,  94.6746,  94.6746,  95.8084,  96.3855,  96.9697, 100.0000,
153                100.0000, 100.0000, 100.0000, 100.0000, 100.0000, 103.8961, 104.5752,
154                104.5752, 106.6667, 106.6667, 106.6667, 111.1111, 116.7883, 116.7883,
155                116.7883, 118.5185, 121.2121, 121.2121, 121.2121, 121.2121, 121.2121,
156                121.2121, 121.2121, 121.2121, 121.2121, 121.2121, 121.2121, 122.1374,
157                123.0769, 123.0769, 125.9843, 125.9843, 125.9843, 123.0769, 123.0769,
158                122.1374, 122.1374, 121.2121, 121.2121, 121.2121, 120.3008, 117.6471,
159                117.6471, 117.6471, 108.8435, 108.1081, 106.6667, 105.2632, 105.2632,
160                105.2632, 105.2632, 105.2632, 105.2632, 105.2632, 105.2632, 105.2632,
161                108.8435, 105.2632, 105.2632, 105.2632, 105.2632, 105.2632, 105.2632,
162                105.2632, 119.4030, 119.4030, 120.3008, 120.3008, 121.2121, 121.2121,
163                121.2121, 121.2121, 121.2121, 121.2121, 121.2121, 121.2121, 122.1374,
164                122.1374, 122.1374, 122.1374, 123.0769, 123.0769, 123.0769, 123.0769,
165                123.0769, 123.0769, 120.3008, 120.3008, 120.3008, 119.4030, 113.4752,
166                106.6667, 103.2258, 103.2258,  96.9697,  96.9697,  96.9697,  96.9697,
167                    96.9697,  96.9697,  96.9697,  96.9697,  96.9697,  96.9697,  96.9697,
168                    96.9697,  96.9697,  96.9697,  96.9697,  96.9697,  96.9697,  96.9697,
169                    96.9697,  96.9697,  96.9697,  96.9697,  96.9697,  97.5610,  97.5610,
170                    97.5610,  97.5610,  97.5610,  98.1595, 100.0000, 100.6289, 100.6289,
171                100.6289, 100.6289, 101.2658, 101.2658, 101.2658, 101.2658, 101.2658,
172                101.2658, 101.2658, 101.2658, 101.2658, 101.2658, 101.2658,  97.5610,
173                    90.9091,  89.8876,  88.8889,  88.8889,  88.3978,  87.4317,  86.0215,
174                    86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,
175                    86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,
176                    86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,
177                    86.0215,  86.0215,  86.0215,  86.0215,  86.4865,  86.4865,  86.4865,
178                    86.4865,  86.4865,  87.4317,  87.9121,  87.9121,  87.9121,  89.8876,
179                    90.9091,  90.9091,  90.9091,  90.9091,  90.9091,  91.4286,  91.4286,
180                    91.4286,  92.4855,  92.4855,  93.0233,  93.0233,  93.0233,  93.5673,
181                    93.5673,  95.2381,  95.2381, 100.0000, 101.9108, 112.6761, 112.6761,
182                112.6761, 122.1374, 122.1374, 122.1374, 130.0813, 126.9841, 126.9841,
183                130.0813, 130.0813, 130.0813, 130.0813, 137.9310, 130.0813, 130.0813,
184                130.0813, 126.9841, 125.9843, 126.9841, 125.9843, 125.9843, 125.9843,
185                125.9843, 125.9843, 126.9841, 126.9841, 130.0813, 130.0813, 126.9841,
186                130.0813, 130.0813, 132.2314, 130.0813, 130.0813, 132.2314, 134.4538,
187                134.4538, 135.5932, 135.5932, 137.9310, 135.5932, 135.5932, 135.5932,
188                135.5932, 137.9310, 137.9310, 140.3509, 141.5929, 141.5929, 141.5929,
189                144.1441, 144.1441, 149.5327, 149.5327, 149.5327, 141.5929, 141.5929,
190                141.5929, 149.5327, 149.5327, 153.8462, 160.0000, 160.0000, 160.0000,
191                160.0000, 160.0000, 163.2653, 164.9485, 164.9485, 164.9485, 164.9485,
192                164.9485, 164.9485, 164.9485, 164.9485, 164.9485, 164.9485, 164.9485,
193                164.9485, 164.9485, 164.9485, 164.9485, 156.8627, 155.3398, 155.3398,
194                155.3398, 153.8462, 153.8462, 152.3810, 152.3810, 149.5327, 148.1481,
195                148.1481, 148.1481, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890,
196                148.1481, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890,
197                146.7890, 146.7890, 145.4545, 145.4545, 152.3810, 153.8462, 153.8462,
198                153.8462, 153.8462, 153.8462, 153.8462, 153.8462, 153.8462, 153.8462,
199                153.8462, 153.8462, 153.8462, 153.8462, 153.8462, 153.8462, 153.8462,
200                153.8462, 153.8462, 153.8462, 153.8462, 152.3810, 152.3810, 152.3810,
201                152.3810, 149.5327, 148.1481, 148.1481, 148.1481, 148.1481, 148.1481,
202                148.1481, 146.7890, 148.1481, 148.1481, 145.4545, 145.4545, 145.4545,
203                145.4545, 145.4545, 144.1441, 144.1441, 144.1441, 142.8571, 142.8571,
204                142.8571, 142.8571, 142.8571, 142.8571, 142.8571, 144.1441, 144.1441,
205                145.4545, 145.4545, 145.4545, 145.4545, 146.7890, 146.7890, 146.7890,
206                146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890,
207                146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 145.4545,
208                145.4545, 145.4545, 145.4545, 145.4545, 145.4545, 145.4545, 145.4545,
209                146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890,
210                400.0000, 400.0000, 484.8485, 484.8485, 484.8485, 484.8485, 484.8485,
211                484.8485, 484.8485, 484.8485, 484.8485, 484.8485]),
212        'mel_filter_bank': tensor([[4.4167e-04, 1.0165e-02, 1.3079e-02,  ..., 0.0000e+00, 0.0000e+00,
213                    0.0000e+00],
214                [3.0977e-04, 1.5698e-02, 1.5785e-02,  ..., 0.0000e+00, 0.0000e+00,
215                    0.0000e+00],
216                [8.2318e-05, 1.4367e-02, 2.8095e-01,  ..., 0.0000e+00, 0.0000e+00,
217                    0.0000e+00],
218                ...,
219                [3.6322e-05, 9.7330e-03, 5.4812e-02,  ..., 0.0000e+00, 0.0000e+00,
220                    0.0000e+00],
221                [2.2802e-05, 1.2481e-02, 5.8374e-02,  ..., 0.0000e+00, 0.0000e+00,
222                    0.0000e+00],
223                [5.3029e-05, 3.1305e-02, 7.9842e-02,  ..., 0.0000e+00, 0.0000e+00,
224                    0.0000e+00]]),
225        'mfcc': tensor([[-6.2570e+02, -4.7505e+02, -3.1078e+02,  ..., -6.3893e+02,
226                    -6.3893e+02, -6.3893e+02],
227                [ 1.3593e+01,  1.9928e+01,  2.6022e+01,  ...,  3.9824e-05,
228                    3.9824e-05,  3.9824e-05],
229                [ 7.3933e+00, -2.1680e+01, -1.4259e+01,  ..., -1.3440e-05,
230                    -1.3440e-05, -1.3440e-05],
231                ...,
232                [ 1.8122e+00, -3.1072e+00, -3.7336e+00,  ...,  7.0669e-05,
233                    7.0669e-05,  7.0669e-05],
234                [-2.7518e-01, -9.4738e+00, -2.3157e+00,  ..., -1.7963e-04,
235                    -1.7963e-04, -1.7963e-04],
236                [ 2.3144e-01, -6.4129e+00, -8.4420e+00,  ..., -1.5891e-04,
237                    -1.5891e-04, -1.5891e-04]]),
238        'mel_spectrogram': tensor([[4.4167e-04, 1.0165e-02, 1.3079e-02,  ..., 0.0000e+00, 0.0000e+00,
239                    0.0000e+00],
240                [3.0977e-04, 1.5698e-02, 1.5785e-02,  ..., 0.0000e+00, 0.0000e+00,
241                    0.0000e+00],
242                [8.2318e-05, 1.4367e-02, 2.8095e-01,  ..., 0.0000e+00, 0.0000e+00,
243                    0.0000e+00],
244                ...,
245                [3.6322e-05, 9.7330e-03, 5.4812e-02,  ..., 0.0000e+00, 0.0000e+00,
246                    0.0000e+00],
247                [2.2802e-05, 1.2481e-02, 5.8374e-02,  ..., 0.0000e+00, 0.0000e+00,
248                    0.0000e+00],
249                [5.3029e-05, 3.1305e-02, 7.9842e-02,  ..., 0.0000e+00, 0.0000e+00,
250                    0.0000e+00]]),
251        'spectrogram': tensor([[3.5553e-06, 5.9962e-03, 2.7176e-02,  ..., 0.0000e+00, 0.0000e+00,
252                    0.0000e+00],
253                [5.0707e-04, 1.1670e-02, 1.5016e-02,  ..., 0.0000e+00, 0.0000e+00,
254                    0.0000e+00],
255                [3.1901e-04, 1.8529e-02, 1.8078e-02,  ..., 0.0000e+00, 0.0000e+00,
256                    0.0000e+00],
257                ...,
258                [1.0302e-05, 3.5917e-03, 2.7169e-03,  ..., 0.0000e+00, 0.0000e+00,
259                    0.0000e+00],
260                [9.6637e-08, 1.3364e-03, 1.8495e-02,  ..., 0.0000e+00, 0.0000e+00,
261                    0.0000e+00],
262                [1.4414e-05, 1.0598e-04, 2.8004e-02,  ..., 0.0000e+00, 0.0000e+00,
263                    0.0000e+00]])},
264        'parselmouth': ({'duration': 5.1613125,
265            'speaking_rate': 3.874983349680919,
266            'articulation_rate': 3.874983349680919,
267            'phonation_ratio': 1.0,
268            'pause_rate': 0.0,
269            'mean_pause_duration': 0.0,
270            'mean_f0_hertz': 118.59917806814313,
271            'std_f0_hertz': 30.232960797931817,
272            'mean_intensity_db': 69.76277128148347,
273            'std_intensity_db': 58.54414165935646,
274            'range_ratio_intensity_db': -0.25736445047981316,
275            'pitch_floor': 60.0,
276            'pitch_ceiling': 250.0,
277            'mean_hnr_db': 3.3285614070654375,
278            'std_hnr_db': 3.36490968797237,
279            'spectral_slope': -13.982306776816046,
280            'spectral_tilt': -0.004414961849917737,
281            'cepstral_peak_prominence_mean': 7.0388038514346825,
282            'cepstral_peak_prominence_std': 1.5672438573255245,
283            'mean_f1_loc': 613.4664268420964,
284            'std_f1_loc': 303.98235579059883,
285            'mean_b1_loc': 401.96960219300837,
286            'std_b1_loc': 400.9001719378358,
287            'mean_f2_loc': 1701.7755281579418,
288            'std_f2_loc': 325.4405394017738,
289            'mean_b2_loc': 434.542188503193,
290            'std_b2_loc': 380.8914612651878,
291            'spectral_gravity': 579.587511962247,
292            'spectral_std_dev': 651.3025011919739,
293            'spectral_skewness': 3.5879707548251045,
294            'spectral_kurtosis': 19.991495997865282,
295            'local_jitter': 0.02553484151620524,
296            'localabsolute_jitter': 0.00021392842618599855,
297            'rap_jitter': 0.012174051087556429,
298            'ppq5_jitter': 0.01597797849248675,
299            'ddp_jitter': 0.03652215326266929,
300            'local_shimmer': 0.1530474665829716,
301            'localDB_shimmer': 1.3511061323188314,
302            'apq3_shimmer': 0.0702984931637734,
303            'apq5_shimmer': 0.09680154282272849,
304            'apq11_shimmer': 0.19065409516266155,
305            'dda_shimmer': 0.2108954794913202},),
306        'torchaudio_squim': {'stoi': 0.9247563481330872,
307        'pesq': 1.3702949285507202,
308        'si_sdr': 11.71167278289795}}]
309    """
310    if opensmile:
311        default_opensmile = {
312            "feature_set": "eGeMAPSv02",
313            "feature_level": "Functionals",
314            "plugin": "serial",
315            "plugin_args": {},
316            "cache_dir": None,
317        }
318        if isinstance(opensmile, dict):
319            my_opensmile = {**default_opensmile, **opensmile}
320        else:
321            my_opensmile = default_opensmile
322        opensmile_features = extract_opensmile_features_from_audios(audios, **my_opensmile)  # type: ignore
323    if parselmouth:
324        default_parselmouth = {
325            "time_step": 0.005,
326            "window_length": 0.025,
327            "pitch_unit": "Hertz",
328            "cache_dir": None,
329            "speech_rate": True,
330            "intensity_descriptors": True,
331            "harmonicity_descriptors": True,
332            "formants": True,
333            "spectral_moments": True,
334            "pitch": True,
335            "slope_tilt": True,
336            "cpp_descriptors": True,
337            "duration": True,
338            "jitter": True,
339            "shimmer": True,
340            "plugin": "serial",
341            "plugin_args": {},
342        }
343        # Update default_parselmouth with provided parselmouth dictionary
344        if isinstance(parselmouth, dict):
345            my_parselmouth = {**default_parselmouth, **parselmouth}
346        else:
347            my_parselmouth = default_parselmouth
348
349        parselmouth_features = extract_praat_parselmouth_features_from_audios(audios=audios, **my_parselmouth)  # type: ignore
350
351    if torchaudio:
352        default_torchaudio: Dict[str, Any] = {
353            "freq_low": 80,
354            "freq_high": 500,
355            "n_fft": 1024,
356            "n_mels": 128,
357            "n_mfcc": 40,
358            "win_length": None,
359            "hop_length": None,
360            "plugin": "serial",
361            "plugin_args": {},
362            "cache_dir": None,
363        }
364        if isinstance(torchaudio, dict):
365            my_torchaudio = {**default_torchaudio, **torchaudio}
366        else:
367            my_torchaudio = default_torchaudio
368
369        torchaudio_features = extract_torchaudio_features_from_audios(audios=audios, **my_torchaudio)  # type: ignore
370    if torchaudio_squim:
371        torchaudio_squim_features = extract_objective_quality_features_from_audios(audios=audios)
372
373    results = []
374    for i in range(len(audios)):
375        result = {}
376        if opensmile:
377            result["opensmile"] = opensmile_features[i]
378        if parselmouth:
379            result["praat_parselmouth"] = parselmouth_features[i]
380        if torchaudio:
381            result["torchaudio"] = torchaudio_features[i]
382        if torchaudio_squim:
383            result["torchaudio_squim"] = torchaudio_squim_features[i]
384        results.append(result)
385
386    return results

Extract features from a list of audio objects.

Arguments:
  • audios (List[Audio]): The list of audio objects to extract features from.
  • opensmile (Union[Dict[str, str], bool]): Parameters for OpenSMILE feature extraction. If False, OpenSMILE features will not be extracted. If True, uses default OpenSMILE parameters. If a dictionary, it may set the "feature_set" and "feature_level" keys (as well as "plugin", "plugin_args" and "cache_dir"); any key that is omitted keeps its default value.
  • parselmouth (Union[Dict[str, str], bool]): Parameters for Praat Parselmouth feature extraction. If False, Praat Parselmouth features will not be extracted. If True, uses default Praat Parselmouth parameters. If a dictionary, it may set any of the "time_step", "window_length", "pitch_unit", "cache_dir", "speech_rate", "intensity_descriptors", "harmonicity_descriptors", "formants", "spectral_moments", "pitch", "slope_tilt", "cpp_descriptors", "duration", "jitter", "shimmer", "plugin" and "plugin_args" keys; any key that is omitted keeps its default value.
  • torchaudio (Union[Dict[str, str], bool]): Parameters for torchaudio feature extraction. If False, torchaudio features will not be extracted. If True, uses default torchaudio parameters. If a dictionary, it may set any of the "freq_low", "freq_high", "n_fft", "n_mels", "n_mfcc", "win_length", "hop_length", "plugin", "plugin_args" and "cache_dir" keys; any key that is omitted keeps its default value.
  • torchaudio_squim (bool): Whether to extract torchaudio_squim (objective quality) features. If False, torchaudio_squim features will not be extracted. If True, they are extracted; this extractor takes no additional parameters here.
Returns:

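List[Dict[str, Any]]: A list with one feature dictionary per input audio. Each dictionary stores the output of every enabled extractor under the keys "opensmile", "praat_parselmouth", "torchaudio" and "torchaudio_squim"; the key of a disabled extractor is absent.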

Examples:
>>> extract_features_from_audios(audios)
[{'opensmile': {'F0semitoneFrom27.5Hz_sma3nz_amean': 25.710796356201172,
'F0semitoneFrom27.5Hz_sma3nz_stddevNorm': 0.1605353206396103,
'F0semitoneFrom27.5Hz_sma3nz_percentile20.0': 21.095951080322266,
'F0semitoneFrom27.5Hz_sma3nz_percentile50.0': 25.9762020111084,
'F0semitoneFrom27.5Hz_sma3nz_percentile80.0': 29.512413024902344,
'F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2': 8.416461944580078,
'F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope': 82.34796905517578,
'F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope': 99.20043182373047,
'F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope': 22.002275466918945,
'F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope': 9.043970108032227,
'loudness_sma3_amean': 0.86087566614151,
'loudness_sma3_stddevNorm': 0.43875235319137573,
'loudness_sma3_percentile20.0': 0.5877408981323242,
'loudness_sma3_percentile50.0': 0.8352401852607727,
'loudness_sma3_percentile80.0': 1.1747918128967285,
'loudness_sma3_pctlrange0-2': 0.5870509147644043,
'loudness_sma3_meanRisingSlope': 10.285204887390137,
'loudness_sma3_stddevRisingSlope': 7.544795513153076,
'loudness_sma3_meanFallingSlope': 7.612527370452881,
'loudness_sma3_stddevFallingSlope': 4.15903902053833,
'spectralFlux_sma3_amean': 0.3213598430156708,
'spectralFlux_sma3_stddevNorm': 0.6921582818031311,
'mfcc1_sma3_amean': 10.274803161621094,
'mfcc1_sma3_stddevNorm': 1.1581648588180542,
'mfcc2_sma3_amean': 4.262020111083984,
'mfcc2_sma3_stddevNorm': 2.052302837371826,
'mfcc3_sma3_amean': 7.624598026275635,
'mfcc3_sma3_stddevNorm': 1.4570358991622925,
'mfcc4_sma3_amean': 3.6676177978515625,
'mfcc4_sma3_stddevNorm': 2.6902272701263428,
'jitterLocal_sma3nz_amean': 0.019597552716732025,
'jitterLocal_sma3nz_stddevNorm': 0.9063860177993774,
'shimmerLocaldB_sma3nz_amean': 1.264746069908142,
'shimmerLocaldB_sma3nz_stddevNorm': 0.4629262685775757,
'HNRdBACF_sma3nz_amean': 3.6400067806243896,
'HNRdBACF_sma3nz_stddevNorm': 0.5911334753036499,
'logRelF0-H1-H2_sma3nz_amean': 1.215877652168274,
'logRelF0-H1-H2_sma3nz_stddevNorm': 3.883843183517456,
'logRelF0-H1-A3_sma3nz_amean': 18.830764770507812,
'logRelF0-H1-A3_sma3nz_stddevNorm': 0.30870768427848816,
'F1frequency_sma3nz_amean': 665.1713256835938,
'F1frequency_sma3nz_stddevNorm': 0.41958823800086975,
'F1bandwidth_sma3nz_amean': 1300.2757568359375,
'F1bandwidth_sma3nz_stddevNorm': 0.16334553062915802,
'F1amplitudeLogRelF0_sma3nz_amean': -132.1533660888672,
'F1amplitudeLogRelF0_sma3nz_stddevNorm': -0.6691396832466125,
'F2frequency_sma3nz_amean': 1657.013916015625,
'F2frequency_sma3nz_stddevNorm': 0.17019854485988617,
'F2bandwidth_sma3nz_amean': 1105.7457275390625,
'F2bandwidth_sma3nz_stddevNorm': 0.24520403146743774,
'F2amplitudeLogRelF0_sma3nz_amean': -132.76707458496094,
'F2amplitudeLogRelF0_sma3nz_stddevNorm': -0.6468541026115417,
'F3frequency_sma3nz_amean': 2601.6630859375,
'F3frequency_sma3nz_stddevNorm': 0.11457356810569763,
'F3bandwidth_sma3nz_amean': 1091.15087890625,
'F3bandwidth_sma3nz_stddevNorm': 0.3787318468093872,
'F3amplitudeLogRelF0_sma3nz_amean': -134.52105712890625,
'F3amplitudeLogRelF0_sma3nz_stddevNorm': -0.620308518409729,
'alphaRatioV_sma3nz_amean': -8.626543045043945,
'alphaRatioV_sma3nz_stddevNorm': -0.4953792095184326,
'hammarbergIndexV_sma3nz_amean': 16.796842575073242,
'hammarbergIndexV_sma3nz_stddevNorm': 0.3567312955856323,
'slopeV0-500_sma3nz_amean': 0.021949246525764465,
'slopeV0-500_sma3nz_stddevNorm': 1.0097224712371826,
'slopeV500-1500_sma3nz_amean': -0.008139753714203835,
'slopeV500-1500_sma3nz_stddevNorm': -1.6243411302566528,
'spectralFluxV_sma3nz_amean': 0.4831695556640625,
'spectralFluxV_sma3nz_stddevNorm': 0.48576226830482483,
'mfcc1V_sma3nz_amean': 20.25444793701172,
'mfcc1V_sma3nz_stddevNorm': 0.44413772225379944,
'mfcc2V_sma3nz_amean': 3.619405746459961,
'mfcc2V_sma3nz_stddevNorm': 2.1765975952148438,
'mfcc3V_sma3nz_amean': 7.736487865447998,
'mfcc3V_sma3nz_stddevNorm': 1.8630998134613037,
'mfcc4V_sma3nz_amean': 4.60503625869751,
'mfcc4V_sma3nz_stddevNorm': 2.864668846130371,
'alphaRatioUV_sma3nz_amean': -2.5990121364593506,
'hammarbergIndexUV_sma3nz_amean': 8.862899780273438,
'slopeUV0-500_sma3nz_amean': 0.002166695659980178,
'slopeUV500-1500_sma3nz_amean': 0.006735736038535833,
'spectralFluxUV_sma3nz_amean': 0.24703539907932281,
'loudnessPeaksPerSec': 3.8834950923919678,
'VoicedSegmentsPerSec': 2.745098114013672,
'MeanVoicedSegmentLengthSec': 0.12214285880327225,
'StddevVoicedSegmentLengthSec': 0.09025190770626068,
'MeanUnvoicedSegmentLength': 0.20666664838790894,
'StddevUnvoicedSegmentLength': 0.17666037380695343,
'equivalentSoundLevel_dBp': -24.297256469726562},
'torchaudio': {'pitch': tensor([484.8485, 484.8485, 470.5882, 372.0930, 340.4255, 320.0000, 296.2963,
        140.3509, 135.5932, 126.9841, 124.0310, 124.0310, 113.4752, 110.3448,
        110.3448, 108.8435, 105.9603, 108.8435, 110.3448, 113.4752, 113.4752,
        124.0310, 113.4752, 113.4752, 108.8435, 105.9603, 105.9603, 105.9603,
        106.6667, 105.9603, 105.9603, 104.5752, 104.5752, 104.5752, 104.5752,
        101.2658, 101.2658, 100.6289, 100.6289, 100.0000, 100.0000,  98.1595,
            98.1595,  98.1595,  95.8084,  95.8084,  95.8084,  95.2381,  95.2381,
            94.6746,  91.9540,  91.9540,  91.9540,  91.9540,  91.9540,  91.4286,
            91.4286,  91.4286,  91.4286,  91.4286,  91.4286,  91.4286,  90.9091,
            90.9091,  90.9091,  91.4286,  91.4286,  91.4286,  91.4286,  91.4286,
            91.4286,  91.4286,  91.4286,  91.4286,  91.4286,  91.4286,  91.4286,
            91.4286,  91.9540,  91.9540,  93.0233,  93.5673,  93.5673,  94.1176,
            94.6746,  94.6746,  94.6746,  95.8084,  96.3855,  96.9697, 100.0000,
        100.0000, 100.0000, 100.0000, 100.0000, 100.0000, 103.8961, 104.5752,
        104.5752, 106.6667, 106.6667, 106.6667, 111.1111, 116.7883, 116.7883,
        116.7883, 118.5185, 121.2121, 121.2121, 121.2121, 121.2121, 121.2121,
        121.2121, 121.2121, 121.2121, 121.2121, 121.2121, 121.2121, 122.1374,
        123.0769, 123.0769, 125.9843, 125.9843, 125.9843, 123.0769, 123.0769,
        122.1374, 122.1374, 121.2121, 121.2121, 121.2121, 120.3008, 117.6471,
        117.6471, 117.6471, 108.8435, 108.1081, 106.6667, 105.2632, 105.2632,
        105.2632, 105.2632, 105.2632, 105.2632, 105.2632, 105.2632, 105.2632,
        108.8435, 105.2632, 105.2632, 105.2632, 105.2632, 105.2632, 105.2632,
        105.2632, 119.4030, 119.4030, 120.3008, 120.3008, 121.2121, 121.2121,
        121.2121, 121.2121, 121.2121, 121.2121, 121.2121, 121.2121, 122.1374,
        122.1374, 122.1374, 122.1374, 123.0769, 123.0769, 123.0769, 123.0769,
        123.0769, 123.0769, 120.3008, 120.3008, 120.3008, 119.4030, 113.4752,
        106.6667, 103.2258, 103.2258,  96.9697,  96.9697,  96.9697,  96.9697,
            96.9697,  96.9697,  96.9697,  96.9697,  96.9697,  96.9697,  96.9697,
            96.9697,  96.9697,  96.9697,  96.9697,  96.9697,  96.9697,  96.9697,
            96.9697,  96.9697,  96.9697,  96.9697,  96.9697,  97.5610,  97.5610,
            97.5610,  97.5610,  97.5610,  98.1595, 100.0000, 100.6289, 100.6289,
        100.6289, 100.6289, 101.2658, 101.2658, 101.2658, 101.2658, 101.2658,
        101.2658, 101.2658, 101.2658, 101.2658, 101.2658, 101.2658,  97.5610,
            90.9091,  89.8876,  88.8889,  88.8889,  88.3978,  87.4317,  86.0215,
            86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,
            86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,
            86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,  86.0215,
            86.0215,  86.0215,  86.0215,  86.0215,  86.4865,  86.4865,  86.4865,
            86.4865,  86.4865,  87.4317,  87.9121,  87.9121,  87.9121,  89.8876,
            90.9091,  90.9091,  90.9091,  90.9091,  90.9091,  91.4286,  91.4286,
            91.4286,  92.4855,  92.4855,  93.0233,  93.0233,  93.0233,  93.5673,
            93.5673,  95.2381,  95.2381, 100.0000, 101.9108, 112.6761, 112.6761,
        112.6761, 122.1374, 122.1374, 122.1374, 130.0813, 126.9841, 126.9841,
        130.0813, 130.0813, 130.0813, 130.0813, 137.9310, 130.0813, 130.0813,
        130.0813, 126.9841, 125.9843, 126.9841, 125.9843, 125.9843, 125.9843,
        125.9843, 125.9843, 126.9841, 126.9841, 130.0813, 130.0813, 126.9841,
        130.0813, 130.0813, 132.2314, 130.0813, 130.0813, 132.2314, 134.4538,
        134.4538, 135.5932, 135.5932, 137.9310, 135.5932, 135.5932, 135.5932,
        135.5932, 137.9310, 137.9310, 140.3509, 141.5929, 141.5929, 141.5929,
        144.1441, 144.1441, 149.5327, 149.5327, 149.5327, 141.5929, 141.5929,
        141.5929, 149.5327, 149.5327, 153.8462, 160.0000, 160.0000, 160.0000,
        160.0000, 160.0000, 163.2653, 164.9485, 164.9485, 164.9485, 164.9485,
        164.9485, 164.9485, 164.9485, 164.9485, 164.9485, 164.9485, 164.9485,
        164.9485, 164.9485, 164.9485, 164.9485, 156.8627, 155.3398, 155.3398,
        155.3398, 153.8462, 153.8462, 152.3810, 152.3810, 149.5327, 148.1481,
        148.1481, 148.1481, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890,
        148.1481, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890,
        146.7890, 146.7890, 145.4545, 145.4545, 152.3810, 153.8462, 153.8462,
        153.8462, 153.8462, 153.8462, 153.8462, 153.8462, 153.8462, 153.8462,
        153.8462, 153.8462, 153.8462, 153.8462, 153.8462, 153.8462, 153.8462,
        153.8462, 153.8462, 153.8462, 153.8462, 152.3810, 152.3810, 152.3810,
        152.3810, 149.5327, 148.1481, 148.1481, 148.1481, 148.1481, 148.1481,
        148.1481, 146.7890, 148.1481, 148.1481, 145.4545, 145.4545, 145.4545,
        145.4545, 145.4545, 144.1441, 144.1441, 144.1441, 142.8571, 142.8571,
        142.8571, 142.8571, 142.8571, 142.8571, 142.8571, 144.1441, 144.1441,
        145.4545, 145.4545, 145.4545, 145.4545, 146.7890, 146.7890, 146.7890,
        146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890,
        146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 145.4545,
        145.4545, 145.4545, 145.4545, 145.4545, 145.4545, 145.4545, 145.4545,
        146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890, 146.7890,
        400.0000, 400.0000, 484.8485, 484.8485, 484.8485, 484.8485, 484.8485,
        484.8485, 484.8485, 484.8485, 484.8485, 484.8485]),
'mel_filter_bank': tensor([[4.4167e-04, 1.0165e-02, 1.3079e-02,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00],
        [3.0977e-04, 1.5698e-02, 1.5785e-02,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00],
        [8.2318e-05, 1.4367e-02, 2.8095e-01,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00],
        ...,
        [3.6322e-05, 9.7330e-03, 5.4812e-02,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00],
        [2.2802e-05, 1.2481e-02, 5.8374e-02,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00],
        [5.3029e-05, 3.1305e-02, 7.9842e-02,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00]]),
'mfcc': tensor([[-6.2570e+02, -4.7505e+02, -3.1078e+02,  ..., -6.3893e+02,
            -6.3893e+02, -6.3893e+02],
        [ 1.3593e+01,  1.9928e+01,  2.6022e+01,  ...,  3.9824e-05,
            3.9824e-05,  3.9824e-05],
        [ 7.3933e+00, -2.1680e+01, -1.4259e+01,  ..., -1.3440e-05,
            -1.3440e-05, -1.3440e-05],
        ...,
        [ 1.8122e+00, -3.1072e+00, -3.7336e+00,  ...,  7.0669e-05,
            7.0669e-05,  7.0669e-05],
        [-2.7518e-01, -9.4738e+00, -2.3157e+00,  ..., -1.7963e-04,
            -1.7963e-04, -1.7963e-04],
        [ 2.3144e-01, -6.4129e+00, -8.4420e+00,  ..., -1.5891e-04,
            -1.5891e-04, -1.5891e-04]]),
'mel_spectrogram': tensor([[4.4167e-04, 1.0165e-02, 1.3079e-02,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00],
        [3.0977e-04, 1.5698e-02, 1.5785e-02,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00],
        [8.2318e-05, 1.4367e-02, 2.8095e-01,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00],
        ...,
        [3.6322e-05, 9.7330e-03, 5.4812e-02,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00],
        [2.2802e-05, 1.2481e-02, 5.8374e-02,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00],
        [5.3029e-05, 3.1305e-02, 7.9842e-02,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00]]),
'spectrogram': tensor([[3.5553e-06, 5.9962e-03, 2.7176e-02,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00],
        [5.0707e-04, 1.1670e-02, 1.5016e-02,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00],
        [3.1901e-04, 1.8529e-02, 1.8078e-02,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00],
        ...,
        [1.0302e-05, 3.5917e-03, 2.7169e-03,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00],
        [9.6637e-08, 1.3364e-03, 1.8495e-02,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00],
        [1.4414e-05, 1.0598e-04, 2.8004e-02,  ..., 0.0000e+00, 0.0000e+00,
            0.0000e+00]])},
'praat_parselmouth': ({'duration': 5.1613125,
    'speaking_rate': 3.874983349680919,
    'articulation_rate': 3.874983349680919,
    'phonation_ratio': 1.0,
    'pause_rate': 0.0,
    'mean_pause_duration': 0.0,
    'mean_f0_hertz': 118.59917806814313,
    'std_f0_hertz': 30.232960797931817,
    'mean_intensity_db': 69.76277128148347,
    'std_intensity_db': 58.54414165935646,
    'range_ratio_intensity_db': -0.25736445047981316,
    'pitch_floor': 60.0,
    'pitch_ceiling': 250.0,
    'mean_hnr_db': 3.3285614070654375,
    'std_hnr_db': 3.36490968797237,
    'spectral_slope': -13.982306776816046,
    'spectral_tilt': -0.004414961849917737,
    'cepstral_peak_prominence_mean': 7.0388038514346825,
    'cepstral_peak_prominence_std': 1.5672438573255245,
    'mean_f1_loc': 613.4664268420964,
    'std_f1_loc': 303.98235579059883,
    'mean_b1_loc': 401.96960219300837,
    'std_b1_loc': 400.9001719378358,
    'mean_f2_loc': 1701.7755281579418,
    'std_f2_loc': 325.4405394017738,
    'mean_b2_loc': 434.542188503193,
    'std_b2_loc': 380.8914612651878,
    'spectral_gravity': 579.587511962247,
    'spectral_std_dev': 651.3025011919739,
    'spectral_skewness': 3.5879707548251045,
    'spectral_kurtosis': 19.991495997865282,
    'local_jitter': 0.02553484151620524,
    'localabsolute_jitter': 0.00021392842618599855,
    'rap_jitter': 0.012174051087556429,
    'ppq5_jitter': 0.01597797849248675,
    'ddp_jitter': 0.03652215326266929,
    'local_shimmer': 0.1530474665829716,
    'localDB_shimmer': 1.3511061323188314,
    'apq3_shimmer': 0.0702984931637734,
    'apq5_shimmer': 0.09680154282272849,
    'apq11_shimmer': 0.19065409516266155,
    'dda_shimmer': 0.2108954794913202},),
'torchaudio_squim': {'stoi': 0.9247563481330872,
'pesq': 1.3702949285507202,
'si_sdr': 11.71167278289795}}]