Analysis files

This commit is contained in:
agres
2025-04-03 15:14:53 +02:00
parent 4ae2a5ace6
commit 4706332180
5 changed files with 2233 additions and 45 deletions
+3
View File
@@ -1,3 +1,6 @@
# Audio previews
audio_previews/
# My testing file # My testing file
main_test.py main_test.py
+337
View File
@@ -0,0 +1,337 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Processing file 1/1772...\n",
"Processing file 2/1772...\n",
"Processing file 3/1772...\n",
"Processing file 4/1772...\n",
"Processing file 5/1772...\n",
"Processing file 6/1772...\n",
"Processing file 7/1772...\n",
"Processing file 8/1772...\n",
"Processing file 9/1772...\n",
"Processing file 10/1772...\n",
"Processing file 11/1772...\n",
"Processing file 12/1772...\n",
"Processing file 13/1772...\n",
"Processing file 14/1772...\n",
"Processing file 15/1772...\n",
"Processing file 16/1772...\n",
"Processing file 17/1772...\n",
"Processing file 18/1772...\n",
"Processing file 19/1772...\n",
"Processing file 20/1772...\n",
"Processing file 21/1772...\n",
"Processing file 22/1772...\n",
"Processing file 23/1772...\n",
"Processing file 24/1772...\n",
"Processing file 25/1772...\n",
"Processing file 26/1772...\n",
"Processing file 27/1772...\n",
"Processing file 28/1772...\n",
"Processing file 29/1772...\n",
"Processing file 30/1772...\n",
"Processing file 31/1772...\n",
"Processing file 32/1772...\n",
"Processing file 33/1772...\n",
"Processing file 34/1772...\n",
"Processing file 35/1772...\n",
"Processing file 36/1772...\n",
"Processing file 37/1772...\n",
"Processing file 38/1772...\n",
"Processing file 39/1772...\n",
"Processing file 40/1772...\n",
"Processing file 41/1772...\n",
"Processing file 42/1772...\n",
"Processing file 43/1772...\n",
"Processing file 44/1772...\n",
"Processing file 45/1772...\n",
"Processing file 46/1772...\n",
"Processing file 47/1772...\n",
"Processing file 48/1772...\n",
"Processing file 49/1772...\n",
"Processing file 50/1772...\n",
"Processing file 51/1772...\n",
"Processing file 52/1772...\n",
"Processing file 53/1772...\n",
"Processing file 54/1772...\n",
"Processing file 55/1772...\n",
"Processing file 56/1772...\n",
"Processing file 57/1772...\n",
"Processing file 58/1772...\n",
"Processing file 59/1772...\n",
"Processing file 60/1772...\n",
"Processing file 61/1772...\n",
"Processing file 62/1772...\n",
"Processing file 63/1772...\n",
"Processing file 64/1772...\n",
"Processing file 65/1772...\n",
"Processing file 66/1772...\n",
"Processing file 67/1772...\n",
"Processing file 68/1772...\n",
"Processing file 69/1772...\n",
"Processing file 70/1772...\n",
"Processing file 71/1772...\n",
"Processing file 72/1772...\n",
"Processing file 73/1772...\n",
"Processing file 74/1772...\n",
"Processing file 75/1772...\n",
"Processing file 76/1772...\n",
"Processing file 77/1772...\n",
"Processing file 78/1772...\n",
"Processing file 79/1772...\n",
"Processing file 80/1772...\n",
"Processing file 81/1772...\n",
"Processing file 82/1772...\n",
"Processing file 83/1772...\n",
"Processing file 84/1772...\n",
"Processing file 85/1772...\n",
"Processing file 86/1772...\n",
"Processing file 87/1772...\n",
"Processing file 88/1772...\n",
"Processing file 89/1772...\n",
"Processing file 90/1772...\n",
"Processing file 91/1772...\n",
"Processing file 92/1772...\n",
"Processing file 93/1772...\n",
"Processing file 94/1772...\n",
"Processing file 95/1772...\n",
"Processing file 96/1772...\n",
"Processing file 97/1772...\n",
"Processing file 98/1772...\n",
"Processing file 99/1772...\n",
"Processing file 100/1772...\n",
"Processing file 101/1772...\n",
"Processing file 102/1772...\n",
"Processing file 103/1772...\n",
"Processing file 104/1772...\n",
"Processing file 105/1772...\n",
"Processing file 106/1772...\n",
"Processing file 107/1772...\n",
"Processing file 108/1772...\n",
"Processing file 109/1772...\n",
"Processing file 110/1772...\n",
"Processing file 111/1772...\n",
"Processing file 112/1772...\n",
"Processing file 113/1772...\n",
"Processing file 114/1772...\n",
"Processing file 115/1772...\n",
"Processing file 116/1772...\n",
"Processing file 117/1772...\n",
"Processing file 118/1772...\n",
"Processing file 119/1772...\n",
"Processing file 120/1772...\n",
"Processing file 121/1772...\n",
"Processing file 122/1772...\n",
"Processing file 123/1772...\n",
"Processing file 124/1772...\n",
"Processing file 125/1772...\n",
"Processing file 126/1772...\n",
"Processing file 127/1772...\n",
"Processing file 128/1772...\n",
"Processing file 129/1772...\n",
"Processing file 130/1772...\n",
"Processing file 131/1772...\n",
"Processing file 132/1772...\n",
"Processing file 133/1772...\n",
"Processing file 134/1772...\n",
"Processing file 135/1772...\n",
"Processing file 136/1772...\n",
"Processing file 137/1772...\n",
"Processing file 138/1772...\n",
"Processing file 139/1772...\n",
"Processing file 140/1772...\n",
"Processing file 141/1772...\n",
"Processing file 142/1772...\n",
"Processing file 143/1772...\n",
"Processing file 144/1772...\n",
"Processing file 145/1772...\n",
"Processing file 146/1772...\n",
"Processing file 147/1772...\n",
"Processing file 148/1772...\n",
"Processing file 149/1772...\n",
"Processing file 150/1772...\n",
"Processing file 151/1772...\n",
"Processing file 152/1772...\n",
"Processing file 153/1772...\n",
"Processing file 154/1772...\n",
"Processing file 155/1772...\n",
"Processing file 156/1772...\n",
"Processing file 157/1772...\n",
"Processing file 158/1772...\n",
"Processing file 159/1772...\n",
"Processing file 160/1772...\n",
"Processing file 161/1772...\n",
"Processing file 162/1772...\n",
"Processing file 163/1772...\n",
"Processing file 164/1772...\n",
"Processing file 165/1772...\n",
"Processing file 166/1772...\n",
"Processing file 167/1772...\n",
"Processing file 168/1772...\n",
"Processing file 169/1772...\n",
"Processing file 170/1772...\n",
"Processing file 171/1772...\n",
"Processing file 172/1772...\n",
"Processing file 173/1772...\n",
"Processing file 174/1772...\n",
"Processing file 175/1772...\n",
"Processing file 176/1772...\n",
"Processing file 177/1772...\n",
"Processing file 178/1772...\n",
"Processing file 179/1772...\n",
"Processing file 180/1772...\n",
"Processing file 181/1772...\n",
"Processing file 182/1772...\n",
"Processing file 183/1772...\n",
"Processing file 184/1772...\n",
"Processing file 185/1772...\n",
"Processing file 186/1772...\n",
"Processing file 187/1772...\n",
"Processing file 188/1772...\n",
"Processing file 189/1772...\n",
"Processing file 190/1772...\n",
"Processing file 191/1772...\n",
"Processing file 192/1772...\n",
"Processing file 193/1772...\n",
"Processing file 194/1772...\n",
"Processing file 195/1772...\n",
"Processing file 196/1772...\n",
"Processing file 197/1772...\n",
"Processing file 198/1772...\n",
"Processing file 199/1772...\n",
"Processing file 200/1772...\n",
"Processing file 201/1772...\n",
"Processing file 202/1772...\n",
"Processing file 203/1772...\n",
"Processing file 204/1772...\n",
"Processing file 205/1772...\n",
"Processing file 206/1772...\n",
"Processing file 207/1772...\n",
"Processing file 208/1772...\n",
"Processing file 209/1772...\n",
"Processing file 210/1772...\n",
"Processing file 211/1772...\n",
"Processing file 212/1772...\n",
"Processing file 213/1772...\n",
"Processing file 214/1772...\n",
"Processing file 215/1772...\n",
"Processing file 216/1772...\n",
"Processing file 217/1772...\n",
"Processing file 218/1772...\n",
"Processing file 219/1772...\n",
"Processing file 220/1772...\n",
"Processing file 221/1772...\n",
"Processing file 222/1772...\n",
"Processing file 223/1772...\n",
"Processing file 224/1772...\n",
"Processing file 225/1772...\n",
"Processing file 226/1772...\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mKeyboardInterrupt\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 53\u001b[39m\n\u001b[32m 51\u001b[39m file_path = os.path.join(folder_path, file)\n\u001b[32m 52\u001b[39m file_id = os.path.splitext(file)[\u001b[32m0\u001b[39m]\n\u001b[32m---> \u001b[39m\u001b[32m53\u001b[39m features = \u001b[43mextract_features_librosa\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 54\u001b[39m audio_features_essentia[file_id] = features\n\u001b[32m 56\u001b[39m \u001b[38;5;28mprint\u001b[39m(audio_features_essentia)\n",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 12\u001b[39m, in \u001b[36mextract_features_librosa\u001b[39m\u001b[34m(file_path)\u001b[39m\n\u001b[32m 10\u001b[39m mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=\u001b[32m13\u001b[39m)\n\u001b[32m 11\u001b[39m chroma = librosa.feature.chroma_stft(y=y, sr=sr)\n\u001b[32m---> \u001b[39m\u001b[32m12\u001b[39m spectral_centroid = \u001b[43mlibrosa\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfeature\u001b[49m\u001b[43m.\u001b[49m\u001b[43mspectral_centroid\u001b[49m\u001b[43m(\u001b[49m\u001b[43my\u001b[49m\u001b[43m=\u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msr\u001b[49m\u001b[43m=\u001b[49m\u001b[43msr\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 13\u001b[39m spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)\n\u001b[32m 14\u001b[39m zero_crossings = librosa.feature.zero_crossing_rate(y)\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/Projects/seb/predictify/.venv/lib/python3.11/site-packages/librosa/feature/spectral.py:186\u001b[39m, in \u001b[36mspectral_centroid\u001b[39m\u001b[34m(y, sr, S, n_fft, hop_length, freq, win_length, window, center, pad_mode)\u001b[39m\n\u001b[32m 183\u001b[39m freq = util.expand_to(freq, ndim=S.ndim, axes=-\u001b[32m2\u001b[39m)\n\u001b[32m 185\u001b[39m \u001b[38;5;66;03m# Column-normalize S\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m186\u001b[39m centroid: np.ndarray = \u001b[43mnp\u001b[49m\u001b[43m.\u001b[49m\u001b[43msum\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 187\u001b[39m \u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m \u001b[49m\u001b[43mutil\u001b[49m\u001b[43m.\u001b[49m\u001b[43mnormalize\u001b[49m\u001b[43m(\u001b[49m\u001b[43mS\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnorm\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m=\u001b[49m\u001b[43m-\u001b[49m\u001b[32;43m2\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m=\u001b[49m\u001b[43m-\u001b[49m\u001b[32;43m2\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\n\u001b[32m 188\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 189\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m centroid\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/Projects/seb/predictify/.venv/lib/python3.11/site-packages/numpy/core/fromnumeric.py:2313\u001b[39m, in \u001b[36msum\u001b[39m\u001b[34m(a, axis, dtype, out, keepdims, initial, where)\u001b[39m\n\u001b[32m 2310\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m out\n\u001b[32m 2311\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m res\n\u001b[32m-> \u001b[39m\u001b[32m2313\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_wrapreduction\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[43m.\u001b[49m\u001b[43madd\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43msum\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[43m=\u001b[49m\u001b[43mkeepdims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2314\u001b[39m \u001b[43m \u001b[49m\u001b[43minitial\u001b[49m\u001b[43m=\u001b[49m\u001b[43minitial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mwhere\u001b[49m\u001b[43m=\u001b[49m\u001b[43mwhere\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/Projects/seb/predictify/.venv/lib/python3.11/site-packages/numpy/core/fromnumeric.py:88\u001b[39m, in \u001b[36m_wrapreduction\u001b[39m\u001b[34m(obj, ufunc, method, axis, dtype, out, **kwargs)\u001b[39m\n\u001b[32m 85\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 86\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m reduction(axis=axis, out=out, **passkwargs)\n\u001b[32m---> \u001b[39m\u001b[32m88\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mufunc\u001b[49m\u001b[43m.\u001b[49m\u001b[43mreduce\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mpasskwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[31mKeyboardInterrupt\u001b[39m: "
]
}
],
"source": [
"import os\n",
"import librosa\n",
"import numpy as np\n",
"\n",
"def extract_features_librosa(file_path):\n",
" # Load the audio file\n",
" y, sr = librosa.load(file_path, sr=None)\n",
" \n",
" # Extract features\n",
" mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)\n",
" chroma = librosa.feature.chroma_stft(y=y, sr=sr)\n",
" spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)\n",
" spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)\n",
" zero_crossings = librosa.feature.zero_crossing_rate(y)\n",
" rms = librosa.feature.rms(y=y)\n",
" \n",
" # Estimate tempo and beats\n",
" tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)\n",
" \n",
" # A simple heuristic for \"danceability\"\n",
" # For example, we can assume higher tempo and consistent beats might be more danceable\n",
" # (This is only an example and not a scientifically rigorous metric.)\n",
" danceability = tempo / 200 # Normalize tempo (assume 200 BPM as an upper bound)\n",
" \n",
" # Aggregate features into a dictionary\n",
" features = {\n",
" 'mfcc_mean': np.mean(mfccs, axis=1).tolist(), # Mean of each MFCC coefficient\n",
" 'mfcc_std': np.std(mfccs, axis=1).tolist(),\n",
" 'chroma_mean': np.mean(chroma, axis=1).tolist(),\n",
" 'spectral_centroid_mean': np.mean(spectral_centroid).item(),\n",
" 'spectral_rolloff_mean': np.mean(spectral_rolloff).item(),\n",
" 'zero_crossing_rate_mean': np.mean(zero_crossings).item(),\n",
" 'rms_mean': np.mean(rms).item(),\n",
" 'tempo': tempo,\n",
" 'danceability': danceability,\n",
" 'beat_count': len(beat_frames)\n",
" }\n",
" return features\n",
"\n",
"folder_path = './audio_previews'\n",
"audio_features_essentia = {}\n",
"\n",
"folder_len = os.listdir(folder_path)\n",
"folder_len_num = len(folder_len)\n",
"counter = 0\n",
"\n",
"for file in os.listdir(folder_path):\n",
" if file.endswith('.mp3'):\n",
" counter += 1\n",
" print(f'Processing file {counter}/{folder_len_num}...')\n",
" file_path = os.path.join(folder_path, file)\n",
" file_id = os.path.splitext(file)[0]\n",
" features = extract_features_librosa(file_path)\n",
" audio_features_essentia[file_id] = features\n",
"\n",
"print(audio_features_essentia)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
+1860 -44
View File
File diff suppressed because it is too large Load Diff
+1 -1
View File
@@ -54,7 +54,7 @@ if args.verbose:
db_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', f'spotify_scrape_{args.export}.db') db_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', f'spotify_scrape_{args.export}.db')
if args.export == 'TEST': if args.export == 'TEST':
export_size = 200 export_size = 10000
log.info(f'Scraping GDPR Data. Sample size: {export_size}') log.info(f'Scraping GDPR Data. Sample size: {export_size}')
db = Database(db_path) db = Database(db_path)
export_gdpr_data(db, export_size) export_gdpr_data(db, export_size)
+32
View File
@@ -0,0 +1,32 @@
import re
from typing import Optional
import requests
def get_spotify_preview_url(
    spotify_track_id: str, *, timeout: float = 10.0
) -> Optional[str]:
    """
    Get the preview URL for a Spotify track using the embed page workaround.

    The track's embed page is downloaded and its HTML is searched for the
    ``"audioPreview": {"url": "..."}`` fragment.

    Args:
        spotify_track_id (str): The Spotify track ID
        timeout (float): Seconds to wait for the embed page before giving up
            (keyword-only, defaults to 10 seconds).

    Returns:
        Optional[str]: The preview URL if found, else None. None is also
        returned for an empty track ID and when the HTTP request fails.
    """
    # An empty ID cannot name a track; skip the pointless network round trip.
    if not spotify_track_id:
        return None

    embed_url = f"https://open.spotify.com/embed/track/{spotify_track_id}"
    try:
        # Without an explicit timeout a stalled connection can block forever.
        response = requests.get(embed_url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        # Best-effort lookup: report the failure and let the caller carry on.
        print(f"Failed to fetch Spotify preview URL: {e}")
        return None

    match = re.search(
        r'"audioPreview":\s*{\s*"url":\s*"([^"]+)"', response.text
    )
    return match.group(1) if match else None
# Example usage: look up the preview URL for one hard-coded track ID.
# NOTE(review): if this call sits at module level it performs an HTTP request
# every time the file is imported -- consider an `if __name__ == "__main__":` guard.
preview_url = get_spotify_preview_url('1301WleyT98MSxVHPZCA6M')