mirror of
https://github.com/agresdominik/predictify.git
synced 2026-04-21 17:55:49 +00:00
Merge pull request #30 from agresdominik/feat/audio_analysis
Feat/audio analysis
This commit is contained in:
@@ -0,0 +1,11 @@
|
||||
logs/
|
||||
data/
|
||||
src/__pycache__/
|
||||
.git
|
||||
*.md
|
||||
.venv
|
||||
LICENSE
|
||||
MAKEFILE
|
||||
pytest.ini
|
||||
test/
|
||||
|
||||
+25
-5
@@ -1,11 +1,31 @@
|
||||
# Machine Learning grid search
|
||||
my_dir/
|
||||
|
||||
# Audio previews
|
||||
audio_previews/
|
||||
|
||||
# Audio data files
|
||||
audio_features*
|
||||
audio_data/
|
||||
|
||||
# My testing file
|
||||
main_test.py
|
||||
|
||||
# .db
|
||||
*.db
|
||||
|
||||
# DS_Store
|
||||
.DS_Store
|
||||
|
||||
# Gdpr Data file
|
||||
Streaming_History*
|
||||
|
||||
# Test running file
|
||||
main_test.py
|
||||
|
||||
# databases
|
||||
*.db
|
||||
|
||||
# Custom Tokens file/rotator
|
||||
tokens.json
|
||||
# data dir
|
||||
data/*
|
||||
data-docker/
|
||||
|
||||
# Visual Studio Code
|
||||
.vscode/
|
||||
|
||||
@@ -22,7 +22,7 @@ repos:
|
||||
files: \.(json)$
|
||||
|
||||
- id: check-added-large-files # Prevent large files from being committed
|
||||
args: ['--maxkb=1000']
|
||||
args: ['--maxkb=2000']
|
||||
|
||||
- id: check-ast # Check for parse errors in Python files
|
||||
exclude: '.*test.*'
|
||||
|
||||
-24
@@ -1,24 +0,0 @@
|
||||
FROM alpine:latest
|
||||
|
||||
WORKDIR /root
|
||||
|
||||
RUN apk update && \
|
||||
apk add --no-cache \
|
||||
openssh \
|
||||
python3 \
|
||||
py3-pip \
|
||||
sqlite
|
||||
|
||||
EXPOSE 22
|
||||
|
||||
RUN mkdir /root/src
|
||||
|
||||
COPY ./startup.sh /root
|
||||
COPY ./requirements.txt /root
|
||||
COPY ./src/ /root/src/
|
||||
|
||||
RUN ls -la
|
||||
|
||||
VOLUME /root
|
||||
|
||||
ENTRYPOINT ["/bin/sh", "/root/startup.sh"]
|
||||
@@ -0,0 +1,19 @@
|
||||
.PHONY: all dockerfile clean

TAG="unstable"
PROJ_NAME="predictify"

all: install dockerfile

# Create the local data directory used for persistent storage.
install:
	mkdir -p ./data

# Build the container image from the docker/ Dockerfile.
dockerfile: ./docker/Dockerfile
	docker build \
		--tag "$(PROJ_NAME):$(TAG)" \
		--build-arg PROJ_NAME=$(PROJ_NAME) \
		--file ./docker/Dockerfile \
		.

# The prerequisite must name the file that is actually removed (it lives in
# ./data); the old prerequisite ./spotify_scraped.db never exists, so
# `make clean` aborted with "No rule to make target".
clean: ./data/spotify_scraped.db
	rm -r ./data/spotify_scraped.db
|
||||
@@ -11,15 +11,45 @@ A Data analysis tool to scrape your Spotify History usage and let a ML-Model pre
|
||||
|
||||
## Usable possible APIs
|
||||
|
||||
Recently Played Tracks: /me/player/recently-played [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-recently-played)
|
||||
Recently Played Tracks: `/me/player/recently-played` [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-recently-played)
|
||||
|
||||
Get Track: /tracks/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-track)
|
||||
Get Track: `/tracks/{id}` [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-track)
|
||||
|
||||
Get Track's Audio Features - Deprecated: /audio-features/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-features)
|
||||
Get Track's Audio Features _(Deprecated)_: `/audio-features/{id}` [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-features)
|
||||
|
||||
Get Track's Audio Analysis - Deprecated: /audio-analysis/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-analysis)
|
||||
Get Track's Audio Analysis _(Deprecated)_: `/audio-analysis/{id}` [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-analysis)
|
||||
|
||||
Get Artist: /artists/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-an-artist)
|
||||
Get Artist: `/artists/{id}` [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-an-artist)
|
||||
|
||||
## Docker usage
|
||||
|
||||
`cd` inside the projects directory:
|
||||
```sh
|
||||
cd predictify
|
||||
```
|
||||
To run predictify inside a container, first make sure to build the image:
|
||||
```sh
|
||||
make dockerfile
|
||||
```
|
||||
Create a separate data directory (e.g. `data-docker`):
|
||||
```sh
|
||||
mkdir data-docker
|
||||
```
|
||||
> [!NOTE]
|
||||
> To detach the container to run it in the background add the `--detach` directly after the `run` command.
|
||||
Then run the following docker command, to run the container in the foreground:
|
||||
```sh
|
||||
docker run \
|
||||
--name predictify \
|
||||
--network=host \
|
||||
--volume $(pwd)/data-docker:/app/predictify/data \
|
||||
--volume $(pwd)/config:/app/predictify/config \
|
||||
predictify:unstable
|
||||
```
|
||||
|
||||
## GDPR Data
|
||||
|
||||
If you have gdpr data, create a folder: ```data/gdpr_data``` and add all .json files containing your play history into it. In order to extract it, run the script: ```python3 src/runtime.py --export```
|
||||
|
||||
## Authors
|
||||
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
FROM alpine:3.21.3

# Set environment variables
ARG PROJ_NAME
ENV PROJ_NAME=${PROJ_NAME}

RUN mkdir -p /app/${PROJ_NAME}

# The following steps are executed from the specified directory below
WORKDIR /app/${PROJ_NAME}

# Install all necessary software
RUN apk add --no-cache python3 sqlite

# Create the directories, needed for persistent storage (e.g. database, tokens)
RUN mkdir ./data ./src ./config

# Create mount points for logs, data, src and config
# NOTE(review): Docker expects VOLUME paths to be absolute; the relative
# ./data ./src ./config entries may not resolve against WORKDIR as intended --
# confirm against the Dockerfile reference.
VOLUME /var/log ./data ./src ./config

# Copy the application source code
COPY ./src/ ./src/

# Create a separate venv inside the container & install requirements.
# Use `python3` (the only interpreter name the alpine python3 package
# guarantees) and call the venv's pip directly: `source` and `deactivate`
# are bash-isms that the default /bin/sh used by RUN does not provide, and
# invoking ./.venv/bin/pip makes activation unnecessary anyway.
COPY ./requirements.txt ./requirements.txt
RUN \
    python3 -m venv .venv && \
    ./.venv/bin/pip install -r ./requirements.txt

COPY ./docker/startup.sh ./startup.sh

# When starting the container the following is executed
ENTRYPOINT ["./startup.sh"]
|
||||
Executable
+5
@@ -0,0 +1,5 @@
|
||||
#!/bin/sh
#
# Startup predictify. Don't use this. This is for docker specifically.
#
# Use the POSIX `.` command instead of `source`: the script runs under
# /bin/sh, and `source` is a bash-ism that plain POSIX shells (e.g. dash)
# do not define. Activation is kept for any env vars it exports, although
# the interpreter is invoked by its venv path explicitly below.
. .venv/bin/activate
.venv/bin/python src/runtime.py --export
|
||||
@@ -4,3 +4,14 @@ pre-commit==4.1.0
|
||||
pytest==8.3.5
|
||||
coverage==7.7.0
|
||||
pytest-cov==6.0.0
|
||||
pandas==2.2.3
|
||||
numpy==1.26.4
|
||||
scikit-learn==1.6.1
|
||||
tensorflow==2.19.0
|
||||
keras==3.9.2
|
||||
keras-tuner==1.4.7
|
||||
scikeras==0.13.0
|
||||
matplotlib==3.10.1
|
||||
seaborn==0.13.2
|
||||
librosa==0.11.0
|
||||
optuna==4.2.1
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1,28 @@
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def get_spotify_preview_url(spotify_track_id: str) -> Optional[str]:
    """
    Get the preview URL for a Spotify track using the embed page workaround.

    The public embed page (https://open.spotify.com/embed/track/<id>) embeds
    a JSON blob containing an ``"audioPreview": {"url": ...}`` entry; this
    function scrapes that URL out of the returned HTML with a regex.

    Args:
        spotify_track_id (str): The Spotify track ID

    Returns:
        Optional[str]: The preview URL if found, else None
    """
    try:
        embed_url = f"https://open.spotify.com/embed/track/{spotify_track_id}"
        # Always pass a timeout: requests has no default, so a stalled
        # connection would otherwise hang the caller indefinitely.
        response = requests.get(embed_url, timeout=10)
        response.raise_for_status()

        html = response.text
        match = re.search(r'"audioPreview":\s*{\s*"url":\s*"([^"]+)"', html)
        return match.group(1) if match else None

    # Catch only network/HTTP failures instead of a bare Exception, so
    # programming errors are not silently swallowed.
    except requests.RequestException as e:
        print(f"Failed to fetch Spotify preview URL: {e}")
        return None
|
||||
@@ -0,0 +1,391 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 7814.41track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 8865.11track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 8410.16track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 10286.20track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6751.92track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 7016.85track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 9608.71track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 569.98track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 8934.23track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 3487.43track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 8381.08track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 3057.72track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 6150.47track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 6555.71track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 2342.34track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 9073.67track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 6341.27track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 4801.47track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 4224.31track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 7571.09track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 91/91 [00:00<00:00, 6534.41track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 7016.58track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 7011.93track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 7224.25track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 5970.09track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 1830.87track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 7771.45track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 3839.22track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 8010.83track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 7/7 [00:00<00:00, 1725.85track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 80/80 [00:00<00:00, 3127.45track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 5919.12track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 2211.42track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 5711.20track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 5389.72track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 5007.79track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 5448.83track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 1677.91track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 5254.51track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 5087.50track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 6186.85track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 1513.61track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 6105.52track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 4209.85track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 1611.84track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 127.48track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 200.62track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 5717.10track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 3484.29track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 177.04track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 5664.96track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 239.08track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 223.04track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 5842.92track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 7040.71track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 7355.77track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 292.89track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 8041.64track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 420.54track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 6490.87track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 5549.89track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 5031.36track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 1444.37track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 5870.31track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 4974.82track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 4823.21track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6310.05track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 196/196 [00:00<00:00, 312.44track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 5850.47track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 4904.72track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 5343.90track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 4764.65track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 4891.16track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 280.38track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 4945.14track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 4609.60track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 1155.63track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 3454.36track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 4191.60track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 4414.67track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 4393.90track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 2788.99track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 6180.40track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 260.50track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 4974.38track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 204.43track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 189/189 [00:00<00:00, 433.69track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 4620.28track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 5229.06track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 6571.83track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 252.47track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 7138.69track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 4936.31track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 5408.81track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 6418.59track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 6733.21track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 6277.22track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 168.85track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 5975.06track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 7002.79track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 6256.22track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 6033.96track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 283.78track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 6277.83track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 5573.59track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 6510.58track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6384.23track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6124.12track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6541.53track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 857.85track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 190/190 [00:00<00:00, 375.59track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 10254.22track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 6399.47track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 6457.48track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 237.51track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 6714.17track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 287.82track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6351.42track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 7704.99track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 449.76track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 6541.76track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 7323.53track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 465.08track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:15<00:00, 6.16track/s] \n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:26<00:00, 3.60track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:25<00:00, 3.85track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 191/191 [00:57<00:00, 3.34track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:27<00:00, 3.49track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 194/194 [00:53<00:00, 3.63track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:27<00:00, 3.58track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 187/187 [00:55<00:00, 3.35track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:29<00:00, 3.19track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 196/196 [00:57<00:00, 3.41track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 92/92 [00:25<00:00, 3.63track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 197/197 [00:52<00:00, 3.75track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 190/190 [00:51<00:00, 3.71track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:26<00:00, 3.69track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 194/194 [00:55<00:00, 3.50track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:28<00:00, 3.46track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:25<00:00, 3.69track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 92/92 [00:25<00:00, 3.65track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 193/193 [00:55<00:00, 3.46track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:26<00:00, 3.59track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 190/190 [00:50<00:00, 3.74track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:25<00:00, 3.86track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 191/191 [00:52<00:00, 3.63track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:28<00:00, 3.40track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 195/195 [00:55<00:00, 3.54track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 93/93 [00:27<00:00, 3.44track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:27<00:00, 3.57track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:34<00:00, 2.81track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:27<00:00, 3.55track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.38track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 190/190 [00:52<00:00, 3.64track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 99/99 [00:32<00:00, 3.01track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:28<00:00, 3.36track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:24<00:00, 3.92track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.40track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 188/188 [00:49<00:00, 3.79track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:26<00:00, 3.53track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 191/191 [00:55<00:00, 3.45track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.30track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:29<00:00, 3.23track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 90/90 [00:22<00:00, 3.93track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:25<00:00, 3.63track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:26<00:00, 3.60track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:26<00:00, 3.72track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 90/90 [00:24<00:00, 3.66track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:28<00:00, 3.38track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:27<00:00, 3.59track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:25<00:00, 3.74track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:25<00:00, 3.80track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 93/93 [00:25<00:00, 3.69track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 99/99 [00:27<00:00, 3.62track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:25<00:00, 3.71track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:26<00:00, 3.55track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 193/193 [00:50<00:00, 3.83track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 197/197 [00:53<00:00, 3.67track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 185/185 [00:46<00:00, 4.01track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 195/195 [00:48<00:00, 4.03track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 190/190 [00:51<00:00, 3.68track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:26<00:00, 3.64track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 197/197 [00:52<00:00, 3.72track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:24<00:00, 3.87track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 195/195 [01:04<00:00, 3.01track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 193/193 [00:54<00:00, 3.57track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:28<00:00, 3.35track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 194/194 [00:55<00:00, 3.47track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 192/192 [00:59<00:00, 3.23track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.36track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 190/190 [00:51<00:00, 3.67track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 189/189 [01:02<00:00, 3.01track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 99/99 [00:28<00:00, 3.51track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 188/188 [00:55<00:00, 3.40track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:29<00:00, 3.19track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:38<00:00, 2.45track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 194/194 [00:55<00:00, 3.50track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:30<00:00, 3.13track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 93/93 [00:27<00:00, 3.35track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 186/186 [00:56<00:00, 3.31track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 190/190 [00:54<00:00, 3.52track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:40<00:00, 2.39track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 93/93 [00:56<00:00, 1.64track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 193/193 [00:54<00:00, 3.57track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 195/195 [01:04<00:00, 3.03track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:32<00:00, 2.93track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 192/192 [01:05<00:00, 2.92track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:31<00:00, 3.12track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:34<00:00, 2.82track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 190/190 [00:55<00:00, 3.40track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:26<00:00, 3.49track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 193/193 [00:53<00:00, 3.58track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:31<00:00, 3.03track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 99/99 [00:28<00:00, 3.42track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 184/184 [00:50<00:00, 3.61track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 99/99 [00:31<00:00, 3.15track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:27<00:00, 3.42track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.33track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 189/189 [00:52<00:00, 3.60track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:27<00:00, 3.54track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:25<00:00, 3.72track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 91/91 [00:26<00:00, 3.47track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:27<00:00, 3.50track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 88/88 [00:23<00:00, 3.78track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:29<00:00, 3.35track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 186/186 [00:53<00:00, 3.46track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:29<00:00, 3.22track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:30<00:00, 3.13track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:32<00:00, 2.91track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 186/186 [00:56<00:00, 3.27track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.34track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 92/92 [00:24<00:00, 3.72track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 93/93 [00:26<00:00, 3.56track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 186/186 [00:53<00:00, 3.46track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:29<00:00, 3.18track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 93/93 [00:27<00:00, 3.43track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 190/190 [01:01<00:00, 3.08track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 93/93 [00:28<00:00, 3.29track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 197/197 [00:59<00:00, 3.31track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 192/192 [00:59<00:00, 3.22track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:37<00:00, 2.59track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 192/192 [00:55<00:00, 3.48track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:26<00:00, 3.62track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:27<00:00, 3.48track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 188/188 [00:54<00:00, 3.44track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:28<00:00, 3.39track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 92/92 [00:28<00:00, 3.22track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:29<00:00, 3.30track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:27<00:00, 3.48track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 97/97 [00:29<00:00, 3.34track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:26<00:00, 3.66track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 193/193 [00:55<00:00, 3.49track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 193/193 [00:53<00:00, 3.62track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 94/94 [00:27<00:00, 3.41track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 188/188 [00:51<00:00, 3.62track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 191/191 [00:56<00:00, 3.41track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 193/193 [00:54<00:00, 3.52track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 191/191 [00:57<00:00, 3.30track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 196/196 [00:57<00:00, 3.43track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 95/95 [00:25<00:00, 3.67track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 98/98 [00:34<00:00, 2.82track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 188/188 [00:56<00:00, 3.35track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:28<00:00, 3.34track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 96/96 [00:29<00:00, 3.22track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 191/191 [00:58<00:00, 3.29track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 82/82 [00:25<00:00, 3.27track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 5/5 [00:00<00:00, 649.53track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 16/16 [00:00<00:00, 2081.48track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 4/4 [00:00<00:00, 1143.17track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 16/16 [00:00<00:00, 2154.59track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 49/49 [00:10<00:00, 4.51track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 36/36 [00:10<00:00, 3.49track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 19/19 [00:06<00:00, 2.76track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 24/24 [00:07<00:00, 3.36track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 20/20 [00:06<00:00, 2.99track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 33/33 [00:09<00:00, 3.33track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 30/30 [00:08<00:00, 3.72track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 10/10 [00:02<00:00, 3.87track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 2/2 [00:00<00:00, 439.26track/s]\n",
|
||||
"Downloading previews: 100%|██████████| 1/1 [00:00<00:00, 5.52track/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "KeyboardInterrupt",
|
||||
"evalue": "",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||
"\u001b[31mKeyboardInterrupt\u001b[39m Traceback (most recent call last)",
|
||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 29\u001b[39m\n\u001b[32m 26\u001b[39m df_new = df[~df[\u001b[33m'\u001b[39m\u001b[33mtrack_id\u001b[39m\u001b[33m'\u001b[39m].isin(processed)].copy()\n\u001b[32m 27\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m df_new.empty:\n\u001b[32m 28\u001b[39m \u001b[38;5;66;03m# nothing new → wait and retry\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m29\u001b[39m time.sleep(SLEEP_INTERVAL)\n\u001b[32m 30\u001b[39m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[32m 32\u001b[39m \u001b[38;5;66;03m# 3) Download each new preview with a progress bar\u001b[39;00m\n",
|
||||
"\u001b[31mKeyboardInterrupt\u001b[39m: "
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import time\n",
|
||||
"import requests\n",
|
||||
"import pandas as pd\n",
|
||||
"from tqdm import tqdm\n",
|
||||
"\n",
|
||||
"CSV_PATH = './track_genre_balanced_url.csv'\n",
|
||||
"DOWNLOAD_DIR = 'audio_previews'\n",
|
||||
"SLEEP_INTERVAL = 60 # seconds to wait between checks\n",
|
||||
"\n",
|
||||
"os.makedirs(DOWNLOAD_DIR, exist_ok=True)\n",
|
||||
"\n",
|
||||
"# Keep track of which track_ids we've already attempted\n",
|
||||
"processed = set()\n",
|
||||
"\n",
|
||||
"while True:\n",
|
||||
" # 1) Load current CSV\n",
|
||||
" try:\n",
|
||||
" df = pd.read_csv(CSV_PATH)\n",
|
||||
" except FileNotFoundError:\n",
|
||||
" print(f\"{CSV_PATH} not found, waiting...\")\n",
|
||||
" time.sleep(SLEEP_INTERVAL)\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" # 2) Identify new tracks we haven't processed yet\n",
|
||||
" df_new = df[~df['track_id'].isin(processed)].copy()\n",
|
||||
" if df_new.empty:\n",
|
||||
" # nothing new → wait and retry\n",
|
||||
" time.sleep(SLEEP_INTERVAL)\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" # 3) Download each new preview with a progress bar\n",
|
||||
" for _, row in tqdm(df_new.iterrows(),\n",
|
||||
" total=len(df_new),\n",
|
||||
" desc=\"Downloading previews\",\n",
|
||||
" unit=\"track\"):\n",
|
||||
" track_id = row['track_id']\n",
|
||||
" preview_url = row['preview']\n",
|
||||
" out_path = os.path.join(DOWNLOAD_DIR, f\"{track_id}.mp3\")\n",
|
||||
"\n",
|
||||
" # mark as processed so we don't retry on crashes\n",
|
||||
" processed.add(track_id)\n",
|
||||
"\n",
|
||||
" # skip if file already exists\n",
|
||||
" if os.path.exists(out_path):\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" # attempt download\n",
|
||||
" try:\n",
|
||||
" resp = requests.get(preview_url, timeout=30)\n",
|
||||
" if resp.status_code == 200:\n",
|
||||
" with open(out_path, 'wb') as f:\n",
|
||||
" f.write(resp.content)\n",
|
||||
" else:\n",
|
||||
" print(f\"HTTP {resp.status_code} for {track_id}\")\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Error downloading {track_id}: {e}\")\n",
|
||||
"\n",
|
||||
" # 4) Pause before next check\n",
|
||||
" time.sleep(SLEEP_INTERVAL)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,297 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Read out Data from Kaggle Dataset, get preview URL-s and save to file\n",
|
||||
"\n",
|
||||
"## this should be run only once"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fetching previews: 6%|▋ | 76/1183 [00:35<33:39, 1.82s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/64ffsubBonytxZc5fQJhdO\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fetching previews: 9%|▊ | 102/1183 [00:55<34:18, 1.90s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/2Iu5wxKFiEEQDQK1Pldsis\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fetching previews: 9%|▉ | 111/1183 [01:03<33:10, 1.86s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/6syvS9gZzjB8b9DdKVhAJH\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fetching previews: 15%|█▌ | 180/1183 [01:54<53:30, 3.20s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/2qrVR11O44iJ0DVTNCExjA\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fetching previews: 19%|█▉ | 225/1183 [02:25<29:37, 1.86s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/3njPW0vttbjt5j1Elt6sJI\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fetching previews: 32%|███▏ | 381/1183 [03:26<23:39, 1.77s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/3T7zNYia3nk9d8uXhO9Xud\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fetching previews: 53%|█████▎ | 630/1183 [05:23<16:28, 1.79s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/41Sfs0E8hr8w2BvzUtof4O\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fetching previews: 54%|█████▎ | 633/1183 [05:29<20:57, 2.29s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/3H9aA6IO5gfHW72m8YU8Iv\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fetching previews: 57%|█████▋ | 675/1183 [05:56<15:49, 1.87s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/0lvHnw9Exl8jLV3zuRsksJ\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fetching previews: 67%|██████▋ | 792/1183 [07:06<12:08, 1.86s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/17sSDGIRIkB0jOKb2cBURf\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fetching previews: 77%|███████▋ | 911/1183 [08:03<09:15, 2.04s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/5RcZ5jbBgKDdM6BuoSeh8P\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fetching previews: 77%|███████▋ | 912/1183 [08:08<13:32, 3.00s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/0YQrHOpi219lZA8SDly4iG\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fetching previews: 90%|█████████ | 1069/1183 [09:31<03:31, 1.85s/it]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/2iql0ydkQX1hZ375EyRFFF\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Fetching previews: 100%|██████████| 1183/1183 [10:19<00:00, 1.91it/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import os\n",
|
||||
"from spotify_preview import get_spotify_preview_url\n",
|
||||
"from tqdm import tqdm\n",
|
||||
"\n",
|
||||
"# --- 0) Load & dedupe your balanced track/genre file ---\n",
|
||||
"df = pd.read_csv('track_genres_balanced.csv')\n",
|
||||
"df = df.drop_duplicates(subset=['track_id'])\n",
|
||||
"df = df.dropna(subset=['genre'])\n",
|
||||
"\n",
|
||||
"# --- 1) Prep output CSV (header only once) ---\n",
|
||||
"#output_csv = 'track_genre_balanced_url.csv'\n",
|
||||
"#pd.DataFrame(columns=['track_id','genre','preview']).to_csv(output_csv, index=False)\n",
|
||||
"#output_csv = pd.read_csv('track_genre_balanced_url.csv')\n",
|
||||
"\n",
|
||||
"output_csv = 'track_genre_balanced_url.csv'\n",
|
||||
"\n",
|
||||
"if os.path.exists(output_csv):\n",
|
||||
" # load already-fetched track_ids and drop them from df\n",
|
||||
" done = pd.read_csv(output_csv, usecols=['track_id'])\n",
|
||||
" processed_ids = set(done['track_id'].astype(str))\n",
|
||||
" df = df[~df['track_id'].astype(str).isin(processed_ids)]\n",
|
||||
" write_header = False\n",
|
||||
"else:\n",
|
||||
" # new file → write header\n",
|
||||
" pd.DataFrame(columns=['track_id','genre','preview']) \\\n",
|
||||
" .to_csv(output_csv, index=False)\n",
|
||||
" write_header = False # header is already there\n",
|
||||
"\n",
|
||||
"# --- 2) Parameters ---\n",
|
||||
"BATCH_SIZE = 100 # how many tracks to process per “mini‐batch”\n",
|
||||
"PAUSE = 0.1 # if you want a small sleep between API calls\n",
|
||||
"\n",
|
||||
"# --- 3) Loop with a single progress bar over all tracks ---\n",
|
||||
"with tqdm(total=len(df), desc=\"Fetching previews\") as pbar:\n",
|
||||
" for start in range(0, len(df), BATCH_SIZE):\n",
|
||||
" chunk = df.iloc[start:start + BATCH_SIZE]\n",
|
||||
" rows = []\n",
|
||||
"\n",
|
||||
" # 4) Per‐track lookup\n",
|
||||
" for _, row in chunk.iterrows():\n",
|
||||
" track_id = row['track_id']\n",
|
||||
" genre = row['genre']\n",
|
||||
"\n",
|
||||
" preview = get_spotify_preview_url(track_id)\n",
|
||||
" if preview:\n",
|
||||
" rows.append({\n",
|
||||
" 'track_id': track_id,\n",
|
||||
" 'genre': genre,\n",
|
||||
" 'preview': preview\n",
|
||||
" })\n",
|
||||
" # else: silently skip or print an error if you prefer\n",
|
||||
"\n",
|
||||
" pbar.update(1)\n",
|
||||
" if PAUSE:\n",
|
||||
" import time; time.sleep(PAUSE)\n",
|
||||
"\n",
|
||||
" # 5) Append this batch’s hits to disk\n",
|
||||
" if rows:\n",
|
||||
" pd.DataFrame(rows).to_csv(\n",
|
||||
" output_csv,\n",
|
||||
" mode='a',\n",
|
||||
" header=False,\n",
|
||||
" index=False\n",
|
||||
" )\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
+25
-4
@@ -1,6 +1,5 @@
|
||||
import base64
|
||||
import json
|
||||
import logging as log
|
||||
import os
|
||||
import time
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||
@@ -9,7 +8,11 @@ from urllib.parse import parse_qs, urlencode, urlparse
|
||||
import dotenv
|
||||
import requests
|
||||
|
||||
TOKEN_FILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'env', 'tokens.json')
|
||||
from logger import LoggerWrapper
|
||||
|
||||
TOKEN_FILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'tokens.json')
|
||||
|
||||
log = LoggerWrapper()
|
||||
|
||||
|
||||
def simple_authenticate(grant_type: str = "client_credentials") -> str:
|
||||
@@ -32,13 +35,17 @@ def simple_authenticate(grant_type: str = "client_credentials") -> str:
|
||||
"grant_type": f"{grant_type}"
|
||||
}
|
||||
|
||||
try:
|
||||
response = requests.post(token_url, headers=headers, data=data)
|
||||
except requests.exceptions.RequestException as e:
|
||||
log.error(f"Error authenticating: {e}")
|
||||
return None
|
||||
|
||||
if response.status_code == 200:
|
||||
access_token = response.json().get('access_token')
|
||||
return access_token
|
||||
else:
|
||||
log.error(f"Error {response.status_code}: {response.text}")
|
||||
log.error(f"Error authenticating {response.status_code}: {response.text}")
|
||||
|
||||
|
||||
def authenticate(scope: str) -> str:
|
||||
@@ -101,10 +108,14 @@ def _read_env_file() -> tuple:
|
||||
|
||||
:return: tuple
|
||||
"""
|
||||
try:
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
dotenv_folder_path = os.path.join(current_dir, 'env')
|
||||
dotenv_folder_path = os.path.join(current_dir, '../config')
|
||||
dotenv_path = os.path.join(dotenv_folder_path, '.env')
|
||||
contents = dotenv.dotenv_values(dotenv_path=dotenv_path)
|
||||
except Exception as e:
|
||||
log.error(f"Error reading the .env file: {e}")
|
||||
return None
|
||||
spotify_client_id = contents['SPOTIFY_CLIENT_ID']
|
||||
spotify_client_secret = contents['SPOTIFY_CLIENT_SECRET']
|
||||
spotify_redirect_uri = contents['SPOTIFY_REDIRECT_URI']
|
||||
@@ -158,7 +169,12 @@ def _exchange_code_for_token(code: str, redirect_uri: str, client_id: str, clien
|
||||
'client_secret': client_secret,
|
||||
}
|
||||
|
||||
try:
|
||||
response = requests.post(token_url, data=data, headers=headers)
|
||||
except requests.exceptions.RequestException as e:
|
||||
log.error(f"Error exchanging code for token: {e}")
|
||||
return None
|
||||
|
||||
response_data = response.json()
|
||||
|
||||
if 'access_token' not in response_data:
|
||||
@@ -192,7 +208,12 @@ def _refresh_access_token(refresh_token: str, client_id: str, client_secret: str
|
||||
'client_secret': client_secret,
|
||||
}
|
||||
|
||||
try:
|
||||
response = requests.post(token_url, data=data, headers=headers)
|
||||
except requests.exceptions.RequestException as e:
|
||||
log.error(f"Error refreshing access token: {e}")
|
||||
return None
|
||||
|
||||
response_data = response.json()
|
||||
|
||||
if 'access_token' not in response_data:
|
||||
|
||||
+31
-8
@@ -1,7 +1,12 @@
|
||||
import logging as log
|
||||
import sqlite3
|
||||
from enum import Enum
|
||||
|
||||
from logger import LoggerWrapper
|
||||
|
||||
# DATABASE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'spotify_scraped.db')
|
||||
|
||||
log = LoggerWrapper()
|
||||
|
||||
|
||||
class Table(Enum):
|
||||
TRACK_INFORMATION = "track_information"
|
||||
@@ -16,7 +21,7 @@ class Database:
|
||||
A class to handle the database connection and operations
|
||||
"""
|
||||
|
||||
def __init__(self, db_name):
|
||||
def __init__(self, db_name: str):
|
||||
"""Initialize the connection to the database"""
|
||||
self.db_name = db_name
|
||||
self.conn = sqlite3.connect(db_name)
|
||||
@@ -60,8 +65,18 @@ class Database:
|
||||
self.cursor.execute(f'''
|
||||
CREATE TABLE IF NOT EXISTS {Table.TRACK_ATTRIBUTES.value} (
|
||||
track_id TEXT PRIMARY KEY,
|
||||
attribute_name TEXT,
|
||||
attribute_value TEXT
|
||||
acousticness FLOAT,
|
||||
danceability FLOAT,
|
||||
duration_ms INTEGER,
|
||||
energy FLOAT,
|
||||
instrumentalness FLOAT,
|
||||
key INTEGER,
|
||||
liveness FLOAT,
|
||||
loudness FLOAT,
|
||||
speechiness FLOAT,
|
||||
tempo FLOAT,
|
||||
time_signature INTEGER,
|
||||
valence FLOAT
|
||||
);
|
||||
''')
|
||||
|
||||
@@ -73,12 +88,14 @@ class Database:
|
||||
album_id TEXT,
|
||||
FOREIGN KEY (track_id) REFERENCES {Table.TRACK_INFORMATION.value}(track_id),
|
||||
FOREIGN KEY (artist_id) REFERENCES {Table.ARTIST_INFORMATION.value}(artist_id),
|
||||
FOREIGN KEY (album_id) REFERENCES {Table.ALBUM_INFORMATION.value}(album_id)
|
||||
FOREIGN KEY (album_id) REFERENCES {Table.ALBUM_INFORMATION.value}(album_id),
|
||||
FOREIGN KEY (track_id) REFERENCES {Table.TRACK_ATTRIBUTES.value}(track_id)
|
||||
);
|
||||
''')
|
||||
|
||||
# Commit the changes
|
||||
self.conn.commit()
|
||||
log.debug("Initialised tables")
|
||||
|
||||
def add_row(self, table: Table, values):
|
||||
"""Add a new row into the specified table"""
|
||||
@@ -88,17 +105,22 @@ class Database:
|
||||
self.cursor.execute(query, values)
|
||||
self.conn.commit()
|
||||
except Exception as e:
|
||||
log.debug(f"Error: {e}")
|
||||
log.error(f"Error while inserting row into table {table.value}: {e}")
|
||||
|
||||
def read_all_rows(self, table: Table, column: str = "*"):
|
||||
"""Read all rows from the specified table"""
|
||||
try:
|
||||
self.cursor.execute(f"SELECT {column} FROM {table.value}")
|
||||
rows = self.cursor.fetchall()
|
||||
return rows
|
||||
except Exception as e:
|
||||
log.error(f"Error while reading all rows from table {table.value}: {e}")
|
||||
return []
|
||||
|
||||
def close(self):
|
||||
def close(self, message: str):
|
||||
"""Close the database connection"""
|
||||
self.conn.close()
|
||||
log.info(f"Database connection closed from file: {message}")
|
||||
|
||||
def get_total_overview(self) -> list:
|
||||
"""Retrieve a total overview of all recently played songs with full details"""
|
||||
@@ -122,5 +144,6 @@ class Database:
|
||||
rows = self.cursor.fetchall()
|
||||
return rows
|
||||
except Exception as e:
|
||||
log.error(f"Error retrieving total overview: {e}")
|
||||
log.error(f"Error retrieving total overview: {e}"
|
||||
f"\nQuery Executed: {query}")
|
||||
return []
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,149 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
from auth import simple_authenticate
|
||||
from database_handler import Database, Table
|
||||
from logger import LoggerWrapper
|
||||
from spotify_api import get_multiple_field_information
|
||||
|
||||
# Define the absolute folder path to the folder containing the gdrp retrieved data
|
||||
folder_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'gdpr_data')
|
||||
|
||||
log = LoggerWrapper()
|
||||
|
||||
|
||||
def _read_gdrp_data() -> list:
|
||||
"""
|
||||
This function reads all .json files in the folder containing the gdpr data.
|
||||
This data is then extracted into a dict and sorted by timestamp ascending.
|
||||
|
||||
:return: all_songs_played: A dict with an items field containing all songs played for the user
|
||||
"""
|
||||
all_songs_played = []
|
||||
try:
|
||||
for filename in os.listdir(folder_path):
|
||||
|
||||
if filename.endswith('.json'):
|
||||
file_path = os.path.join(folder_path, filename)
|
||||
|
||||
with open(file_path, 'r') as file:
|
||||
data = json.load(file)
|
||||
|
||||
for entry in data:
|
||||
# This removes all podcasts from the list
|
||||
if entry['spotify_track_uri'] is None:
|
||||
continue
|
||||
try:
|
||||
track = {
|
||||
'timestamp': entry['ts'],
|
||||
'id': _extract_id(entry['spotify_track_uri']),
|
||||
'track_name': entry['master_metadata_track_name'],
|
||||
'artist_name': entry['master_metadata_album_artist_name'],
|
||||
'album_name': entry['master_metadata_album_album_name'],
|
||||
'conn_country': entry['conn_country'],
|
||||
'ms_played': entry['ms_played']
|
||||
}
|
||||
all_songs_played.append(track)
|
||||
except Exception as e:
|
||||
log.warning(f'Missing field from gdpr data: {e}')
|
||||
except Exception as e:
|
||||
log.error(f'Failed to read gdpr data: {e}')
|
||||
|
||||
all_songs_played = sorted(all_songs_played, key=lambda x: x['timestamp'])
|
||||
return all_songs_played
|
||||
|
||||
|
||||
def _extract_id(spotify_id: str) -> str:
|
||||
"""
|
||||
This function gets a id with extra details and extracts the id from it.
|
||||
|
||||
:param: id a string containing the id
|
||||
:return: str the ID
|
||||
"""
|
||||
prefix = "spotify:track:"
|
||||
prefix_removed_id = spotify_id[len(prefix):]
|
||||
return prefix_removed_id
|
||||
|
||||
|
||||
def _populate_ids(all_songs_played: list):
|
||||
|
||||
track_ids = []
|
||||
all_songs_played_info = []
|
||||
token = simple_authenticate()
|
||||
|
||||
processed_songs_id = set()
|
||||
|
||||
counter = 0
|
||||
|
||||
for entry in all_songs_played:
|
||||
track_id = entry['id']
|
||||
|
||||
if track_id not in processed_songs_id:
|
||||
track_ids.append(track_id)
|
||||
processed_songs_id.add(track_id)
|
||||
counter += 1
|
||||
|
||||
if (counter + 1) % 50 == 0 and len(track_ids) > 0:
|
||||
track_ids_tuple = tuple(track_ids)
|
||||
track_ids.clear()
|
||||
response = get_multiple_field_information(token, 'tracks', 50, *track_ids_tuple)
|
||||
all_songs_played_info.extend(_sort_and_create_required_dataset(response))
|
||||
counter = 0
|
||||
|
||||
if len(track_ids) > 0:
|
||||
track_ids_tuple = tuple(track_ids)
|
||||
response = get_multiple_field_information(token, 'tracks', 50, *track_ids_tuple)
|
||||
all_songs_played_info.extend(_sort_and_create_required_dataset(response))
|
||||
|
||||
return all_songs_played_info
|
||||
|
||||
|
||||
def _sort_and_create_required_dataset(response) -> dict:
|
||||
|
||||
track_list = []
|
||||
|
||||
for entry in response['tracks']:
|
||||
track_data = {
|
||||
'track_id': entry['id'],
|
||||
'album_id': entry['album']['id'],
|
||||
'artist_id': entry['artists'][0]['id']
|
||||
}
|
||||
track_list.append(track_data)
|
||||
|
||||
return track_list
|
||||
|
||||
|
||||
def _fill_missing_ids(all_songs_played, all_songs_catalogued):
|
||||
|
||||
# Create a dictionary to map track_id to artist_id and album_id
|
||||
track_id_to_artist_album = {data['track_id']: {'album_id': data['album_id'], 'artist_id': data['artist_id']} for data in all_songs_catalogued}
|
||||
|
||||
# Now, we will update the original `tracks` list by adding artist_id and album_id
|
||||
for track in all_songs_played:
|
||||
track_info = track_id_to_artist_album.get(track['id'])
|
||||
if track_info:
|
||||
track['artist_id'] = track_info['artist_id']
|
||||
track['album_id'] = track_info['album_id']
|
||||
|
||||
return all_songs_played
|
||||
|
||||
|
||||
def _insert_data_into_db(db: Database, all_songs_played: list):
|
||||
"""
|
||||
This function takes a list of all played songs and inserts these into the database.
|
||||
|
||||
:param: all_songs_played list of all songs
|
||||
"""
|
||||
for entry in all_songs_played:
|
||||
try:
|
||||
db.add_row(Table.RECENTLY_PLAYED, (entry['timestamp'], entry['id'], entry['artist_id'], entry['album_id']))
|
||||
except Exception as e:
|
||||
log.error(f'Failed adding {entry} to database, error {e}')
|
||||
|
||||
|
||||
def export_gdpr_data(db: Database, n_limit: int = 100) -> None:
|
||||
all_songs_played = _read_gdrp_data()
|
||||
all_songs_played = all_songs_played[-n_limit:]
|
||||
all_songs_catalogued = _populate_ids(all_songs_played)
|
||||
all_songs_played = _fill_missing_ids(all_songs_played, all_songs_catalogued)
|
||||
_insert_data_into_db(db, all_songs_played)
|
||||
@@ -0,0 +1,61 @@
|
||||
import logging
|
||||
import os
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class LoggerWrapper():
|
||||
|
||||
def __init__(self, logger_name: str = "standard_logger"):
|
||||
self.logger = logging.getLogger(logger_name)
|
||||
if not self.logger.handlers:
|
||||
self.logger.setLevel(logging.DEBUG)
|
||||
self.setup_logger()
|
||||
|
||||
def set_console_handler_to_debug(self):
|
||||
for handler in self.logger.handlers:
|
||||
if isinstance(handler, logging.StreamHandler):
|
||||
handler.setLevel(logging.DEBUG)
|
||||
|
||||
def setup_logger(self):
|
||||
# Define and create folder
|
||||
logs_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'logs')
|
||||
Path(logs_folder).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Define file path
|
||||
log_file = log_file = os.path.join(logs_folder, 'predictify.log')
|
||||
|
||||
# Setup File Handler
|
||||
handler = RotatingFileHandler(log_file, maxBytes=1000000, backupCount=5)
|
||||
handler.setLevel(logging.DEBUG)
|
||||
|
||||
# Setup Console Handler
|
||||
console_handler = logging.StreamHandler()
|
||||
console_handler.setLevel(logging.WARNING)
|
||||
|
||||
# Setup Formatter
|
||||
formatter = logging.Formatter('%(asctime)s - [%(filename)s:%(lineno)d] - %(levelname)s - %(message)s')
|
||||
|
||||
# Add Formatters to Handlers
|
||||
handler.setFormatter(formatter)
|
||||
console_handler.setFormatter(formatter)
|
||||
|
||||
# Add Handlers to Logger
|
||||
self.logger.addHandler(handler)
|
||||
self.logger.addHandler(console_handler)
|
||||
|
||||
def info(self, message):
|
||||
self.logger.info(message)
|
||||
|
||||
def debug(self, message):
|
||||
self.logger.debug(message)
|
||||
|
||||
def warning(self, message):
|
||||
self.logger.warning(message)
|
||||
|
||||
def error(self, message):
|
||||
self.logger.error(message)
|
||||
|
||||
def critical(self, message):
|
||||
self.logger.critical(message)
|
||||
# Here we can add alerting/handling
|
||||
+70
-3
@@ -1,8 +1,75 @@
|
||||
import argparse
|
||||
import atexit
|
||||
import os
|
||||
import sys
|
||||
import traceback
|
||||
from time import sleep
|
||||
|
||||
from scraper import scraping
|
||||
from database_handler import Database
|
||||
from gdpr_export import export_gdpr_data
|
||||
from logger import LoggerWrapper
|
||||
from scraper import scrape_missing_infos, scraping
|
||||
|
||||
log = LoggerWrapper()
|
||||
|
||||
|
||||
def _handle_exit():
|
||||
"""
|
||||
Function to log exit information if the script ends unexpectedly.
|
||||
"""
|
||||
log.critical("Script terminated unexpectedly.")
|
||||
|
||||
|
||||
def _log_crash_info(exc_type, exc_value, exc_tb):
|
||||
"""Custom function to log crash info when an exception occurs."""
|
||||
log.critical("A critical error occurred!", exc_info=(exc_type, exc_value, exc_tb))
|
||||
log.critical("Exception type: %s", exc_type)
|
||||
log.critical("Exception message: %s", exc_value)
|
||||
log.critical("Stack trace:\n%s", ''.join(traceback.format_tb(exc_tb)))
|
||||
|
||||
|
||||
# Register the exit handler and excepthook
|
||||
atexit.register(_handle_exit)
|
||||
sys.excepthook = _log_crash_info
|
||||
|
||||
|
||||
# Initialize the parser
|
||||
parser = argparse.ArgumentParser(description="A python script written in Python3.13 which continuously checks what spotify songs "
|
||||
"the user is listening to and logging these in a local database. \n"
|
||||
"The Script also has a export function where it can read out the gdpr data exported by the user.")
|
||||
|
||||
# Add optional arguments
|
||||
parser.add_argument('--verbose', '-v', action='store_true', help="Enable verbose output")
|
||||
parser.add_argument('--export', type=str, choices=['TEST', 'PRODUCTION'], required=True,
|
||||
help="Export the gdpr data from spotify if not done already. Choose between TEST and PRODUCTION."
|
||||
"TEST will export only a small number of songs, PRODUCTION will export all songs.")
|
||||
|
||||
# Parse the arguments
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.verbose:
|
||||
log.set_console_handler_to_debug()
|
||||
log.info('Enabled verbose mode')
|
||||
|
||||
db_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', f'spotify_scrape_{args.export}.db')
|
||||
|
||||
if args.export == 'TEST':
|
||||
export_size = 10000
|
||||
log.info(f'Scraping GDPR Data. Sample size: {export_size}')
|
||||
db = Database(db_path)
|
||||
export_gdpr_data(db, export_size)
|
||||
scrape_missing_infos(db)
|
||||
elif args.export == 'PRODUCTION':
|
||||
export_size = 1000000
|
||||
log.info('Scraping all GDPR Data.')
|
||||
db = Database(db_path)
|
||||
export_gdpr_data(db, export_size)
|
||||
scrape_missing_infos(db)
|
||||
else:
|
||||
raise ValueError('Invalid export type. Please choose between TEST and PRODUCTION.')
|
||||
|
||||
# Run forever on intervals of 30 minutes
|
||||
while True:
|
||||
scraping()
|
||||
log.info('Scraping API...')
|
||||
scraping(db)
|
||||
log.info('Done scraping API. Sleeping for 30 minutes...')
|
||||
sleep(1800)
|
||||
|
||||
+95
-114
@@ -1,150 +1,131 @@
|
||||
import requests
|
||||
|
||||
from auth import authenticate, simple_authenticate
|
||||
from database_handler import Database, Table
|
||||
from logger import LoggerWrapper
|
||||
from spotify_api import get_last_played_track, get_multiple_field_information
|
||||
|
||||
db = Database('spotify_scraped.db')
|
||||
log = LoggerWrapper()
|
||||
|
||||
|
||||
def scraping():
|
||||
def scraping(db: Database) -> None:
|
||||
"""
|
||||
This function is the main function that will be executed when the script is run
|
||||
"""
|
||||
global db
|
||||
|
||||
scope = "user-read-recently-played"
|
||||
bearer_token = authenticate(scope)
|
||||
|
||||
# Once each 30 mins
|
||||
_read_recently_played_page_and_add_to_db(bearer_token=bearer_token)
|
||||
_scrape_missing_infos()
|
||||
|
||||
db.close()
|
||||
_read_recently_played_page_and_add_to_db(db, bearer_token)
|
||||
scrape_missing_infos(db)
|
||||
|
||||
|
||||
def _read_recently_played_page_and_add_to_db(bearer_token: str):
|
||||
def _read_recently_played_page_and_add_to_db(db: Database, bearer_token: str) -> None:
|
||||
"""
|
||||
This function gets a list of song play history and adds it into the database.
|
||||
"""
|
||||
global db
|
||||
|
||||
last_played_track = _get_last_played_track(bearer_token=bearer_token)
|
||||
last_played_track = get_last_played_track(bearer_token=bearer_token)
|
||||
|
||||
for track in last_played_track['items']:
|
||||
try:
|
||||
for track in reversed(last_played_track['items']):
|
||||
track_id = track['track']['id']
|
||||
played_at = track['played_at']
|
||||
album_id = track['track']['album']['id']
|
||||
artist_id = track['track']['artists'][0]['id']
|
||||
db.add_row(Table.RECENTLY_PLAYED, (played_at, track_id, artist_id, album_id))
|
||||
except Exception as e:
|
||||
log.error(f"Failed to add returned play history to database: {e}"
|
||||
f"\nReturned Value: {last_played_track}")
|
||||
|
||||
|
||||
def _get_last_played_track(url: str = "https://api.spotify.com/v1/me/player/recently-played?limit=50", bearer_token: str = "") -> dict:
|
||||
"""
|
||||
This function returns the last played track based on the limit size
|
||||
|
||||
:param limit: str
|
||||
:param bearer_token: str
|
||||
:return: dict
|
||||
def scrape_missing_infos(db: Database) -> None:
|
||||
"""
|
||||
|
||||
header = {
|
||||
'Authorization': f'Bearer {bearer_token}'
|
||||
}
|
||||
|
||||
response = requests.get(url, headers=header)
|
||||
response_json = response.json()
|
||||
return response_json
|
||||
|
||||
|
||||
def _get_track_information(track_id: str, bearer_token: str) -> dict:
|
||||
"""
|
||||
This function returns the track information based on the track id
|
||||
|
||||
:param track_id: str
|
||||
:param bearer_token: str
|
||||
:return: dict
|
||||
"""
|
||||
|
||||
url = f"https://api.spotify.com/v1/tracks/{track_id}"
|
||||
header = {
|
||||
'Authorization': f'Bearer {bearer_token}'
|
||||
}
|
||||
|
||||
response = requests.get(url, headers=header)
|
||||
response_json = response.json()
|
||||
return response_json
|
||||
|
||||
|
||||
def _get_artist_information(artist_id: str, bearer_token: str) -> dict:
|
||||
"""
|
||||
This function returns the artist information based on the artist id
|
||||
|
||||
:param artist_id: str
|
||||
:param bearer_token: str
|
||||
:return: dict
|
||||
"""
|
||||
|
||||
url = f"https://api.spotify.com/v1/artists/{artist_id}"
|
||||
header = {
|
||||
'Authorization': f'Bearer {bearer_token}'
|
||||
}
|
||||
|
||||
response = requests.get(url, headers=header)
|
||||
response_json = response.json()
|
||||
return response_json
|
||||
|
||||
|
||||
def _get_album_information(album_id: str, bearer_token: str) -> dict:
|
||||
"""
|
||||
This function returns the album information based on the album id
|
||||
|
||||
:param album_id: str
|
||||
:param bearer_token: str
|
||||
:return: dict
|
||||
"""
|
||||
|
||||
url = f"https://api.spotify.com/v1/albums/{album_id}"
|
||||
header = {
|
||||
'Authorization': f'Bearer {bearer_token}'
|
||||
}
|
||||
|
||||
response = requests.get(url, headers=header)
|
||||
response_json = response.json()
|
||||
return response_json
|
||||
|
||||
|
||||
def _scrape_missing_infos():
|
||||
"""
|
||||
"""
|
||||
global db
|
||||
|
||||
bearer_token_simple = simple_authenticate()
|
||||
|
||||
# Track Info
|
||||
all_track_ids_recently_played = db.read_all_rows(Table.RECENTLY_PLAYED, 'track_id')
|
||||
all_track_ids_saved = db.read_all_rows(Table.TRACK_INFORMATION, 'track_id')
|
||||
all_track_ids_missing = list(set(all_track_ids_recently_played) - set(all_track_ids_saved))
|
||||
for track_id in all_track_ids_missing:
|
||||
response = _get_track_information(track_id=track_id[0], bearer_token=bearer_token_simple)
|
||||
db.add_row(Table.TRACK_INFORMATION, (response['id'], response['name'], response['duration_ms'], response['explicit'], response['popularity']))
|
||||
# Album Info
|
||||
all_album_ids_recently_played = db.read_all_rows(Table.RECENTLY_PLAYED, 'album_id')
|
||||
all_album_ids_saved = db.read_all_rows(Table.ALBUM_INFORMATION, 'album_id')
|
||||
all_album_ids_missing = list(set(all_album_ids_recently_played) - set(all_album_ids_saved))
|
||||
for album_id in all_album_ids_missing:
|
||||
response = _get_album_information(album_id=album_id[0], bearer_token=bearer_token_simple)
|
||||
_process_missing_info(db, bearer_token_simple, Table.TRACK_INFORMATION, 'track_id', 'tracks')
|
||||
_process_missing_info(db, bearer_token_simple, Table.ALBUM_INFORMATION, 'album_id', 'albums')
|
||||
_process_missing_info(db, bearer_token_simple, Table.ARTIST_INFORMATION, 'artist_id', 'artists')
|
||||
# _process_missing_info(db, bearer_token_simple, Table.TRACK_ATTRIBUTES, 'track_id', 'audio-features')
|
||||
|
||||
|
||||
def _process_missing_info(db: Database, bearer_token_simple: str, table_name: Table, id_field_name: str, endpoint_name: str) -> None:
|
||||
|
||||
if endpoint_name == 'albums':
|
||||
limit = 20
|
||||
elif endpoint_name == 'audio-features':
|
||||
limit = 100
|
||||
else:
|
||||
limit = 50
|
||||
|
||||
all_ids_recently_played = db.read_all_rows(Table.RECENTLY_PLAYED, id_field_name)
|
||||
all_ids_saved = db.read_all_rows(table_name, id_field_name)
|
||||
all_ids_missing = list(set(all_ids_recently_played) - set(all_ids_saved))
|
||||
|
||||
log.debug(f"Number of missing {table_name.name} entries: {len(all_ids_missing)}. Inserting...")
|
||||
|
||||
ids = []
|
||||
processed_ids = set()
|
||||
|
||||
counter = 0
|
||||
|
||||
for id_value in all_ids_missing:
|
||||
|
||||
id_value_str = id_value[0]
|
||||
|
||||
if id_value_str not in processed_ids:
|
||||
ids.append(id_value_str)
|
||||
processed_ids.add(id_value_str)
|
||||
counter += 1
|
||||
|
||||
if (counter + 1) % limit == 0 and len(ids) > 0:
|
||||
ids_tuple = tuple(ids)
|
||||
ids.clear()
|
||||
response = get_multiple_field_information(bearer_token_simple, endpoint_name, limit, *ids_tuple)
|
||||
_add_data_to_database(db, table_name, response)
|
||||
counter = 0
|
||||
|
||||
if len(ids) > 0:
|
||||
ids_tuple = tuple(ids)
|
||||
ids.clear()
|
||||
response = get_multiple_field_information(bearer_token_simple, endpoint_name, limit, *ids_tuple)
|
||||
_add_data_to_database(db, table_name, response)
|
||||
|
||||
|
||||
def _add_data_to_database(db: Database, table_name: Table, response) -> None:
|
||||
|
||||
if table_name == Table.TRACK_INFORMATION:
|
||||
log.debug('Adding track information to database')
|
||||
for entry in response['tracks']:
|
||||
log.debug(f"Adding track: {entry['name']}")
|
||||
db.add_row(table_name, (entry['id'], entry['name'], entry['duration_ms'], entry['explicit'], entry['popularity']))
|
||||
|
||||
elif table_name == Table.ALBUM_INFORMATION:
|
||||
log.debug('Adding album information to database')
|
||||
for entry in response['albums']:
|
||||
log.debug(f"Adding album: {entry['name']}")
|
||||
try:
|
||||
release_year = response['release_date'][:4]
|
||||
release_year = entry['release_date'][:4]
|
||||
except Exception:
|
||||
release_year = ""
|
||||
db.add_row(Table.ALBUM_INFORMATION, (response['id'], response['name'], response['album_type'], response['total_tracks'], release_year, response['label']))
|
||||
# Artist Info
|
||||
all_artist_ids_recently_played = db.read_all_rows(Table.RECENTLY_PLAYED, 'artist_id')
|
||||
all_artist_ids_saved = db.read_all_rows(Table.ARTIST_INFORMATION, 'artist_id')
|
||||
all_artist_ids_missing = list(set(all_artist_ids_recently_played) - set(all_artist_ids_saved))
|
||||
for artist_id in all_artist_ids_missing:
|
||||
response = _get_artist_information(artist_id=artist_id[0], bearer_token=bearer_token_simple)
|
||||
db.add_row(table_name, (entry['id'], entry['name'], entry['album_type'], entry['total_tracks'], release_year, entry['label']))
|
||||
|
||||
elif table_name == Table.ARTIST_INFORMATION:
|
||||
log.debug('Adding artist information to database')
|
||||
for entry in response['artists']:
|
||||
log.debug(f"Adding artist: {entry['name']}")
|
||||
try:
|
||||
genre = response['genres'][0]
|
||||
genre = entry['genres'][0]
|
||||
except IndexError:
|
||||
genre = ""
|
||||
db.add_row(Table.ARTIST_INFORMATION, (response['id'], response['name'], response['followers']['total'], genre, response['popularity']))
|
||||
db.add_row(Table.ARTIST_INFORMATION, (entry['id'], entry['name'], entry['followers']['total'], genre, entry['popularity']))
|
||||
|
||||
elif table_name == Table.TRACK_ATTRIBUTES:
|
||||
log.debug('Adding track attributes to database')
|
||||
for entry in response['audio_features']:
|
||||
log.debug(f"Adding track attributes: {entry['id']}")
|
||||
try:
|
||||
db.add_row(Table.TRACK_ATTRIBUTES, (entry['id'], entry['aucousticness'], entry['danceability'], entry['duration_ms'], entry['energy'], entry['instrumentalness'], entry['key'], entry['liveness'], entry['loudness'], entry['speechiness'], entry['tempo'], entry['time_signature'], entry['valence']))
|
||||
except Exception as e:
|
||||
log.error(f"Failed to add track attributes to database: {e}"
|
||||
f"\nReturned Value: {response}")
|
||||
|
||||
@@ -0,0 +1,140 @@
|
||||
from typing import Union
|
||||
|
||||
import requests
|
||||
|
||||
from logger import LoggerWrapper
|
||||
|
||||
log = LoggerWrapper()
|
||||
|
||||
|
||||
def get_last_played_track(bearer_token: str, url: str = "https://api.spotify.com/v1/me/player/recently-played?limit=50") -> Union[dict, None]:
|
||||
"""
|
||||
This function returns the last played track based on the limit size
|
||||
|
||||
:param limit: str
|
||||
:param bearer_token: str
|
||||
:return: dict
|
||||
"""
|
||||
|
||||
header = {
|
||||
'Authorization': f'Bearer {bearer_token}'
|
||||
}
|
||||
|
||||
try:
|
||||
log.debug(f"GET Request: {url}")
|
||||
response = requests.get(url, headers=header)
|
||||
response_json = response.json()
|
||||
return response_json
|
||||
except requests.exceptions.RequestException as e:
|
||||
log.error(f"Error in get_last_played_track: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def get_track_information(track_id: str, bearer_token: str) -> Union[dict, None]:
|
||||
"""
|
||||
This function returns the track information based on the track id
|
||||
|
||||
:param track_id: str
|
||||
:param bearer_token: str
|
||||
:return: dict
|
||||
"""
|
||||
|
||||
url = f"https://api.spotify.com/v1/tracks/{track_id}"
|
||||
header = {
|
||||
'Authorization': f'Bearer {bearer_token}'
|
||||
}
|
||||
|
||||
try:
|
||||
log.debug(f"GET Request: {url}")
|
||||
response = requests.get(url, headers=header)
|
||||
response_json = response.json()
|
||||
return response_json
|
||||
except requests.exceptions.RequestException as e:
|
||||
log.error(f"Error in get_track_information: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def get_artist_information(artist_id: str, bearer_token: str) -> Union[dict, None]:
|
||||
"""
|
||||
This function returns the artist information based on the artist id
|
||||
|
||||
:param artist_id: str
|
||||
:param bearer_token: str
|
||||
:return: dict
|
||||
"""
|
||||
|
||||
url = f"https://api.spotify.com/v1/artists/{artist_id}"
|
||||
header = {
|
||||
'Authorization': f'Bearer {bearer_token}'
|
||||
}
|
||||
try:
|
||||
log.debug(f"GET Request: {url}")
|
||||
response = requests.get(url, headers=header)
|
||||
response_json = response.json()
|
||||
return response_json
|
||||
except requests.exceptions.RequestException as e:
|
||||
log.error(f"Error in get_artist_information: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def get_album_information(album_id: str, bearer_token: str) -> Union[dict, None]:
|
||||
"""
|
||||
This function returns the album information based on the album id
|
||||
|
||||
:param album_id: str
|
||||
:param bearer_token: str
|
||||
:return: dict
|
||||
"""
|
||||
|
||||
url = f"https://api.spotify.com/v1/albums/{album_id}"
|
||||
header = {
|
||||
'Authorization': f'Bearer {bearer_token}'
|
||||
}
|
||||
|
||||
try:
|
||||
log.debug(f"GET Request: {url}")
|
||||
response = requests.get(url, headers=header)
|
||||
response_json = response.json()
|
||||
return response_json
|
||||
except requests.exceptions.RequestException as e:
|
||||
log.error(f"Error in get_album_information: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def get_multiple_field_information(bearer_token: str, api_type: str, limit: int, *track_ids) -> Union[dict, None]:
|
||||
"""
|
||||
This function returns the track information based on the track id
|
||||
|
||||
:param *track_id: str
|
||||
:param bearer_token: str
|
||||
:return: dict
|
||||
"""
|
||||
|
||||
if len(track_ids) > limit:
|
||||
log.error(f'exceeding the limit if ids {limit} for endpoint {api_type}')
|
||||
return None
|
||||
|
||||
url_suffix = "ids="
|
||||
separator = ","
|
||||
try:
|
||||
for track_id in track_ids:
|
||||
url_suffix = url_suffix + track_id + separator
|
||||
except Exception as e:
|
||||
log.error(f"Failed setting up the url for multiple ids request."
|
||||
f"Error: {e}")
|
||||
return None
|
||||
|
||||
url = f"https://api.spotify.com/v1/{api_type}?{url_suffix}"
|
||||
url = url[:-len(separator)]
|
||||
header = {
|
||||
'Authorization': f'Bearer {bearer_token}'
|
||||
}
|
||||
|
||||
try:
|
||||
log.debug(f"GET Request: {url}")
|
||||
response = requests.get(url, headers=header)
|
||||
response_json = response.json()
|
||||
return response_json
|
||||
except requests.exceptions.RequestException as e:
|
||||
log.error(f"Error in get_multiple_field_information: {e}")
|
||||
return None
|
||||
-14
@@ -1,14 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Starup the predictify scraper
|
||||
|
||||
if test -f ./requirements.txt
|
||||
then
|
||||
python3 -m venv .venv
|
||||
.venv/bin/pip install -r ./requirements.txt
|
||||
else
|
||||
printf "Missing requirements file! aborting...\n"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
.venv/bin/python3 src/runtime.py
|
||||
Reference in New Issue
Block a user