Merge pull request #30 from agresdominik/feat/audio_analysis

Feat/audio analysis
This commit is contained in:
Dominik
2025-09-25 01:10:01 +02:00
committed by GitHub
37 changed files with 746059 additions and 198 deletions
+11
View File
@@ -0,0 +1,11 @@
logs/
data/
src/__pycache__/
.git
*.md
.venv
LICENSE
MAKEFILE
pytest.ini
test/
+25 -5
View File
@@ -1,11 +1,31 @@
# Machine Learning grid search
my_dir/
# Audio previews
audio_previews/
# Audio data files
audio_features*
audio_data/
# My testing file
main_test.py
# .db
*.db
# DS_Store
.DS_Store
# Gdpr Data file
Streaming_History*
# Test running file # Test running file
main_test.py main_test.py
# databases # data dir
*.db data/*
data-docker/
# Custom Tokens file/rotator
tokens.json
# Visual Studio Code # Visual Studio Code
.vscode/ .vscode/
+1 -1
View File
@@ -22,7 +22,7 @@ repos:
files: \.(json)$ files: \.(json)$
- id: check-added-large-files # Prevent large files from being committed - id: check-added-large-files # Prevent large files from being committed
args: ['--maxkb=1000'] args: ['--maxkb=2000']
- id: check-ast # Check for parse errors in Python files - id: check-ast # Check for parse errors in Python files
exclude: '.*test.*' exclude: '.*test.*'
-24
View File
@@ -1,24 +0,0 @@
# NOTE(review): this file is DELETED by this PR (replaced by docker/Dockerfile).
# Annotations below describe the removed recipe for reference.
# Unpinned base tag — not reproducible (prefer a pinned version like alpine:3.21.3).
FROM alpine:latest
# Running out of /root is unconventional; /app is the usual app home.
WORKDIR /root
# openssh + EXPOSE 22 suggest the container was accessed over SSH.
RUN apk update && \
apk add --no-cache \
openssh \
python3 \
py3-pip \
sqlite
# Documentation only — does not publish the port.
EXPOSE 22
WORKDIR already exists; this creates the source subdirectory explicitly.
RUN mkdir /root/src
COPY ./startup.sh /root
COPY ./requirements.txt /root
COPY ./src/ /root/src/
# Debug-only step; leaves a useless layer in the image.
RUN ls -la
# VOLUME over the whole home dir shadows everything copied above at runtime.
VOLUME /root
ENTRYPOINT ["/bin/sh", "/root/startup.sh"]
+19
View File
@@ -0,0 +1,19 @@
# Build targets for the predictify docker image.
# `install` and `clean` are phony too: they do not produce files named
# after themselves, so make must not skip them.
.PHONY: all dockerfile install clean

# No quotes in the values: make assignments are literal, so TAG="unstable"
# would embed the quote characters into the variable.
TAG = unstable
PROJ_NAME = predictify

all: install dockerfile

# Create the persistent data directory used by the container bind mount.
install:
	mkdir -p ./data

# Build the image. The Dockerfile is listed as a prerequisite for
# documentation; the target itself is phony so it always runs.
dockerfile: ./docker/Dockerfile
	docker build \
		--tag "$(PROJ_NAME):$(TAG)" \
		--build-arg PROJ_NAME=$(PROJ_NAME) \
		--file ./docker/Dockerfile \
		.

# Remove the scraped database. No file prerequisite (the old
# `clean: ./spotify_scraped.db` pointed at a path that never exists and
# aborted with "No rule to make target"); -f makes removal idempotent.
clean:
	rm -f ./data/spotify_scraped.db
+35 -5
View File
@@ -11,15 +11,45 @@ A Data analysis tool to scrape your Spotify History usage and let a ML-Model pre
## Usable possible APIs ## Usable possible APIs
Recently Played Tracks: /me/player/recently-played [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-recently-played) Recently Played Tracks: `/me/player/recently-played` [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-recently-played)
Get Track: /tracks/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-track) Get Track: `/tracks/{id}` [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-track)
Get Track's Audio Features - Deprecated: /audio-features/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-features) Get Track's Audio Features _(Deprecated)_: `/audio-features/{id}` [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-features)
Get Track's Audio Analysis - Deprecated: /audio-analysis/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-analysis) Get Track's Audio Analysis _(Deprecated)_: `/audio-analysis/{id}` [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-analysis)
Get Artist: /artists/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-an-artist) Get Artist: `/artists/{id}` [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-an-artist)
## Docker usage
`cd` inside the project's directory:
```sh
cd predictify
```
To run predictify inside a container, first make sure to build the image:
```sh
make dockerfile
```
Create a separate data directory (e.g. `data-docker`):
```sh
mkdir data-docker
```
> [!NOTE]
> To detach the container and run it in the background, add the `--detach` flag directly after the `run` command.
Then run the following docker command, to run the container in the foreground:
```sh
docker run \
--name predictify \
--network=host \
--volume $(pwd)/data-docker:/app/predictify/data \
--volume $(pwd)/config:/app/predictify/config \
predictify:unstable
```
## GDPR Data
If you have gdpr data, create a folder: ```data/gdpr_data``` and add all .json files containing your play history into it. In order to extract it, run the script: ```python3 src/runtime.py --export```
## Authors ## Authors
+35
View File
@@ -0,0 +1,35 @@
# Pinned base image for reproducible builds.
FROM alpine:3.21.3

# Build-time project name, persisted into the runtime environment so the
# entrypoint and VOLUME paths can reference it.
ARG PROJ_NAME
ENV PROJ_NAME=${PROJ_NAME}

# WORKDIR creates the directory if missing — no separate mkdir needed.
# The following steps are executed from the specified directory below.
WORKDIR /app/${PROJ_NAME}

# Install all necessary software.
RUN apk add --no-cache python3 sqlite

# Create the directories needed for persistent storage (e.g. database, tokens).
RUN mkdir -p ./data ./config

# Copy the application source code BEFORE any VOLUME declaration:
# build-time writes into a declared volume path are discarded.
COPY ./src/ ./src/

# Create a separate venv inside the container & install requirements.
# Alpine's python3 package provides only the `python3` binary (no `python`
# alias), and invoking the venv's pip by path makes shell activation
# (`source`/`deactivate`) unnecessary inside a RUN step.
COPY ./requirements.txt ./requirements.txt
RUN python3 -m venv .venv && \
    ./.venv/bin/pip install --no-cache-dir -r ./requirements.txt

COPY ./docker/startup.sh ./startup.sh

# Mount points for logs, persistent data and config. Shell form so ENV vars
# expand; absolute paths (Docker does not support relative VOLUME paths).
# ./src is intentionally NOT a volume — it is application code baked into
# the image, and an anonymous volume over it would pin stale code.
VOLUME /var/log /app/${PROJ_NAME}/data /app/${PROJ_NAME}/config

# When starting the container the following is executed. Invoked through
# /bin/sh so the script's executable bit is irrelevant.
ENTRYPOINT ["/bin/sh", "./startup.sh"]
+5
View File
@@ -0,0 +1,5 @@
#!/bin/sh
#
# Startup predictify. Don't use this. This is for docker specifically.
#
# POSIX sh has no `source` builtin, and activation is redundant anyway:
# invoking the venv's interpreter by path picks up the venv packages.
# `exec` replaces this shell so Python runs as PID 1 and receives the
# SIGTERM sent by `docker stop` directly.
exec .venv/bin/python src/runtime.py --export
+11
View File
@@ -4,3 +4,14 @@ pre-commit==4.1.0
pytest==8.3.5 pytest==8.3.5
coverage==7.7.0 coverage==7.7.0
pytest-cov==6.0.0 pytest-cov==6.0.0
pandas==2.2.3
numpy==1.26.4
scikit-learn==1.6.1
tensorflow==2.19.0
keras==3.9.2
keras-tuner==1.4.7
scikeras==0.13.0
matplotlib==3.10.1
seaborn==0.13.2
librosa==0.11.0
optuna==4.2.1
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+28
View File
@@ -0,0 +1,28 @@
import re
from typing import Optional
import requests
def get_spotify_preview_url(spotify_track_id: str) -> Optional[str]:
    """
    Get the preview URL for a Spotify track using the embed page workaround.

    Scrapes the public embed page and extracts the ``audioPreview`` URL
    from its inline JSON.

    Args:
        spotify_track_id (str): The Spotify track ID

    Returns:
        Optional[str]: The preview URL if found, else None (also on any
        network/HTTP error, which is logged to stdout)
    """
    try:
        embed_url = f"https://open.spotify.com/embed/track/{spotify_track_id}"
        # requests has NO default timeout — without one a stalled
        # connection hangs this call (and any batch loop above it) forever.
        response = requests.get(embed_url, timeout=10)
        response.raise_for_status()
        html = response.text

        # The embed page inlines JSON like: "audioPreview": {"url": "..."}
        match = re.search(r'"audioPreview":\s*{\s*"url":\s*"([^"]+)"', html)
        return match.group(1) if match else None
    except Exception as e:
        # Best-effort helper: swallow errors and report None so bulk
        # fetch loops keep going.
        print(f"Failed to fetch Spotify preview URL: {e}")
        return None
+391
View File
@@ -0,0 +1,391 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 7814.41track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 8865.11track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 8410.16track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 10286.20track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6751.92track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 7016.85track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 9608.71track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 569.98track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 8934.23track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 3487.43track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 8381.08track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 3057.72track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 6150.47track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 6555.71track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 2342.34track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 9073.67track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 6341.27track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 4801.47track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 4224.31track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 7571.09track/s]\n",
"Downloading previews: 100%|██████████| 91/91 [00:00<00:00, 6534.41track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 7016.58track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 7011.93track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 7224.25track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 5970.09track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 1830.87track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 7771.45track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 3839.22track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 8010.83track/s]\n",
"Downloading previews: 100%|██████████| 7/7 [00:00<00:00, 1725.85track/s]\n",
"Downloading previews: 100%|██████████| 80/80 [00:00<00:00, 3127.45track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 5919.12track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 2211.42track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 5711.20track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 5389.72track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 5007.79track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 5448.83track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 1677.91track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 5254.51track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 5087.50track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 6186.85track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 1513.61track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 6105.52track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 4209.85track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 1611.84track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 127.48track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 200.62track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 5717.10track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 3484.29track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 177.04track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 5664.96track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 239.08track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 223.04track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 5842.92track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 7040.71track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 7355.77track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 292.89track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 8041.64track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 420.54track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 6490.87track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 5549.89track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 5031.36track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 1444.37track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 5870.31track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 4974.82track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 4823.21track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6310.05track/s]\n",
"Downloading previews: 100%|██████████| 196/196 [00:00<00:00, 312.44track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 5850.47track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 4904.72track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 5343.90track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 4764.65track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 4891.16track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 280.38track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 4945.14track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 4609.60track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 1155.63track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 3454.36track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 4191.60track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 4414.67track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 4393.90track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 2788.99track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 6180.40track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 260.50track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 4974.38track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 204.43track/s]\n",
"Downloading previews: 100%|██████████| 189/189 [00:00<00:00, 433.69track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 4620.28track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 5229.06track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 6571.83track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 252.47track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 7138.69track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 4936.31track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 5408.81track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 6418.59track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 6733.21track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 6277.22track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 168.85track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 5975.06track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 7002.79track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 6256.22track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 6033.96track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 283.78track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 6277.83track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 5573.59track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 6510.58track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6384.23track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6124.12track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6541.53track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 857.85track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [00:00<00:00, 375.59track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 10254.22track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 6399.47track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 6457.48track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 237.51track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 6714.17track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 287.82track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6351.42track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 7704.99track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 449.76track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 6541.76track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 7323.53track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 465.08track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:15<00:00, 6.16track/s] \n",
"Downloading previews: 100%|██████████| 97/97 [00:26<00:00, 3.60track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:25<00:00, 3.85track/s]\n",
"Downloading previews: 100%|██████████| 191/191 [00:57<00:00, 3.34track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:27<00:00, 3.49track/s]\n",
"Downloading previews: 100%|██████████| 194/194 [00:53<00:00, 3.63track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:27<00:00, 3.58track/s]\n",
"Downloading previews: 100%|██████████| 187/187 [00:55<00:00, 3.35track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:29<00:00, 3.19track/s]\n",
"Downloading previews: 100%|██████████| 196/196 [00:57<00:00, 3.41track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:25<00:00, 3.63track/s]\n",
"Downloading previews: 100%|██████████| 197/197 [00:52<00:00, 3.75track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [00:51<00:00, 3.71track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:26<00:00, 3.69track/s]\n",
"Downloading previews: 100%|██████████| 194/194 [00:55<00:00, 3.50track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:28<00:00, 3.46track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:25<00:00, 3.69track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:25<00:00, 3.65track/s]\n",
"Downloading previews: 100%|██████████| 193/193 [00:55<00:00, 3.46track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:26<00:00, 3.59track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [00:50<00:00, 3.74track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:25<00:00, 3.86track/s]\n",
"Downloading previews: 100%|██████████| 191/191 [00:52<00:00, 3.63track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:28<00:00, 3.40track/s]\n",
"Downloading previews: 100%|██████████| 195/195 [00:55<00:00, 3.54track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:27<00:00, 3.44track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:27<00:00, 3.57track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:34<00:00, 2.81track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:27<00:00, 3.55track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.38track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [00:52<00:00, 3.64track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:32<00:00, 3.01track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:28<00:00, 3.36track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:24<00:00, 3.92track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.40track/s]\n",
"Downloading previews: 100%|██████████| 188/188 [00:49<00:00, 3.79track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:26<00:00, 3.53track/s]\n",
"Downloading previews: 100%|██████████| 191/191 [00:55<00:00, 3.45track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.30track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:29<00:00, 3.23track/s]\n",
"Downloading previews: 100%|██████████| 90/90 [00:22<00:00, 3.93track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:25<00:00, 3.63track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:26<00:00, 3.60track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:26<00:00, 3.72track/s]\n",
"Downloading previews: 100%|██████████| 90/90 [00:24<00:00, 3.66track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:28<00:00, 3.38track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:27<00:00, 3.59track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:25<00:00, 3.74track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:25<00:00, 3.80track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:25<00:00, 3.69track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:27<00:00, 3.62track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:25<00:00, 3.71track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:26<00:00, 3.55track/s]\n",
"Downloading previews: 100%|██████████| 193/193 [00:50<00:00, 3.83track/s]\n",
"Downloading previews: 100%|██████████| 197/197 [00:53<00:00, 3.67track/s]\n",
"Downloading previews: 100%|██████████| 185/185 [00:46<00:00, 4.01track/s]\n",
"Downloading previews: 100%|██████████| 195/195 [00:48<00:00, 4.03track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [00:51<00:00, 3.68track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:26<00:00, 3.64track/s]\n",
"Downloading previews: 100%|██████████| 197/197 [00:52<00:00, 3.72track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:24<00:00, 3.87track/s]\n",
"Downloading previews: 100%|██████████| 195/195 [01:04<00:00, 3.01track/s]\n",
"Downloading previews: 100%|██████████| 193/193 [00:54<00:00, 3.57track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:28<00:00, 3.35track/s]\n",
"Downloading previews: 100%|██████████| 194/194 [00:55<00:00, 3.47track/s]\n",
"Downloading previews: 100%|██████████| 192/192 [00:59<00:00, 3.23track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.36track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [00:51<00:00, 3.67track/s]\n",
"Downloading previews: 100%|██████████| 189/189 [01:02<00:00, 3.01track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:28<00:00, 3.51track/s]\n",
"Downloading previews: 100%|██████████| 188/188 [00:55<00:00, 3.40track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:29<00:00, 3.19track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:38<00:00, 2.45track/s]\n",
"Downloading previews: 100%|██████████| 194/194 [00:55<00:00, 3.50track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:30<00:00, 3.13track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:27<00:00, 3.35track/s]\n",
"Downloading previews: 100%|██████████| 186/186 [00:56<00:00, 3.31track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [00:54<00:00, 3.52track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:40<00:00, 2.39track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:56<00:00, 1.64track/s]\n",
"Downloading previews: 100%|██████████| 193/193 [00:54<00:00, 3.57track/s]\n",
"Downloading previews: 100%|██████████| 195/195 [01:04<00:00, 3.03track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:32<00:00, 2.93track/s]\n",
"Downloading previews: 100%|██████████| 192/192 [01:05<00:00, 2.92track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:31<00:00, 3.12track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:34<00:00, 2.82track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [00:55<00:00, 3.40track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:26<00:00, 3.49track/s]\n",
"Downloading previews: 100%|██████████| 193/193 [00:53<00:00, 3.58track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:31<00:00, 3.03track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:28<00:00, 3.42track/s]\n",
"Downloading previews: 100%|██████████| 184/184 [00:50<00:00, 3.61track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:31<00:00, 3.15track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:27<00:00, 3.42track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.33track/s]\n",
"Downloading previews: 100%|██████████| 189/189 [00:52<00:00, 3.60track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:27<00:00, 3.54track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:25<00:00, 3.72track/s]\n",
"Downloading previews: 100%|██████████| 91/91 [00:26<00:00, 3.47track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:27<00:00, 3.50track/s]\n",
"Downloading previews: 100%|██████████| 88/88 [00:23<00:00, 3.78track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:29<00:00, 3.35track/s]\n",
"Downloading previews: 100%|██████████| 186/186 [00:53<00:00, 3.46track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:29<00:00, 3.22track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:30<00:00, 3.13track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:32<00:00, 2.91track/s]\n",
"Downloading previews: 100%|██████████| 186/186 [00:56<00:00, 3.27track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.34track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:24<00:00, 3.72track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:26<00:00, 3.56track/s]\n",
"Downloading previews: 100%|██████████| 186/186 [00:53<00:00, 3.46track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:29<00:00, 3.18track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:27<00:00, 3.43track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [01:01<00:00, 3.08track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:28<00:00, 3.29track/s]\n",
"Downloading previews: 100%|██████████| 197/197 [00:59<00:00, 3.31track/s]\n",
"Downloading previews: 100%|██████████| 192/192 [00:59<00:00, 3.22track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:37<00:00, 2.59track/s]\n",
"Downloading previews: 100%|██████████| 192/192 [00:55<00:00, 3.48track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:26<00:00, 3.62track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:27<00:00, 3.48track/s]\n",
"Downloading previews: 100%|██████████| 188/188 [00:54<00:00, 3.44track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:28<00:00, 3.39track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:28<00:00, 3.22track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:29<00:00, 3.30track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:27<00:00, 3.48track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:29<00:00, 3.34track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:26<00:00, 3.66track/s]\n",
"Downloading previews: 100%|██████████| 193/193 [00:55<00:00, 3.49track/s]\n",
"Downloading previews: 100%|██████████| 193/193 [00:53<00:00, 3.62track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:27<00:00, 3.41track/s]\n",
"Downloading previews: 100%|██████████| 188/188 [00:51<00:00, 3.62track/s]\n",
"Downloading previews: 100%|██████████| 191/191 [00:56<00:00, 3.41track/s]\n",
"Downloading previews: 100%|██████████| 193/193 [00:54<00:00, 3.52track/s]\n",
"Downloading previews: 100%|██████████| 191/191 [00:57<00:00, 3.30track/s]\n",
"Downloading previews: 100%|██████████| 196/196 [00:57<00:00, 3.43track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:25<00:00, 3.67track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:34<00:00, 2.82track/s]\n",
"Downloading previews: 100%|██████████| 188/188 [00:56<00:00, 3.35track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:28<00:00, 3.34track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:29<00:00, 3.22track/s]\n",
"Downloading previews: 100%|██████████| 191/191 [00:58<00:00, 3.29track/s]\n",
"Downloading previews: 100%|██████████| 82/82 [00:25<00:00, 3.27track/s]\n",
"Downloading previews: 100%|██████████| 5/5 [00:00<00:00, 649.53track/s]\n",
"Downloading previews: 100%|██████████| 16/16 [00:00<00:00, 2081.48track/s]\n",
"Downloading previews: 100%|██████████| 4/4 [00:00<00:00, 1143.17track/s]\n",
"Downloading previews: 100%|██████████| 16/16 [00:00<00:00, 2154.59track/s]\n",
"Downloading previews: 100%|██████████| 49/49 [00:10<00:00, 4.51track/s]\n",
"Downloading previews: 100%|██████████| 36/36 [00:10<00:00, 3.49track/s]\n",
"Downloading previews: 100%|██████████| 19/19 [00:06<00:00, 2.76track/s]\n",
"Downloading previews: 100%|██████████| 24/24 [00:07<00:00, 3.36track/s]\n",
"Downloading previews: 100%|██████████| 20/20 [00:06<00:00, 2.99track/s]\n",
"Downloading previews: 100%|██████████| 33/33 [00:09<00:00, 3.33track/s]\n",
"Downloading previews: 100%|██████████| 30/30 [00:08<00:00, 3.72track/s]\n",
"Downloading previews: 100%|██████████| 10/10 [00:02<00:00, 3.87track/s]\n",
"Downloading previews: 100%|██████████| 2/2 [00:00<00:00, 439.26track/s]\n",
"Downloading previews: 100%|██████████| 1/1 [00:00<00:00, 5.52track/s]\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mKeyboardInterrupt\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 29\u001b[39m\n\u001b[32m 26\u001b[39m df_new = df[~df[\u001b[33m'\u001b[39m\u001b[33mtrack_id\u001b[39m\u001b[33m'\u001b[39m].isin(processed)].copy()\n\u001b[32m 27\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m df_new.empty:\n\u001b[32m 28\u001b[39m \u001b[38;5;66;03m# nothing new → wait and retry\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m29\u001b[39m time.sleep(SLEEP_INTERVAL)\n\u001b[32m 30\u001b[39m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[32m 32\u001b[39m \u001b[38;5;66;03m# 3) Download each new preview with a progress bar\u001b[39;00m\n",
"\u001b[31mKeyboardInterrupt\u001b[39m: "
]
}
],
"source": [
"import os\n",
"import time\n",
"import requests\n",
"import pandas as pd\n",
"from tqdm import tqdm\n",
"\n",
"CSV_PATH = './track_genre_balanced_url.csv'\n",
"DOWNLOAD_DIR = 'audio_previews'\n",
"SLEEP_INTERVAL = 60 # seconds to wait between checks\n",
"\n",
"os.makedirs(DOWNLOAD_DIR, exist_ok=True)\n",
"\n",
"# Keep track of which track_ids we've already attempted\n",
"processed = set()\n",
"\n",
"while True:\n",
" # 1) Load current CSV\n",
" try:\n",
" df = pd.read_csv(CSV_PATH)\n",
" except FileNotFoundError:\n",
" print(f\"{CSV_PATH} not found, waiting...\")\n",
" time.sleep(SLEEP_INTERVAL)\n",
" continue\n",
"\n",
" # 2) Identify new tracks we haven't processed yet\n",
" df_new = df[~df['track_id'].isin(processed)].copy()\n",
" if df_new.empty:\n",
" # nothing new → wait and retry\n",
" time.sleep(SLEEP_INTERVAL)\n",
" continue\n",
"\n",
" # 3) Download each new preview with a progress bar\n",
" for _, row in tqdm(df_new.iterrows(),\n",
" total=len(df_new),\n",
" desc=\"Downloading previews\",\n",
" unit=\"track\"):\n",
" track_id = row['track_id']\n",
" preview_url = row['preview']\n",
" out_path = os.path.join(DOWNLOAD_DIR, f\"{track_id}.mp3\")\n",
"\n",
" # mark as processed so we don't retry on crashes\n",
" processed.add(track_id)\n",
"\n",
" # skip if file already exists\n",
" if os.path.exists(out_path):\n",
" continue\n",
"\n",
" # attempt download\n",
" try:\n",
" resp = requests.get(preview_url, timeout=30)\n",
" if resp.status_code == 200:\n",
" with open(out_path, 'wb') as f:\n",
" f.write(resp.content)\n",
" else:\n",
" print(f\"HTTP {resp.status_code} for {track_id}\")\n",
" except Exception as e:\n",
" print(f\"Error downloading {track_id}: {e}\")\n",
"\n",
" # 4) Pause before next check\n",
" time.sleep(SLEEP_INTERVAL)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
File diff suppressed because it is too large Load Diff
+297
View File
@@ -0,0 +1,297 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Read out Data from Kaggle Dataset, get preview URL-s and save to file\n",
"\n",
"## this should be run only once"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 6%|▋ | 76/1183 [00:35<33:39, 1.82s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/64ffsubBonytxZc5fQJhdO\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 9%|▊ | 102/1183 [00:55<34:18, 1.90s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/2Iu5wxKFiEEQDQK1Pldsis\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 9%|▉ | 111/1183 [01:03<33:10, 1.86s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/6syvS9gZzjB8b9DdKVhAJH\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 15%|█▌ | 180/1183 [01:54<53:30, 3.20s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/2qrVR11O44iJ0DVTNCExjA\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 19%|█▉ | 225/1183 [02:25<29:37, 1.86s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/3njPW0vttbjt5j1Elt6sJI\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 32%|███▏ | 381/1183 [03:26<23:39, 1.77s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/3T7zNYia3nk9d8uXhO9Xud\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 53%|█████▎ | 630/1183 [05:23<16:28, 1.79s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/41Sfs0E8hr8w2BvzUtof4O\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 54%|█████▎ | 633/1183 [05:29<20:57, 2.29s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/3H9aA6IO5gfHW72m8YU8Iv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 57%|█████▋ | 675/1183 [05:56<15:49, 1.87s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/0lvHnw9Exl8jLV3zuRsksJ\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 67%|██████▋ | 792/1183 [07:06<12:08, 1.86s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/17sSDGIRIkB0jOKb2cBURf\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 77%|███████▋ | 911/1183 [08:03<09:15, 2.04s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/5RcZ5jbBgKDdM6BuoSeh8P\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 77%|███████▋ | 912/1183 [08:08<13:32, 3.00s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/0YQrHOpi219lZA8SDly4iG\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 90%|█████████ | 1069/1183 [09:31<03:31, 1.85s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/2iql0ydkQX1hZ375EyRFFF\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 100%|██████████| 1183/1183 [10:19<00:00, 1.91it/s]\n"
]
}
],
"source": [
"import pandas as pd\n",
"import os\n",
"from spotify_preview import get_spotify_preview_url\n",
"from tqdm import tqdm\n",
"\n",
"# --- 0) Load & dedupe your balanced track/genre file ---\n",
"df = pd.read_csv('track_genres_balanced.csv')\n",
"df = df.drop_duplicates(subset=['track_id'])\n",
"df = df.dropna(subset=['genre'])\n",
"\n",
"# --- 1) Prep output CSV (header only once) ---\n",
"#output_csv = 'track_genre_balanced_url.csv'\n",
"#pd.DataFrame(columns=['track_id','genre','preview']).to_csv(output_csv, index=False)\n",
"#output_csv = pd.read_csv('track_genre_balanced_url.csv')\n",
"\n",
"output_csv = 'track_genre_balanced_url.csv'\n",
"\n",
"if os.path.exists(output_csv):\n",
" # load already-fetched track_ids and drop them from df\n",
" done = pd.read_csv(output_csv, usecols=['track_id'])\n",
" processed_ids = set(done['track_id'].astype(str))\n",
" df = df[~df['track_id'].astype(str).isin(processed_ids)]\n",
" write_header = False\n",
"else:\n",
" # new file → write header\n",
" pd.DataFrame(columns=['track_id','genre','preview']) \\\n",
" .to_csv(output_csv, index=False)\n",
" write_header = False # header is already there\n",
"\n",
"# --- 2) Parameters ---\n",
"BATCH_SIZE = 100 # how many tracks to process per “minibatch”\n",
"PAUSE = 0.1 # if you want a small sleep between API calls\n",
"\n",
"# --- 3) Loop with a single progress bar over all tracks ---\n",
"with tqdm(total=len(df), desc=\"Fetching previews\") as pbar:\n",
" for start in range(0, len(df), BATCH_SIZE):\n",
" chunk = df.iloc[start:start + BATCH_SIZE]\n",
" rows = []\n",
"\n",
"    # 4) Per-track lookup\n",
" for _, row in chunk.iterrows():\n",
" track_id = row['track_id']\n",
" genre = row['genre']\n",
"\n",
" preview = get_spotify_preview_url(track_id)\n",
" if preview:\n",
" rows.append({\n",
" 'track_id': track_id,\n",
" 'genre': genre,\n",
" 'preview': preview\n",
" })\n",
" # else: silently skip or print an error if you prefer\n",
"\n",
" pbar.update(1)\n",
" if PAUSE:\n",
" import time; time.sleep(PAUSE)\n",
"\n",
"    # 5) Append this batch's hits to disk\n",
" if rows:\n",
" pd.DataFrame(rows).to_csv(\n",
" output_csv,\n",
" mode='a',\n",
" header=False,\n",
" index=False\n",
" )\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
+31 -10
View File
@@ -1,6 +1,5 @@
import base64 import base64
import json import json
import logging as log
import os import os
import time import time
from http.server import BaseHTTPRequestHandler, HTTPServer from http.server import BaseHTTPRequestHandler, HTTPServer
@@ -9,7 +8,11 @@ from urllib.parse import parse_qs, urlencode, urlparse
import dotenv import dotenv
import requests import requests
TOKEN_FILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'env', 'tokens.json') from logger import LoggerWrapper
TOKEN_FILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'tokens.json')
log = LoggerWrapper()
def simple_authenticate(grant_type: str = "client_credentials") -> str: def simple_authenticate(grant_type: str = "client_credentials") -> str:
@@ -32,13 +35,17 @@ def simple_authenticate(grant_type: str = "client_credentials") -> str:
"grant_type": f"{grant_type}" "grant_type": f"{grant_type}"
} }
response = requests.post(token_url, headers=headers, data=data) try:
response = requests.post(token_url, headers=headers, data=data)
except requests.exceptions.RequestException as e:
log.error(f"Error authenticating: {e}")
return None
if response.status_code == 200: if response.status_code == 200:
access_token = response.json().get('access_token') access_token = response.json().get('access_token')
return access_token return access_token
else: else:
log.error(f"Error {response.status_code}: {response.text}") log.error(f"Error authenticating {response.status_code}: {response.text}")
def authenticate(scope: str) -> str: def authenticate(scope: str) -> str:
@@ -101,10 +108,14 @@ def _read_env_file() -> tuple:
:return: tuple :return: tuple
""" """
current_dir = os.path.dirname(os.path.abspath(__file__)) try:
dotenv_folder_path = os.path.join(current_dir, 'env') current_dir = os.path.dirname(os.path.abspath(__file__))
dotenv_path = os.path.join(dotenv_folder_path, '.env') dotenv_folder_path = os.path.join(current_dir, '../config')
contents = dotenv.dotenv_values(dotenv_path=dotenv_path) dotenv_path = os.path.join(dotenv_folder_path, '.env')
contents = dotenv.dotenv_values(dotenv_path=dotenv_path)
except Exception as e:
log.error(f"Error reading the .env file: {e}")
return None
spotify_client_id = contents['SPOTIFY_CLIENT_ID'] spotify_client_id = contents['SPOTIFY_CLIENT_ID']
spotify_client_secret = contents['SPOTIFY_CLIENT_SECRET'] spotify_client_secret = contents['SPOTIFY_CLIENT_SECRET']
spotify_redirect_uri = contents['SPOTIFY_REDIRECT_URI'] spotify_redirect_uri = contents['SPOTIFY_REDIRECT_URI']
@@ -158,7 +169,12 @@ def _exchange_code_for_token(code: str, redirect_uri: str, client_id: str, clien
'client_secret': client_secret, 'client_secret': client_secret,
} }
response = requests.post(token_url, data=data, headers=headers) try:
response = requests.post(token_url, data=data, headers=headers)
except requests.exceptions.RequestException as e:
log.error(f"Error exchanging code for token: {e}")
return None
response_data = response.json() response_data = response.json()
if 'access_token' not in response_data: if 'access_token' not in response_data:
@@ -192,7 +208,12 @@ def _refresh_access_token(refresh_token: str, client_id: str, client_secret: str
'client_secret': client_secret, 'client_secret': client_secret,
} }
response = requests.post(token_url, data=data, headers=headers) try:
response = requests.post(token_url, data=data, headers=headers)
except requests.exceptions.RequestException as e:
log.error(f"Error refreshing access token: {e}")
return None
response_data = response.json() response_data = response.json()
if 'access_token' not in response_data: if 'access_token' not in response_data:
+34 -11
View File
@@ -1,7 +1,12 @@
import logging as log
import sqlite3 import sqlite3
from enum import Enum from enum import Enum
from logger import LoggerWrapper
# DATABASE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'spotify_scraped.db')
log = LoggerWrapper()
class Table(Enum): class Table(Enum):
TRACK_INFORMATION = "track_information" TRACK_INFORMATION = "track_information"
@@ -16,7 +21,7 @@ class Database:
A class to handle the database connection and operations A class to handle the database connection and operations
""" """
def __init__(self, db_name): def __init__(self, db_name: str):
"""Initialize the connection to the database""" """Initialize the connection to the database"""
self.db_name = db_name self.db_name = db_name
self.conn = sqlite3.connect(db_name) self.conn = sqlite3.connect(db_name)
@@ -60,8 +65,18 @@ class Database:
self.cursor.execute(f''' self.cursor.execute(f'''
CREATE TABLE IF NOT EXISTS {Table.TRACK_ATTRIBUTES.value} ( CREATE TABLE IF NOT EXISTS {Table.TRACK_ATTRIBUTES.value} (
track_id TEXT PRIMARY KEY, track_id TEXT PRIMARY KEY,
attribute_name TEXT, acousticness FLOAT,
attribute_value TEXT danceability FLOAT,
duration_ms INTEGER,
energy FLOAT,
instrumentalness FLOAT,
key INTEGER,
liveness FLOAT,
loudness FLOAT,
speechiness FLOAT,
tempo FLOAT,
time_signature INTEGER,
valence FLOAT
); );
''') ''')
@@ -73,12 +88,14 @@ class Database:
album_id TEXT, album_id TEXT,
FOREIGN KEY (track_id) REFERENCES {Table.TRACK_INFORMATION.value}(track_id), FOREIGN KEY (track_id) REFERENCES {Table.TRACK_INFORMATION.value}(track_id),
FOREIGN KEY (artist_id) REFERENCES {Table.ARTIST_INFORMATION.value}(artist_id), FOREIGN KEY (artist_id) REFERENCES {Table.ARTIST_INFORMATION.value}(artist_id),
FOREIGN KEY (album_id) REFERENCES {Table.ALBUM_INFORMATION.value}(album_id) FOREIGN KEY (album_id) REFERENCES {Table.ALBUM_INFORMATION.value}(album_id),
FOREIGN KEY (track_id) REFERENCES {Table.TRACK_ATTRIBUTES.value}(track_id)
); );
''') ''')
# Commit the changes # Commit the changes
self.conn.commit() self.conn.commit()
log.debug("Initialised tables")
def add_row(self, table: Table, values): def add_row(self, table: Table, values):
"""Add a new row into the specified table""" """Add a new row into the specified table"""
@@ -88,17 +105,22 @@ class Database:
self.cursor.execute(query, values) self.cursor.execute(query, values)
self.conn.commit() self.conn.commit()
except Exception as e: except Exception as e:
log.debug(f"Error: {e}") log.error(f"Error while inserting row into table {table.value}: {e}")
def read_all_rows(self, table: Table, column: str = "*"): def read_all_rows(self, table: Table, column: str = "*"):
"""Read all rows from the specified table""" """Read all rows from the specified table"""
self.cursor.execute(f"SELECT {column} FROM {table.value}") try:
rows = self.cursor.fetchall() self.cursor.execute(f"SELECT {column} FROM {table.value}")
return rows rows = self.cursor.fetchall()
return rows
except Exception as e:
log.error(f"Error while reading all rows from table {table.value}: {e}")
return []
def close(self): def close(self, message: str):
"""Close the database connection""" """Close the database connection"""
self.conn.close() self.conn.close()
log.info(f"Database connection closed from file: {message}")
def get_total_overview(self) -> list: def get_total_overview(self) -> list:
"""Retrieve a total overview of all recently played songs with full details""" """Retrieve a total overview of all recently played songs with full details"""
@@ -122,5 +144,6 @@ class Database:
rows = self.cursor.fetchall() rows = self.cursor.fetchall()
return rows return rows
except Exception as e: except Exception as e:
log.error(f"Error retrieving total overview: {e}") log.error(f"Error retrieving total overview: {e}"
f"\nQuery Executed: {query}")
return [] return []
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+149
View File
@@ -0,0 +1,149 @@
import json
import os
from auth import simple_authenticate
from database_handler import Database, Table
from logger import LoggerWrapper
from spotify_api import get_multiple_field_information
# Define the absolute folder path to the folder containing the gdrp retrieved data
folder_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'gdpr_data')
log = LoggerWrapper()
def _read_gdrp_data() -> list:
    """
    Read all .json files in the folder containing the GDPR-exported streaming data.

    Each JSON file is expected to hold a list of play-event objects with the
    fields 'ts', 'spotify_track_uri', 'master_metadata_track_name',
    'master_metadata_album_artist_name', 'master_metadata_album_album_name',
    'conn_country' and 'ms_played'.

    :return: list of track dicts, sorted by 'timestamp' ascending
    """
    all_songs_played = []
    try:
        # folder_path is the module-level '../data/gdpr_data' directory.
        for filename in os.listdir(folder_path):
            if filename.endswith('.json'):
                file_path = os.path.join(folder_path, filename)
                with open(file_path, 'r') as file:
                    data = json.load(file)
                    for entry in data:
                        # This removes all podcasts from the list
                        # (podcast episodes carry no track URI in the export).
                        if entry['spotify_track_uri'] is None:
                            continue
                        try:
                            track = {
                                'timestamp': entry['ts'],
                                'id': _extract_id(entry['spotify_track_uri']),
                                'track_name': entry['master_metadata_track_name'],
                                'artist_name': entry['master_metadata_album_artist_name'],
                                'album_name': entry['master_metadata_album_album_name'],
                                'conn_country': entry['conn_country'],
                                'ms_played': entry['ms_played']
                            }
                            all_songs_played.append(track)
                        except Exception as e:
                            # A single malformed entry is skipped, not fatal.
                            log.warning(f'Missing field from gdpr data: {e}')
    except Exception as e:
        # NOTE(review): a KeyError on 'spotify_track_uri' above lands here and
        # aborts the whole read — TODO confirm all export entries carry that key.
        log.error(f'Failed to read gdpr data: {e}')
    # Sort chronologically so the oldest play comes first.
    all_songs_played = sorted(all_songs_played, key=lambda x: x['timestamp'])
    return all_songs_played
def _extract_id(spotify_id: str) -> str:
"""
This function gets a id with extra details and extracts the id from it.
:param: id a string containing the id
:return: str the ID
"""
prefix = "spotify:track:"
prefix_removed_id = spotify_id[len(prefix):]
return prefix_removed_id
def _populate_ids(all_songs_played: list) -> list:
    """
    Resolve album and artist IDs for every distinct track in the play history.

    Deduplicates track IDs, batches them and queries the Spotify 'tracks'
    endpoint via get_multiple_field_information, collecting minimal
    track/album/artist ID records.

    :param all_songs_played: list of play-event dicts carrying an 'id' field
    :return: list of dicts with 'track_id', 'album_id' and 'artist_id' keys
    """
    track_ids = []
    all_songs_played_info = []
    # Client-credentials token is enough for the public /tracks endpoint.
    token = simple_authenticate()
    processed_songs_id = set()
    counter = 0
    for entry in all_songs_played:
        track_id = entry['id']
        if track_id not in processed_songs_id:
            track_ids.append(track_id)
            processed_songs_id.add(track_id)
            counter += 1
            # NOTE(review): (counter + 1) % 50 flushes after 49 new IDs, so
            # batches hold 49 items, not the endpoint maximum of 50 — confirm
            # whether this off-by-one is intentional.
            if (counter + 1) % 50 == 0 and len(track_ids) > 0:
                track_ids_tuple = tuple(track_ids)
                track_ids.clear()
                response = get_multiple_field_information(token, 'tracks', 50, *track_ids_tuple)
                all_songs_played_info.extend(_sort_and_create_required_dataset(response))
                counter = 0
    # Flush whatever is left over after the loop (final partial batch).
    if len(track_ids) > 0:
        track_ids_tuple = tuple(track_ids)
        response = get_multiple_field_information(token, 'tracks', 50, *track_ids_tuple)
        all_songs_played_info.extend(_sort_and_create_required_dataset(response))
    return all_songs_played_info
def _sort_and_create_required_dataset(response) -> dict:
track_list = []
for entry in response['tracks']:
track_data = {
'track_id': entry['id'],
'album_id': entry['album']['id'],
'artist_id': entry['artists'][0]['id']
}
track_list.append(track_data)
return track_list
def _fill_missing_ids(all_songs_played, all_songs_catalogued):
# Create a dictionary to map track_id to artist_id and album_id
track_id_to_artist_album = {data['track_id']: {'album_id': data['album_id'], 'artist_id': data['artist_id']} for data in all_songs_catalogued}
# Now, we will update the original `tracks` list by adding artist_id and album_id
for track in all_songs_played:
track_info = track_id_to_artist_album.get(track['id'])
if track_info:
track['artist_id'] = track_info['artist_id']
track['album_id'] = track_info['album_id']
return all_songs_played
def _insert_data_into_db(db: Database, all_songs_played: list):
    """
    Persist every played-song record into the RECENTLY_PLAYED table.

    Rows that cannot be built or inserted (e.g. a song that never got its
    artist/album IDs filled in) are logged and skipped.

    :param db: open Database handle used for the inserts
    :param all_songs_played: list of dicts carrying 'timestamp', 'id',
        'artist_id' and 'album_id'
    """
    for entry in all_songs_played:
        try:
            # Tuple construction stays inside the try so a missing key is
            # logged rather than raised.
            values = (entry['timestamp'], entry['id'], entry['artist_id'], entry['album_id'])
            db.add_row(Table.RECENTLY_PLAYED, values)
        except Exception as e:
            log.error(f'Failed adding {entry} to database, error {e}')
def export_gdpr_data(db: Database, n_limit: int = 100) -> None:
    """
    Import the user's GDPR streaming-history export into the database.

    Reads the JSON dump, keeps only the most recent n_limit plays, resolves
    album/artist IDs via the Spotify API and inserts the rows.

    :param db: open Database handle to insert into
    :param n_limit: maximum number of most-recent plays to import
    """
    all_songs_played = _read_gdrp_data()
    # _read_gdrp_data sorts ascending, so the slice keeps the newest plays.
    all_songs_played = all_songs_played[-n_limit:]
    all_songs_catalogued = _populate_ids(all_songs_played)
    all_songs_played = _fill_missing_ids(all_songs_played, all_songs_catalogued)
    _insert_data_into_db(db, all_songs_played)
+61
View File
@@ -0,0 +1,61 @@
import logging
import os
from logging.handlers import RotatingFileHandler
from pathlib import Path
class LoggerWrapper():
    """
    Thin wrapper around the stdlib logging module.

    Every instance sharing the same logger_name reuses one underlying logger;
    handlers (rotating file at DEBUG, console at WARNING) are attached only
    once, so multiple instances do not duplicate log lines.
    """

    def __init__(self, logger_name: str = "standard_logger"):
        """Get (or create) the named logger and set it up on first use."""
        self.logger = logging.getLogger(logger_name)
        if not self.logger.handlers:
            self.logger.setLevel(logging.DEBUG)
            self.setup_logger()

    def set_console_handler_to_debug(self):
        """Lower the console handler threshold from WARNING to DEBUG."""
        for handler in self.logger.handlers:
            # Exact type check: RotatingFileHandler is a StreamHandler
            # subclass, so isinstance() would also match the file handler.
            if type(handler) is logging.StreamHandler:
                handler.setLevel(logging.DEBUG)

    def setup_logger(self):
        """Attach a rotating file handler (DEBUG) and a console handler (WARNING)."""
        # Define and create the ../logs folder next to the source tree.
        logs_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'logs')
        Path(logs_folder).mkdir(parents=True, exist_ok=True)

        # Define file path (fixes the duplicated `log_file = log_file = ...`).
        log_file = os.path.join(logs_folder, 'predictify.log')

        # File handler: keep up to 5 rotated files of ~1 MB each.
        handler = RotatingFileHandler(log_file, maxBytes=1000000, backupCount=5)
        handler.setLevel(logging.DEBUG)

        # Console handler: only warnings and above by default.
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.WARNING)

        # One shared format for both handlers.
        formatter = logging.Formatter('%(asctime)s - [%(filename)s:%(lineno)d] - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        console_handler.setFormatter(formatter)

        self.logger.addHandler(handler)
        self.logger.addHandler(console_handler)

    def info(self, message):
        self.logger.info(message)

    def debug(self, message):
        self.logger.debug(message)

    def warning(self, message):
        self.logger.warning(message)

    def error(self, message):
        self.logger.error(message)

    def critical(self, message):
        self.logger.critical(message)
        # Here we can add alerting/handling
+70 -3
View File
@@ -1,8 +1,75 @@
import argparse
import atexit
import os
import sys
import traceback
from time import sleep from time import sleep
from scraper import scraping from database_handler import Database
from gdpr_export import export_gdpr_data
from logger import LoggerWrapper
from scraper import scrape_missing_infos, scraping
log = LoggerWrapper()
def _handle_exit():
"""
Function to log exit information if the script ends unexpectedly.
"""
log.critical("Script terminated unexpectedly.")
def _log_crash_info(exc_type, exc_value, exc_tb):
"""Custom function to log crash info when an exception occurs."""
log.critical("A critical error occurred!", exc_info=(exc_type, exc_value, exc_tb))
log.critical("Exception type: %s", exc_type)
log.critical("Exception message: %s", exc_value)
log.critical("Stack trace:\n%s", ''.join(traceback.format_tb(exc_tb)))
# Register the exit handler and excepthook
atexit.register(_handle_exit)
sys.excepthook = _log_crash_info
# Initialize the parser
parser = argparse.ArgumentParser(description="A python script written in Python3.13 which continuously checks what spotify songs "
"the user is listening to and logging these in a local database. \n"
"The Script also has a export function where it can read out the gdpr data exported by the user.")
# Add optional arguments
parser.add_argument('--verbose', '-v', action='store_true', help="Enable verbose output")
parser.add_argument('--export', type=str, choices=['TEST', 'PRODUCTION'], required=True,
help="Export the gdpr data from spotify if not done already. Choose between TEST and PRODUCTION."
"TEST will export only a small number of songs, PRODUCTION will export all songs.")
# Parse the arguments
args = parser.parse_args()
if args.verbose:
log.set_console_handler_to_debug()
log.info('Enabled verbose mode')
db_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', f'spotify_scrape_{args.export}.db')
if args.export == 'TEST':
export_size = 10000
log.info(f'Scraping GDPR Data. Sample size: {export_size}')
db = Database(db_path)
export_gdpr_data(db, export_size)
scrape_missing_infos(db)
elif args.export == 'PRODUCTION':
export_size = 1000000
log.info('Scraping all GDPR Data.')
db = Database(db_path)
export_gdpr_data(db, export_size)
scrape_missing_infos(db)
else:
raise ValueError('Invalid export type. Please choose between TEST and PRODUCTION.')
# Run forever on intervals of 30 minutes
while True: while True:
scraping() log.info('Scraping API...')
scraping(db)
log.info('Done scraping API. Sleeping for 30 minutes...')
sleep(1800) sleep(1800)
+106 -125
View File
@@ -1,150 +1,131 @@
import requests
from auth import authenticate, simple_authenticate from auth import authenticate, simple_authenticate
from database_handler import Database, Table from database_handler import Database, Table
from logger import LoggerWrapper
from spotify_api import get_last_played_track, get_multiple_field_information
db = Database('spotify_scraped.db') log = LoggerWrapper()
def scraping(): def scraping(db: Database) -> None:
""" """
This function is the main function that will be executed when the script is run This function is the main function that will be executed when the script is run
""" """
global db
scope = "user-read-recently-played" scope = "user-read-recently-played"
bearer_token = authenticate(scope) bearer_token = authenticate(scope)
# Once each 30 mins _read_recently_played_page_and_add_to_db(db, bearer_token)
_read_recently_played_page_and_add_to_db(bearer_token=bearer_token) scrape_missing_infos(db)
_scrape_missing_infos()
db.close()
def _read_recently_played_page_and_add_to_db(bearer_token: str): def _read_recently_played_page_and_add_to_db(db: Database, bearer_token: str) -> None:
""" """
""" This function gets a list of song play history and adds it into the database.
global db
last_played_track = _get_last_played_track(bearer_token=bearer_token)
for track in last_played_track['items']:
track_id = track['track']['id']
played_at = track['played_at']
album_id = track['track']['album']['id']
artist_id = track['track']['artists'][0]['id']
db.add_row(Table.RECENTLY_PLAYED, (played_at, track_id, artist_id, album_id))
def _get_last_played_track(url: str = "https://api.spotify.com/v1/me/player/recently-played?limit=50", bearer_token: str = "") -> dict:
"""
This function returns the last played track based on the limit size
:param limit: str
:param bearer_token: str
:return: dict
""" """
header = { last_played_track = get_last_played_track(bearer_token=bearer_token)
'Authorization': f'Bearer {bearer_token}'
}
response = requests.get(url, headers=header) try:
response_json = response.json() for track in reversed(last_played_track['items']):
return response_json track_id = track['track']['id']
played_at = track['played_at']
album_id = track['track']['album']['id']
artist_id = track['track']['artists'][0]['id']
db.add_row(Table.RECENTLY_PLAYED, (played_at, track_id, artist_id, album_id))
except Exception as e:
log.error(f"Failed to add returned play history to database: {e}"
f"\nReturned Value: {last_played_track}")
def _get_track_information(track_id: str, bearer_token: str) -> dict: def scrape_missing_infos(db: Database) -> None:
"""
This function returns the track information based on the track id
:param track_id: str
:param bearer_token: str
:return: dict
""" """
url = f"https://api.spotify.com/v1/tracks/{track_id}"
header = {
'Authorization': f'Bearer {bearer_token}'
}
response = requests.get(url, headers=header)
response_json = response.json()
return response_json
def _get_artist_information(artist_id: str, bearer_token: str) -> dict:
""" """
This function returns the artist information based on the artist id
:param artist_id: str
:param bearer_token: str
:return: dict
"""
url = f"https://api.spotify.com/v1/artists/{artist_id}"
header = {
'Authorization': f'Bearer {bearer_token}'
}
response = requests.get(url, headers=header)
response_json = response.json()
return response_json
def _get_album_information(album_id: str, bearer_token: str) -> dict:
"""
This function returns the album information based on the album id
:param album_id: str
:param bearer_token: str
:return: dict
"""
url = f"https://api.spotify.com/v1/albums/{album_id}"
header = {
'Authorization': f'Bearer {bearer_token}'
}
response = requests.get(url, headers=header)
response_json = response.json()
return response_json
def _scrape_missing_infos():
"""
"""
global db
bearer_token_simple = simple_authenticate() bearer_token_simple = simple_authenticate()
# Track Info _process_missing_info(db, bearer_token_simple, Table.TRACK_INFORMATION, 'track_id', 'tracks')
all_track_ids_recently_played = db.read_all_rows(Table.RECENTLY_PLAYED, 'track_id') _process_missing_info(db, bearer_token_simple, Table.ALBUM_INFORMATION, 'album_id', 'albums')
all_track_ids_saved = db.read_all_rows(Table.TRACK_INFORMATION, 'track_id') _process_missing_info(db, bearer_token_simple, Table.ARTIST_INFORMATION, 'artist_id', 'artists')
all_track_ids_missing = list(set(all_track_ids_recently_played) - set(all_track_ids_saved)) # _process_missing_info(db, bearer_token_simple, Table.TRACK_ATTRIBUTES, 'track_id', 'audio-features')
for track_id in all_track_ids_missing:
response = _get_track_information(track_id=track_id[0], bearer_token=bearer_token_simple)
db.add_row(Table.TRACK_INFORMATION, (response['id'], response['name'], response['duration_ms'], response['explicit'], response['popularity'])) def _process_missing_info(db: Database, bearer_token_simple: str, table_name: Table, id_field_name: str, endpoint_name: str) -> None:
# Album Info
all_album_ids_recently_played = db.read_all_rows(Table.RECENTLY_PLAYED, 'album_id') if endpoint_name == 'albums':
all_album_ids_saved = db.read_all_rows(Table.ALBUM_INFORMATION, 'album_id') limit = 20
all_album_ids_missing = list(set(all_album_ids_recently_played) - set(all_album_ids_saved)) elif endpoint_name == 'audio-features':
for album_id in all_album_ids_missing: limit = 100
response = _get_album_information(album_id=album_id[0], bearer_token=bearer_token_simple) else:
try: limit = 50
release_year = response['release_date'][:4]
except Exception: all_ids_recently_played = db.read_all_rows(Table.RECENTLY_PLAYED, id_field_name)
release_year = "" all_ids_saved = db.read_all_rows(table_name, id_field_name)
db.add_row(Table.ALBUM_INFORMATION, (response['id'], response['name'], response['album_type'], response['total_tracks'], release_year, response['label'])) all_ids_missing = list(set(all_ids_recently_played) - set(all_ids_saved))
# Artist Info
all_artist_ids_recently_played = db.read_all_rows(Table.RECENTLY_PLAYED, 'artist_id') log.debug(f"Number of missing {table_name.name} entries: {len(all_ids_missing)}. Inserting...")
all_artist_ids_saved = db.read_all_rows(Table.ARTIST_INFORMATION, 'artist_id')
all_artist_ids_missing = list(set(all_artist_ids_recently_played) - set(all_artist_ids_saved)) ids = []
for artist_id in all_artist_ids_missing: processed_ids = set()
response = _get_artist_information(artist_id=artist_id[0], bearer_token=bearer_token_simple)
try: counter = 0
genre = response['genres'][0]
except IndexError: for id_value in all_ids_missing:
genre = ""
db.add_row(Table.ARTIST_INFORMATION, (response['id'], response['name'], response['followers']['total'], genre, response['popularity'])) id_value_str = id_value[0]
if id_value_str not in processed_ids:
ids.append(id_value_str)
processed_ids.add(id_value_str)
counter += 1
if (counter + 1) % limit == 0 and len(ids) > 0:
ids_tuple = tuple(ids)
ids.clear()
response = get_multiple_field_information(bearer_token_simple, endpoint_name, limit, *ids_tuple)
_add_data_to_database(db, table_name, response)
counter = 0
if len(ids) > 0:
ids_tuple = tuple(ids)
ids.clear()
response = get_multiple_field_information(bearer_token_simple, endpoint_name, limit, *ids_tuple)
_add_data_to_database(db, table_name, response)
def _add_data_to_database(db: Database, table_name: Table, response) -> None:
    """
    Insert a batched Spotify API response into the matching database table.

    :param db: open Database handle
    :param table_name: target table; selects how the response is unpacked
    :param response: decoded JSON from the corresponding batch endpoint
    """
    if table_name == Table.TRACK_INFORMATION:
        log.debug('Adding track information to database')
        for entry in response['tracks']:
            log.debug(f"Adding track: {entry['name']}")
            db.add_row(table_name, (entry['id'], entry['name'], entry['duration_ms'], entry['explicit'], entry['popularity']))
    elif table_name == Table.ALBUM_INFORMATION:
        log.debug('Adding album information to database')
        for entry in response['albums']:
            log.debug(f"Adding album: {entry['name']}")
            try:
                # Release dates come as "YYYY", "YYYY-MM" or "YYYY-MM-DD"; keep the year only.
                release_year = entry['release_date'][:4]
            except Exception:
                release_year = ""
            db.add_row(table_name, (entry['id'], entry['name'], entry['album_type'], entry['total_tracks'], release_year, entry['label']))
    elif table_name == Table.ARTIST_INFORMATION:
        log.debug('Adding artist information to database')
        for entry in response['artists']:
            log.debug(f"Adding artist: {entry['name']}")
            try:
                # Spotify may return an empty genre list; store the first genre or "".
                genre = entry['genres'][0]
            except IndexError:
                genre = ""
            db.add_row(Table.ARTIST_INFORMATION, (entry['id'], entry['name'], entry['followers']['total'], genre, entry['popularity']))
    elif table_name == Table.TRACK_ATTRIBUTES:
        log.debug('Adding track attributes to database')
        for entry in response['audio_features']:
            log.debug(f"Adding track attributes: {entry['id']}")
            try:
                # BUG FIX: the key was misspelled 'aucousticness', so every insert
                # raised KeyError and was silently swallowed by the except below.
                # 'acousticness' matches the API field and the schema column.
                db.add_row(Table.TRACK_ATTRIBUTES, (entry['id'], entry['acousticness'], entry['danceability'], entry['duration_ms'], entry['energy'], entry['instrumentalness'], entry['key'], entry['liveness'], entry['loudness'], entry['speechiness'], entry['tempo'], entry['time_signature'], entry['valence']))
            except Exception as e:
                log.error(f"Failed to add track attributes to database: {e}"
                          f"\nReturned Value: {response}")
+140
View File
@@ -0,0 +1,140 @@
from typing import Union
import requests
from logger import LoggerWrapper
log = LoggerWrapper()
def get_last_played_track(bearer_token: str, url: str = "https://api.spotify.com/v1/me/player/recently-played?limit=50") -> Union[dict, None]:
    """
    Return the user's recently played tracks from the Spotify Web API.

    :param bearer_token: OAuth bearer token used to authorize the request
    :param url: recently-played endpoint; the item limit is encoded in its
        query string (was previously mis-documented as a `limit` parameter)
    :return: parsed JSON response dict, or None on a request or HTTP error
    """
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }
    try:
        log.debug(f"GET Request: {url}")
        response = requests.get(url, headers=header)
        # Treat HTTP error statuses (401, 429, ...) as failures instead of
        # handing Spotify's error JSON to the caller as if it were data;
        # HTTPError is a RequestException, so the handler below catches it.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        log.error(f"Error in get_last_played_track: {e}")
        return None
def get_track_information(track_id: str, bearer_token: str) -> Union[dict, None]:
    """
    Return the track information for a single Spotify track id.

    :param track_id: Spotify track id
    :param bearer_token: OAuth bearer token used to authorize the request
    :return: parsed JSON response dict, or None on a request or HTTP error
    """
    url = f"https://api.spotify.com/v1/tracks/{track_id}"
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }
    try:
        log.debug(f"GET Request: {url}")
        response = requests.get(url, headers=header)
        # Fail on HTTP error statuses rather than returning the error body.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        log.error(f"Error in get_track_information: {e}")
        return None
def get_artist_information(artist_id: str, bearer_token: str) -> Union[dict, None]:
    """
    Return the artist information for a single Spotify artist id.

    :param artist_id: Spotify artist id
    :param bearer_token: OAuth bearer token used to authorize the request
    :return: parsed JSON response dict, or None on a request or HTTP error
    """
    url = f"https://api.spotify.com/v1/artists/{artist_id}"
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }
    try:
        log.debug(f"GET Request: {url}")
        response = requests.get(url, headers=header)
        # Fail on HTTP error statuses rather than returning the error body.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        log.error(f"Error in get_artist_information: {e}")
        return None
def get_album_information(album_id: str, bearer_token: str) -> Union[dict, None]:
    """
    Return the album information for a single Spotify album id.

    :param album_id: Spotify album id
    :param bearer_token: OAuth bearer token used to authorize the request
    :return: parsed JSON response dict, or None on a request or HTTP error
    """
    url = f"https://api.spotify.com/v1/albums/{album_id}"
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }
    try:
        log.debug(f"GET Request: {url}")
        response = requests.get(url, headers=header)
        # Fail on HTTP error statuses rather than returning the error body.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        log.error(f"Error in get_album_information: {e}")
        return None
def get_multiple_field_information(bearer_token: str, api_type: str, limit: int, *track_ids) -> Union[dict, None]:
    """
    Fetch information for several ids at once from a batched Spotify endpoint.

    :param bearer_token: OAuth bearer token used to authorize the request
    :param api_type: batch endpoint name, e.g. 'tracks', 'albums', 'artists'
    :param limit: maximum number of ids the endpoint accepts per request
    :param track_ids: Spotify ids to query (at most `limit` of them)
    :return: parsed JSON response dict, or None on error or too many ids
    """
    if len(track_ids) > limit:
        # (typo fixed: "limit if ids" -> "limit of ids")
        log.error(f'exceeding the limit of ids {limit} for endpoint {api_type}')
        return None
    try:
        # str.join replaces the manual concat-and-trim loop; like the old
        # loop it raises TypeError if any id is not a string, and it also
        # avoids clipping the '=' of 'ids=' when no ids are passed.
        url_suffix = "ids=" + ",".join(track_ids)
    except Exception as e:
        log.error(f"Failed setting up the url for multiple ids request."
                  f"Error: {e}")
        return None
    url = f"https://api.spotify.com/v1/{api_type}?{url_suffix}"
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }
    try:
        log.debug(f"GET Request: {url}")
        response = requests.get(url, headers=header)
        # Fail on HTTP error statuses rather than returning the error body.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        log.error(f"Error in get_multiple_field_information: {e}")
        return None
-14
View File
@@ -1,14 +0,0 @@
#!/bin/sh
#
# Startup script for the predictify scraper: create a virtualenv, install
# the pinned requirements and launch the runtime.

# Abort immediately if any setup step fails (e.g. a broken pip install
# previously went unnoticed and the app was launched anyway).
set -e

if test -f ./requirements.txt
then
    python3 -m venv .venv
    .venv/bin/pip install --no-cache-dir -r ./requirements.txt
else
    printf "Missing requirements file! aborting...\n"
    exit 1
fi

# exec so the Python process replaces this shell: as a container
# ENTRYPOINT it becomes PID 1 and receives SIGTERM from `docker stop`.
exec .venv/bin/python3 src/runtime.py