Merge pull request #30 from agresdominik/feat/audio_analysis

Feat/audio analysis
This commit is contained in:
Dominik
2025-09-25 01:10:01 +02:00
committed by GitHub
37 changed files with 746059 additions and 198 deletions
+11
View File
@@ -0,0 +1,11 @@
logs/
data/
src/__pycache__/
.git
*.md
.venv
LICENSE
MAKEFILE
pytest.ini
test/
+25 -5
View File
@@ -1,11 +1,31 @@
# Machine Learning grid search
my_dir/
# Audio previews
audio_previews/
# Audio data files
audio_features*
audio_data/
# My testing file
main_test.py
# .db
*.db
# DS_Store
.DS_Store
# Gdpr Data file
Streaming_History*
# Test running file # Test running file
main_test.py main_test.py
# databases # data dir
*.db data/*
data-docker/
# Custom Tokens file/rotator
tokens.json
# Visual Studio Code # Visual Studio Code
.vscode/ .vscode/
+1 -1
View File
@@ -22,7 +22,7 @@ repos:
files: \.(json)$ files: \.(json)$
- id: check-added-large-files # Prevent large files from being committed - id: check-added-large-files # Prevent large files from being committed
args: ['--maxkb=1000'] args: ['--maxkb=2000']
- id: check-ast # Check for parse errors in Python files - id: check-ast # Check for parse errors in Python files
exclude: '.*test.*' exclude: '.*test.*'
-24
View File
@@ -1,24 +0,0 @@
# NOTE(review): this file is DELETED by this PR (replaced by docker/Dockerfile).
# Annotations below describe the removed recipe for reference.
# Unpinned base tag — not reproducible (prefer a pinned version like alpine:3.21.3).
FROM alpine:latest
# Running out of /root is unconventional; /app is the usual app home.
WORKDIR /root
# openssh + EXPOSE 22 suggest the container was accessed over SSH.
RUN apk update && \
apk add --no-cache \
openssh \
python3 \
py3-pip \
sqlite
# Documentation only — does not publish the port.
EXPOSE 22
WORKDIR already exists; this creates the source subdirectory explicitly.
RUN mkdir /root/src
COPY ./startup.sh /root
COPY ./requirements.txt /root
COPY ./src/ /root/src/
# Debug-only step; leaves a useless layer in the image.
RUN ls -la
# VOLUME over the whole home dir shadows everything copied above at runtime.
VOLUME /root
ENTRYPOINT ["/bin/sh", "/root/startup.sh"]
+19
View File
@@ -0,0 +1,19 @@
# Build targets for the predictify docker image.
# `install` and `clean` are phony too: they do not produce files named
# after themselves, so make must not skip them.
.PHONY: all dockerfile install clean

# No quotes in the values: make assignments are literal, so TAG="unstable"
# would embed the quote characters into the variable.
TAG = unstable
PROJ_NAME = predictify

all: install dockerfile

# Create the persistent data directory used by the container bind mount.
install:
	mkdir -p ./data

# Build the image. The Dockerfile is listed as a prerequisite for
# documentation; the target itself is phony so it always runs.
dockerfile: ./docker/Dockerfile
	docker build \
		--tag "$(PROJ_NAME):$(TAG)" \
		--build-arg PROJ_NAME=$(PROJ_NAME) \
		--file ./docker/Dockerfile \
		.

# Remove the scraped database. No file prerequisite (the old
# `clean: ./spotify_scraped.db` pointed at a path that never exists and
# aborted with "No rule to make target"); -f makes removal idempotent.
clean:
	rm -f ./data/spotify_scraped.db
+35 -5
View File
@@ -11,15 +11,45 @@ A Data analysis tool to scrape your Spotify History usage and let a ML-Model pre
## Usable possible APIs ## Usable possible APIs
Recently Played Tracks: /me/player/recently-played [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-recently-played) Recently Played Tracks: `/me/player/recently-played` [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-recently-played)
Get Track: /tracks/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-track) Get Track: `/tracks/{id}` [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-track)
Get Track's Audio Features - Deprecated: /audio-features/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-features) Get Track's Audio Features _(Deprecated)_: `/audio-features/{id}` [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-features)
Get Track's Audio Analysis - Deprecated: /audio-analysis/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-analysis) Get Track's Audio Analysis _(Deprecated)_: `/audio-analysis/{id}` [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-analysis)
Get Artist: /artists/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-an-artist) Get Artist: `/artists/{id}` [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-an-artist)
## Docker usage
`cd` inside the project's directory:
```sh
cd predictify
```
To run predictify inside a container, first make sure to build the image:
```sh
make dockerfile
```
Create a separate data directory (e.g. `data-docker`):
```sh
mkdir data-docker
```
> [!NOTE]
> To detach the container and run it in the background, add the `--detach` flag directly after the `run` command.
Then run the following docker command, to run the container in the foreground:
```sh
docker run \
--name predictify \
--network=host \
--volume $(pwd)/data-docker:/app/predictify/data \
--volume $(pwd)/config:/app/predictify/config \
predictify:unstable
```
## GDPR Data
If you have gdpr data, create a folder: ```data/gdpr_data``` and add all .json files containing your play history into it. In order to extract it, run the script: ```python3 src/runtime.py --export```
## Authors ## Authors
+35
View File
@@ -0,0 +1,35 @@
# Pinned base image for reproducible builds.
FROM alpine:3.21.3

# Build-time project name, persisted into the runtime environment so the
# entrypoint and VOLUME paths can reference it.
ARG PROJ_NAME
ENV PROJ_NAME=${PROJ_NAME}

# WORKDIR creates the directory if missing — no separate mkdir needed.
# The following steps are executed from the specified directory below.
WORKDIR /app/${PROJ_NAME}

# Install all necessary software.
RUN apk add --no-cache python3 sqlite

# Create the directories needed for persistent storage (e.g. database, tokens).
RUN mkdir -p ./data ./config

# Copy the application source code BEFORE any VOLUME declaration:
# build-time writes into a declared volume path are discarded.
COPY ./src/ ./src/

# Create a separate venv inside the container & install requirements.
# Alpine's python3 package provides only the `python3` binary (no `python`
# alias), and invoking the venv's pip by path makes shell activation
# (`source`/`deactivate`) unnecessary inside a RUN step.
COPY ./requirements.txt ./requirements.txt
RUN python3 -m venv .venv && \
    ./.venv/bin/pip install --no-cache-dir -r ./requirements.txt

COPY ./docker/startup.sh ./startup.sh

# Mount points for logs, persistent data and config. Shell form so ENV vars
# expand; absolute paths (Docker does not support relative VOLUME paths).
# ./src is intentionally NOT a volume — it is application code baked into
# the image, and an anonymous volume over it would pin stale code.
VOLUME /var/log /app/${PROJ_NAME}/data /app/${PROJ_NAME}/config

# When starting the container the following is executed. Invoked through
# /bin/sh so the script's executable bit is irrelevant.
ENTRYPOINT ["/bin/sh", "./startup.sh"]
+5
View File
@@ -0,0 +1,5 @@
#!/bin/sh
#
# Startup predictify. Don't use this. This is for docker specifically.
#
# POSIX sh has no `source` builtin, and activation is redundant anyway:
# invoking the venv's interpreter by path picks up the venv packages.
# `exec` replaces this shell so Python runs as PID 1 and receives the
# SIGTERM sent by `docker stop` directly.
exec .venv/bin/python src/runtime.py --export
+11
View File
@@ -4,3 +4,14 @@ pre-commit==4.1.0
pytest==8.3.5 pytest==8.3.5
coverage==7.7.0 coverage==7.7.0
pytest-cov==6.0.0 pytest-cov==6.0.0
pandas==2.2.3
numpy==1.26.4
scikit-learn==1.6.1
tensorflow==2.19.0
keras==3.9.2
keras-tuner==1.4.7
scikeras==0.13.0
matplotlib==3.10.1
seaborn==0.13.2
librosa==0.11.0
optuna==4.2.1
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+28
View File
@@ -0,0 +1,28 @@
import re
from typing import Optional
import requests
def get_spotify_preview_url(spotify_track_id: str) -> Optional[str]:
    """
    Get the preview URL for a Spotify track using the embed page workaround.

    Scrapes the public embed page and extracts the ``audioPreview`` URL
    from its inline JSON.

    Args:
        spotify_track_id (str): The Spotify track ID

    Returns:
        Optional[str]: The preview URL if found, else None (also on any
        network/HTTP error, which is logged to stdout)
    """
    try:
        embed_url = f"https://open.spotify.com/embed/track/{spotify_track_id}"
        # requests has NO default timeout — without one a stalled
        # connection hangs this call (and any batch loop above it) forever.
        response = requests.get(embed_url, timeout=10)
        response.raise_for_status()
        html = response.text

        # The embed page inlines JSON like: "audioPreview": {"url": "..."}
        match = re.search(r'"audioPreview":\s*{\s*"url":\s*"([^"]+)"', html)
        return match.group(1) if match else None
    except Exception as e:
        # Best-effort helper: swallow errors and report None so bulk
        # fetch loops keep going.
        print(f"Failed to fetch Spotify preview URL: {e}")
        return None
+391
View File
@@ -0,0 +1,391 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 7814.41track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 8865.11track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 8410.16track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 10286.20track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6751.92track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 7016.85track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 9608.71track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 569.98track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 8934.23track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 3487.43track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 8381.08track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 3057.72track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 6150.47track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 6555.71track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 2342.34track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 9073.67track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 6341.27track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 4801.47track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 4224.31track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 7571.09track/s]\n",
"Downloading previews: 100%|██████████| 91/91 [00:00<00:00, 6534.41track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 7016.58track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 7011.93track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 7224.25track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 5970.09track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 1830.87track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 7771.45track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 3839.22track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 8010.83track/s]\n",
"Downloading previews: 100%|██████████| 7/7 [00:00<00:00, 1725.85track/s]\n",
"Downloading previews: 100%|██████████| 80/80 [00:00<00:00, 3127.45track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 5919.12track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 2211.42track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 5711.20track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 5389.72track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 5007.79track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 5448.83track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 1677.91track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 5254.51track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 5087.50track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 6186.85track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 1513.61track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 6105.52track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 4209.85track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 1611.84track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 127.48track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 200.62track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 5717.10track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 3484.29track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 177.04track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 5664.96track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 239.08track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 223.04track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 5842.92track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 7040.71track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 7355.77track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 292.89track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 8041.64track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 420.54track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 6490.87track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 5549.89track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 5031.36track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 1444.37track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 5870.31track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 4974.82track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 4823.21track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6310.05track/s]\n",
"Downloading previews: 100%|██████████| 196/196 [00:00<00:00, 312.44track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 5850.47track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 4904.72track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 5343.90track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 4764.65track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 4891.16track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 280.38track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 4945.14track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 4609.60track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 1155.63track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 3454.36track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 4191.60track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 4414.67track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 4393.90track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 2788.99track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 6180.40track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 260.50track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 4974.38track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 204.43track/s]\n",
"Downloading previews: 100%|██████████| 189/189 [00:00<00:00, 433.69track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 4620.28track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 5229.06track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:00<00:00, 6571.83track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 252.47track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 7138.69track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 4936.31track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 5408.81track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 6418.59track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 6733.21track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 6277.22track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 168.85track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 5975.06track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 7002.79track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 6256.22track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 6033.96track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 283.78track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 6277.83track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 5573.59track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:00<00:00, 6510.58track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6384.23track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6124.12track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6541.53track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:00<00:00, 857.85track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [00:00<00:00, 375.59track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 10254.22track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 6399.47track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 6457.48track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:00<00:00, 237.51track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 6714.17track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 287.82track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 6351.42track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:00<00:00, 7704.99track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:00<00:00, 449.76track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 6541.76track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:00<00:00, 7323.53track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:00<00:00, 465.08track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:15<00:00, 6.16track/s] \n",
"Downloading previews: 100%|██████████| 97/97 [00:26<00:00, 3.60track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:25<00:00, 3.85track/s]\n",
"Downloading previews: 100%|██████████| 191/191 [00:57<00:00, 3.34track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:27<00:00, 3.49track/s]\n",
"Downloading previews: 100%|██████████| 194/194 [00:53<00:00, 3.63track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:27<00:00, 3.58track/s]\n",
"Downloading previews: 100%|██████████| 187/187 [00:55<00:00, 3.35track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:29<00:00, 3.19track/s]\n",
"Downloading previews: 100%|██████████| 196/196 [00:57<00:00, 3.41track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:25<00:00, 3.63track/s]\n",
"Downloading previews: 100%|██████████| 197/197 [00:52<00:00, 3.75track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [00:51<00:00, 3.71track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:26<00:00, 3.69track/s]\n",
"Downloading previews: 100%|██████████| 194/194 [00:55<00:00, 3.50track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:28<00:00, 3.46track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:25<00:00, 3.69track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:25<00:00, 3.65track/s]\n",
"Downloading previews: 100%|██████████| 193/193 [00:55<00:00, 3.46track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:26<00:00, 3.59track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [00:50<00:00, 3.74track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:25<00:00, 3.86track/s]\n",
"Downloading previews: 100%|██████████| 191/191 [00:52<00:00, 3.63track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:28<00:00, 3.40track/s]\n",
"Downloading previews: 100%|██████████| 195/195 [00:55<00:00, 3.54track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:27<00:00, 3.44track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:27<00:00, 3.57track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:34<00:00, 2.81track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:27<00:00, 3.55track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.38track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [00:52<00:00, 3.64track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:32<00:00, 3.01track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:28<00:00, 3.36track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:24<00:00, 3.92track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.40track/s]\n",
"Downloading previews: 100%|██████████| 188/188 [00:49<00:00, 3.79track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:26<00:00, 3.53track/s]\n",
"Downloading previews: 100%|██████████| 191/191 [00:55<00:00, 3.45track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.30track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:29<00:00, 3.23track/s]\n",
"Downloading previews: 100%|██████████| 90/90 [00:22<00:00, 3.93track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:25<00:00, 3.63track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:26<00:00, 3.60track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:26<00:00, 3.72track/s]\n",
"Downloading previews: 100%|██████████| 90/90 [00:24<00:00, 3.66track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:28<00:00, 3.38track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:27<00:00, 3.59track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:25<00:00, 3.74track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:25<00:00, 3.80track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:25<00:00, 3.69track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:27<00:00, 3.62track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:25<00:00, 3.71track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:26<00:00, 3.55track/s]\n",
"Downloading previews: 100%|██████████| 193/193 [00:50<00:00, 3.83track/s]\n",
"Downloading previews: 100%|██████████| 197/197 [00:53<00:00, 3.67track/s]\n",
"Downloading previews: 100%|██████████| 185/185 [00:46<00:00, 4.01track/s]\n",
"Downloading previews: 100%|██████████| 195/195 [00:48<00:00, 4.03track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [00:51<00:00, 3.68track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:26<00:00, 3.64track/s]\n",
"Downloading previews: 100%|██████████| 197/197 [00:52<00:00, 3.72track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:24<00:00, 3.87track/s]\n",
"Downloading previews: 100%|██████████| 195/195 [01:04<00:00, 3.01track/s]\n",
"Downloading previews: 100%|██████████| 193/193 [00:54<00:00, 3.57track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:28<00:00, 3.35track/s]\n",
"Downloading previews: 100%|██████████| 194/194 [00:55<00:00, 3.47track/s]\n",
"Downloading previews: 100%|██████████| 192/192 [00:59<00:00, 3.23track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.36track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [00:51<00:00, 3.67track/s]\n",
"Downloading previews: 100%|██████████| 189/189 [01:02<00:00, 3.01track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:28<00:00, 3.51track/s]\n",
"Downloading previews: 100%|██████████| 188/188 [00:55<00:00, 3.40track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:29<00:00, 3.19track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:38<00:00, 2.45track/s]\n",
"Downloading previews: 100%|██████████| 194/194 [00:55<00:00, 3.50track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:30<00:00, 3.13track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:27<00:00, 3.35track/s]\n",
"Downloading previews: 100%|██████████| 186/186 [00:56<00:00, 3.31track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [00:54<00:00, 3.52track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:40<00:00, 2.39track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:56<00:00, 1.64track/s]\n",
"Downloading previews: 100%|██████████| 193/193 [00:54<00:00, 3.57track/s]\n",
"Downloading previews: 100%|██████████| 195/195 [01:04<00:00, 3.03track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:32<00:00, 2.93track/s]\n",
"Downloading previews: 100%|██████████| 192/192 [01:05<00:00, 2.92track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:31<00:00, 3.12track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:34<00:00, 2.82track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [00:55<00:00, 3.40track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:26<00:00, 3.49track/s]\n",
"Downloading previews: 100%|██████████| 193/193 [00:53<00:00, 3.58track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:31<00:00, 3.03track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:28<00:00, 3.42track/s]\n",
"Downloading previews: 100%|██████████| 184/184 [00:50<00:00, 3.61track/s]\n",
"Downloading previews: 100%|██████████| 99/99 [00:31<00:00, 3.15track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:27<00:00, 3.42track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.33track/s]\n",
"Downloading previews: 100%|██████████| 189/189 [00:52<00:00, 3.60track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:27<00:00, 3.54track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:25<00:00, 3.72track/s]\n",
"Downloading previews: 100%|██████████| 91/91 [00:26<00:00, 3.47track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:27<00:00, 3.50track/s]\n",
"Downloading previews: 100%|██████████| 88/88 [00:23<00:00, 3.78track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:29<00:00, 3.35track/s]\n",
"Downloading previews: 100%|██████████| 186/186 [00:53<00:00, 3.46track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:29<00:00, 3.22track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:30<00:00, 3.13track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:32<00:00, 2.91track/s]\n",
"Downloading previews: 100%|██████████| 186/186 [00:56<00:00, 3.27track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:27<00:00, 3.34track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:24<00:00, 3.72track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:26<00:00, 3.56track/s]\n",
"Downloading previews: 100%|██████████| 186/186 [00:53<00:00, 3.46track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:29<00:00, 3.18track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:27<00:00, 3.43track/s]\n",
"Downloading previews: 100%|██████████| 190/190 [01:01<00:00, 3.08track/s]\n",
"Downloading previews: 100%|██████████| 93/93 [00:28<00:00, 3.29track/s]\n",
"Downloading previews: 100%|██████████| 197/197 [00:59<00:00, 3.31track/s]\n",
"Downloading previews: 100%|██████████| 192/192 [00:59<00:00, 3.22track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:37<00:00, 2.59track/s]\n",
"Downloading previews: 100%|██████████| 192/192 [00:55<00:00, 3.48track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:26<00:00, 3.62track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:27<00:00, 3.48track/s]\n",
"Downloading previews: 100%|██████████| 188/188 [00:54<00:00, 3.44track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:28<00:00, 3.39track/s]\n",
"Downloading previews: 100%|██████████| 92/92 [00:28<00:00, 3.22track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:29<00:00, 3.30track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:27<00:00, 3.48track/s]\n",
"Downloading previews: 100%|██████████| 97/97 [00:29<00:00, 3.34track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:26<00:00, 3.66track/s]\n",
"Downloading previews: 100%|██████████| 193/193 [00:55<00:00, 3.49track/s]\n",
"Downloading previews: 100%|██████████| 193/193 [00:53<00:00, 3.62track/s]\n",
"Downloading previews: 100%|██████████| 94/94 [00:27<00:00, 3.41track/s]\n",
"Downloading previews: 100%|██████████| 188/188 [00:51<00:00, 3.62track/s]\n",
"Downloading previews: 100%|██████████| 191/191 [00:56<00:00, 3.41track/s]\n",
"Downloading previews: 100%|██████████| 193/193 [00:54<00:00, 3.52track/s]\n",
"Downloading previews: 100%|██████████| 191/191 [00:57<00:00, 3.30track/s]\n",
"Downloading previews: 100%|██████████| 196/196 [00:57<00:00, 3.43track/s]\n",
"Downloading previews: 100%|██████████| 95/95 [00:25<00:00, 3.67track/s]\n",
"Downloading previews: 100%|██████████| 98/98 [00:34<00:00, 2.82track/s]\n",
"Downloading previews: 100%|██████████| 188/188 [00:56<00:00, 3.35track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:28<00:00, 3.34track/s]\n",
"Downloading previews: 100%|██████████| 96/96 [00:29<00:00, 3.22track/s]\n",
"Downloading previews: 100%|██████████| 191/191 [00:58<00:00, 3.29track/s]\n",
"Downloading previews: 100%|██████████| 82/82 [00:25<00:00, 3.27track/s]\n",
"Downloading previews: 100%|██████████| 5/5 [00:00<00:00, 649.53track/s]\n",
"Downloading previews: 100%|██████████| 16/16 [00:00<00:00, 2081.48track/s]\n",
"Downloading previews: 100%|██████████| 4/4 [00:00<00:00, 1143.17track/s]\n",
"Downloading previews: 100%|██████████| 16/16 [00:00<00:00, 2154.59track/s]\n",
"Downloading previews: 100%|██████████| 49/49 [00:10<00:00, 4.51track/s]\n",
"Downloading previews: 100%|██████████| 36/36 [00:10<00:00, 3.49track/s]\n",
"Downloading previews: 100%|██████████| 19/19 [00:06<00:00, 2.76track/s]\n",
"Downloading previews: 100%|██████████| 24/24 [00:07<00:00, 3.36track/s]\n",
"Downloading previews: 100%|██████████| 20/20 [00:06<00:00, 2.99track/s]\n",
"Downloading previews: 100%|██████████| 33/33 [00:09<00:00, 3.33track/s]\n",
"Downloading previews: 100%|██████████| 30/30 [00:08<00:00, 3.72track/s]\n",
"Downloading previews: 100%|██████████| 10/10 [00:02<00:00, 3.87track/s]\n",
"Downloading previews: 100%|██████████| 2/2 [00:00<00:00, 439.26track/s]\n",
"Downloading previews: 100%|██████████| 1/1 [00:00<00:00, 5.52track/s]\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mKeyboardInterrupt\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 29\u001b[39m\n\u001b[32m 26\u001b[39m df_new = df[~df[\u001b[33m'\u001b[39m\u001b[33mtrack_id\u001b[39m\u001b[33m'\u001b[39m].isin(processed)].copy()\n\u001b[32m 27\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m df_new.empty:\n\u001b[32m 28\u001b[39m \u001b[38;5;66;03m# nothing new → wait and retry\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m29\u001b[39m time.sleep(SLEEP_INTERVAL)\n\u001b[32m 30\u001b[39m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[32m 32\u001b[39m \u001b[38;5;66;03m# 3) Download each new preview with a progress bar\u001b[39;00m\n",
"\u001b[31mKeyboardInterrupt\u001b[39m: "
]
}
],
"source": [
"import os\n",
"import time\n",
"import requests\n",
"import pandas as pd\n",
"from tqdm import tqdm\n",
"\n",
"CSV_PATH = './track_genre_balanced_url.csv'\n",
"DOWNLOAD_DIR = 'audio_previews'\n",
"SLEEP_INTERVAL = 60 # seconds to wait between checks\n",
"\n",
"os.makedirs(DOWNLOAD_DIR, exist_ok=True)\n",
"\n",
"# Keep track of which track_ids we've already attempted\n",
"processed = set()\n",
"\n",
"while True:\n",
" # 1) Load current CSV\n",
" try:\n",
" df = pd.read_csv(CSV_PATH)\n",
" except FileNotFoundError:\n",
" print(f\"{CSV_PATH} not found, waiting...\")\n",
" time.sleep(SLEEP_INTERVAL)\n",
" continue\n",
"\n",
" # 2) Identify new tracks we haven't processed yet\n",
" df_new = df[~df['track_id'].isin(processed)].copy()\n",
" if df_new.empty:\n",
" # nothing new → wait and retry\n",
" time.sleep(SLEEP_INTERVAL)\n",
" continue\n",
"\n",
" # 3) Download each new preview with a progress bar\n",
" for _, row in tqdm(df_new.iterrows(),\n",
" total=len(df_new),\n",
" desc=\"Downloading previews\",\n",
" unit=\"track\"):\n",
" track_id = row['track_id']\n",
" preview_url = row['preview']\n",
" out_path = os.path.join(DOWNLOAD_DIR, f\"{track_id}.mp3\")\n",
"\n",
" # mark as processed so we don't retry on crashes\n",
" processed.add(track_id)\n",
"\n",
" # skip if file already exists\n",
" if os.path.exists(out_path):\n",
" continue\n",
"\n",
" # attempt download\n",
" try:\n",
" resp = requests.get(preview_url, timeout=30)\n",
" if resp.status_code == 200:\n",
" with open(out_path, 'wb') as f:\n",
" f.write(resp.content)\n",
" else:\n",
" print(f\"HTTP {resp.status_code} for {track_id}\")\n",
" except Exception as e:\n",
" print(f\"Error downloading {track_id}: {e}\")\n",
"\n",
" # 4) Pause before next check\n",
" time.sleep(SLEEP_INTERVAL)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
File diff suppressed because it is too large Load Diff
+297
View File
@@ -0,0 +1,297 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Read out Data from Kaggle Dataset, get preview URL-s and save to file\n",
"\n",
"## this should be run only once"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 6%|▋ | 76/1183 [00:35<33:39, 1.82s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/64ffsubBonytxZc5fQJhdO\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 9%|▊ | 102/1183 [00:55<34:18, 1.90s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/2Iu5wxKFiEEQDQK1Pldsis\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 9%|▉ | 111/1183 [01:03<33:10, 1.86s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/6syvS9gZzjB8b9DdKVhAJH\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 15%|█▌ | 180/1183 [01:54<53:30, 3.20s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/2qrVR11O44iJ0DVTNCExjA\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 19%|█▉ | 225/1183 [02:25<29:37, 1.86s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/3njPW0vttbjt5j1Elt6sJI\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 32%|███▏ | 381/1183 [03:26<23:39, 1.77s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/3T7zNYia3nk9d8uXhO9Xud\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 53%|█████▎ | 630/1183 [05:23<16:28, 1.79s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/41Sfs0E8hr8w2BvzUtof4O\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 54%|█████▎ | 633/1183 [05:29<20:57, 2.29s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/3H9aA6IO5gfHW72m8YU8Iv\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 57%|█████▋ | 675/1183 [05:56<15:49, 1.87s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/0lvHnw9Exl8jLV3zuRsksJ\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 67%|██████▋ | 792/1183 [07:06<12:08, 1.86s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/17sSDGIRIkB0jOKb2cBURf\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 77%|███████▋ | 911/1183 [08:03<09:15, 2.04s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/5RcZ5jbBgKDdM6BuoSeh8P\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 77%|███████▋ | 912/1183 [08:08<13:32, 3.00s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/0YQrHOpi219lZA8SDly4iG\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 90%|█████████ | 1069/1183 [09:31<03:31, 1.85s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Failed to fetch Spotify preview URL: 504 Server Error: Gateway Timeout for url: https://open.spotify.com/embed/track/2iql0ydkQX1hZ375EyRFFF\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Fetching previews: 100%|██████████| 1183/1183 [10:19<00:00, 1.91it/s]\n"
]
}
],
"source": [
"import pandas as pd\n",
"import os\n",
"from spotify_preview import get_spotify_preview_url\n",
"from tqdm import tqdm\n",
"\n",
"# --- 0) Load & dedupe your balanced track/genre file ---\n",
"df = pd.read_csv('track_genres_balanced.csv')\n",
"df = df.drop_duplicates(subset=['track_id'])\n",
"df = df.dropna(subset=['genre'])\n",
"\n",
"# --- 1) Prep output CSV (header only once) ---\n",
"#output_csv = 'track_genre_balanced_url.csv'\n",
"#pd.DataFrame(columns=['track_id','genre','preview']).to_csv(output_csv, index=False)\n",
"#output_csv = pd.read_csv('track_genre_balanced_url.csv')\n",
"\n",
"output_csv = 'track_genre_balanced_url.csv'\n",
"\n",
"if os.path.exists(output_csv):\n",
" # load already-fetched track_ids and drop them from df\n",
" done = pd.read_csv(output_csv, usecols=['track_id'])\n",
" processed_ids = set(done['track_id'].astype(str))\n",
" df = df[~df['track_id'].astype(str).isin(processed_ids)]\n",
" write_header = False\n",
"else:\n",
" # new file → write header\n",
" pd.DataFrame(columns=['track_id','genre','preview']) \\\n",
" .to_csv(output_csv, index=False)\n",
" write_header = False # header is already there\n",
"\n",
"# --- 2) Parameters ---\n",
"BATCH_SIZE = 100 # how many tracks to process per “minibatch”\n",
"PAUSE = 0.1 # if you want a small sleep between API calls\n",
"\n",
"# --- 3) Loop with a single progress bar over all tracks ---\n",
"with tqdm(total=len(df), desc=\"Fetching previews\") as pbar:\n",
" for start in range(0, len(df), BATCH_SIZE):\n",
" chunk = df.iloc[start:start + BATCH_SIZE]\n",
" rows = []\n",
"\n",
"    # 4) Per-track lookup\n",
" for _, row in chunk.iterrows():\n",
" track_id = row['track_id']\n",
" genre = row['genre']\n",
"\n",
" preview = get_spotify_preview_url(track_id)\n",
" if preview:\n",
" rows.append({\n",
" 'track_id': track_id,\n",
" 'genre': genre,\n",
" 'preview': preview\n",
" })\n",
" # else: silently skip or print an error if you prefer\n",
"\n",
" pbar.update(1)\n",
" if PAUSE:\n",
" import time; time.sleep(PAUSE)\n",
"\n",
"    # 5) Append this batch's hits to disk\n",
" if rows:\n",
" pd.DataFrame(rows).to_csv(\n",
" output_csv,\n",
" mode='a',\n",
" header=False,\n",
" index=False\n",
" )\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
+31 -10
View File
@@ -1,6 +1,5 @@
import base64 import base64
import json import json
import logging as log
import os import os
import time import time
from http.server import BaseHTTPRequestHandler, HTTPServer from http.server import BaseHTTPRequestHandler, HTTPServer
@@ -9,7 +8,11 @@ from urllib.parse import parse_qs, urlencode, urlparse
import dotenv import dotenv
import requests import requests
TOKEN_FILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'env', 'tokens.json') from logger import LoggerWrapper
TOKEN_FILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'tokens.json')
log = LoggerWrapper()
def simple_authenticate(grant_type: str = "client_credentials") -> str: def simple_authenticate(grant_type: str = "client_credentials") -> str:
@@ -32,13 +35,17 @@ def simple_authenticate(grant_type: str = "client_credentials") -> str:
"grant_type": f"{grant_type}" "grant_type": f"{grant_type}"
} }
response = requests.post(token_url, headers=headers, data=data) try:
response = requests.post(token_url, headers=headers, data=data)
except requests.exceptions.RequestException as e:
log.error(f"Error authenticating: {e}")
return None
if response.status_code == 200: if response.status_code == 200:
access_token = response.json().get('access_token') access_token = response.json().get('access_token')
return access_token return access_token
else: else:
log.error(f"Error {response.status_code}: {response.text}") log.error(f"Error authenticating {response.status_code}: {response.text}")
def authenticate(scope: str) -> str: def authenticate(scope: str) -> str:
@@ -101,10 +108,14 @@ def _read_env_file() -> tuple:
:return: tuple :return: tuple
""" """
current_dir = os.path.dirname(os.path.abspath(__file__)) try:
dotenv_folder_path = os.path.join(current_dir, 'env') current_dir = os.path.dirname(os.path.abspath(__file__))
dotenv_path = os.path.join(dotenv_folder_path, '.env') dotenv_folder_path = os.path.join(current_dir, '../config')
contents = dotenv.dotenv_values(dotenv_path=dotenv_path) dotenv_path = os.path.join(dotenv_folder_path, '.env')
contents = dotenv.dotenv_values(dotenv_path=dotenv_path)
except Exception as e:
log.error(f"Error reading the .env file: {e}")
return None
spotify_client_id = contents['SPOTIFY_CLIENT_ID'] spotify_client_id = contents['SPOTIFY_CLIENT_ID']
spotify_client_secret = contents['SPOTIFY_CLIENT_SECRET'] spotify_client_secret = contents['SPOTIFY_CLIENT_SECRET']
spotify_redirect_uri = contents['SPOTIFY_REDIRECT_URI'] spotify_redirect_uri = contents['SPOTIFY_REDIRECT_URI']
@@ -158,7 +169,12 @@ def _exchange_code_for_token(code: str, redirect_uri: str, client_id: str, clien
'client_secret': client_secret, 'client_secret': client_secret,
} }
response = requests.post(token_url, data=data, headers=headers) try:
response = requests.post(token_url, data=data, headers=headers)
except requests.exceptions.RequestException as e:
log.error(f"Error exchanging code for token: {e}")
return None
response_data = response.json() response_data = response.json()
if 'access_token' not in response_data: if 'access_token' not in response_data:
@@ -192,7 +208,12 @@ def _refresh_access_token(refresh_token: str, client_id: str, client_secret: str
'client_secret': client_secret, 'client_secret': client_secret,
} }
response = requests.post(token_url, data=data, headers=headers) try:
response = requests.post(token_url, data=data, headers=headers)
except requests.exceptions.RequestException as e:
log.error(f"Error refreshing access token: {e}")
return None
response_data = response.json() response_data = response.json()
if 'access_token' not in response_data: if 'access_token' not in response_data:
+34 -11
View File
@@ -1,7 +1,12 @@
import logging as log
import sqlite3 import sqlite3
from enum import Enum from enum import Enum
from logger import LoggerWrapper
# DATABASE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'spotify_scraped.db')
log = LoggerWrapper()
class Table(Enum): class Table(Enum):
TRACK_INFORMATION = "track_information" TRACK_INFORMATION = "track_information"
@@ -16,7 +21,7 @@ class Database:
A class to handle the database connection and operations A class to handle the database connection and operations
""" """
def __init__(self, db_name): def __init__(self, db_name: str):
"""Initialize the connection to the database""" """Initialize the connection to the database"""
self.db_name = db_name self.db_name = db_name
self.conn = sqlite3.connect(db_name) self.conn = sqlite3.connect(db_name)
@@ -60,8 +65,18 @@ class Database:
self.cursor.execute(f''' self.cursor.execute(f'''
CREATE TABLE IF NOT EXISTS {Table.TRACK_ATTRIBUTES.value} ( CREATE TABLE IF NOT EXISTS {Table.TRACK_ATTRIBUTES.value} (
track_id TEXT PRIMARY KEY, track_id TEXT PRIMARY KEY,
attribute_name TEXT, acousticness FLOAT,
attribute_value TEXT danceability FLOAT,
duration_ms INTEGER,
energy FLOAT,
instrumentalness FLOAT,
key INTEGER,
liveness FLOAT,
loudness FLOAT,
speechiness FLOAT,
tempo FLOAT,
time_signature INTEGER,
valence FLOAT
); );
''') ''')
@@ -73,12 +88,14 @@ class Database:
album_id TEXT, album_id TEXT,
FOREIGN KEY (track_id) REFERENCES {Table.TRACK_INFORMATION.value}(track_id), FOREIGN KEY (track_id) REFERENCES {Table.TRACK_INFORMATION.value}(track_id),
FOREIGN KEY (artist_id) REFERENCES {Table.ARTIST_INFORMATION.value}(artist_id), FOREIGN KEY (artist_id) REFERENCES {Table.ARTIST_INFORMATION.value}(artist_id),
FOREIGN KEY (album_id) REFERENCES {Table.ALBUM_INFORMATION.value}(album_id) FOREIGN KEY (album_id) REFERENCES {Table.ALBUM_INFORMATION.value}(album_id),
FOREIGN KEY (track_id) REFERENCES {Table.TRACK_ATTRIBUTES.value}(track_id)
); );
''') ''')
# Commit the changes # Commit the changes
self.conn.commit() self.conn.commit()
log.debug("Initialised tables")
def add_row(self, table: Table, values): def add_row(self, table: Table, values):
"""Add a new row into the specified table""" """Add a new row into the specified table"""
@@ -88,17 +105,22 @@ class Database:
self.cursor.execute(query, values) self.cursor.execute(query, values)
self.conn.commit() self.conn.commit()
except Exception as e: except Exception as e:
log.debug(f"Error: {e}") log.error(f"Error while inserting row into table {table.value}: {e}")
def read_all_rows(self, table: Table, column: str = "*"): def read_all_rows(self, table: Table, column: str = "*"):
"""Read all rows from the specified table""" """Read all rows from the specified table"""
self.cursor.execute(f"SELECT {column} FROM {table.value}") try:
rows = self.cursor.fetchall() self.cursor.execute(f"SELECT {column} FROM {table.value}")
return rows rows = self.cursor.fetchall()
return rows
except Exception as e:
log.error(f"Error while reading all rows from table {table.value}: {e}")
return []
def close(self): def close(self, message: str):
"""Close the database connection""" """Close the database connection"""
self.conn.close() self.conn.close()
log.info(f"Database connection closed from file: {message}")
def get_total_overview(self) -> list: def get_total_overview(self) -> list:
"""Retrieve a total overview of all recently played songs with full details""" """Retrieve a total overview of all recently played songs with full details"""
@@ -122,5 +144,6 @@ class Database:
rows = self.cursor.fetchall() rows = self.cursor.fetchall()
return rows return rows
except Exception as e: except Exception as e:
log.error(f"Error retrieving total overview: {e}") log.error(f"Error retrieving total overview: {e}"
f"\nQuery Executed: {query}")
return [] return []
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+149
View File
@@ -0,0 +1,149 @@
import json
import os
from auth import simple_authenticate
from database_handler import Database, Table
from logger import LoggerWrapper
from spotify_api import get_multiple_field_information
# Define the absolute folder path to the folder containing the gdrp retrieved data
folder_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', 'gdpr_data')
log = LoggerWrapper()
def _read_gdrp_data() -> list:
    """
    Read all .json files in the folder containing the GDPR-exported streaming data.

    Each JSON file is expected to hold a list of play-event objects with the
    fields 'ts', 'spotify_track_uri', 'master_metadata_track_name',
    'master_metadata_album_artist_name', 'master_metadata_album_album_name',
    'conn_country' and 'ms_played'.

    :return: list of track dicts, sorted by 'timestamp' ascending
    """
    all_songs_played = []
    try:
        # folder_path is the module-level '../data/gdpr_data' directory.
        for filename in os.listdir(folder_path):
            if filename.endswith('.json'):
                file_path = os.path.join(folder_path, filename)
                with open(file_path, 'r') as file:
                    data = json.load(file)
                    for entry in data:
                        # This removes all podcasts from the list
                        # (podcast episodes carry no track URI in the export).
                        if entry['spotify_track_uri'] is None:
                            continue
                        try:
                            track = {
                                'timestamp': entry['ts'],
                                'id': _extract_id(entry['spotify_track_uri']),
                                'track_name': entry['master_metadata_track_name'],
                                'artist_name': entry['master_metadata_album_artist_name'],
                                'album_name': entry['master_metadata_album_album_name'],
                                'conn_country': entry['conn_country'],
                                'ms_played': entry['ms_played']
                            }
                            all_songs_played.append(track)
                        except Exception as e:
                            # A single malformed entry is skipped, not fatal.
                            log.warning(f'Missing field from gdpr data: {e}')
    except Exception as e:
        # NOTE(review): a KeyError on 'spotify_track_uri' above lands here and
        # aborts the whole read — TODO confirm all export entries carry that key.
        log.error(f'Failed to read gdpr data: {e}')
    # Sort chronologically so the oldest play comes first.
    all_songs_played = sorted(all_songs_played, key=lambda x: x['timestamp'])
    return all_songs_played
def _extract_id(spotify_id: str) -> str:
"""
This function gets a id with extra details and extracts the id from it.
:param: id a string containing the id
:return: str the ID
"""
prefix = "spotify:track:"
prefix_removed_id = spotify_id[len(prefix):]
return prefix_removed_id
def _populate_ids(all_songs_played: list) -> list:
    """
    Resolve album and artist IDs for every distinct track in the play history.

    Deduplicates track IDs, batches them and queries the Spotify 'tracks'
    endpoint via get_multiple_field_information, collecting minimal
    track/album/artist ID records.

    :param all_songs_played: list of play-event dicts carrying an 'id' field
    :return: list of dicts with 'track_id', 'album_id' and 'artist_id' keys
    """
    track_ids = []
    all_songs_played_info = []
    # Client-credentials token is enough for the public /tracks endpoint.
    token = simple_authenticate()
    processed_songs_id = set()
    counter = 0
    for entry in all_songs_played:
        track_id = entry['id']
        if track_id not in processed_songs_id:
            track_ids.append(track_id)
            processed_songs_id.add(track_id)
            counter += 1
            # NOTE(review): (counter + 1) % 50 flushes after 49 new IDs, so
            # batches hold 49 items, not the endpoint maximum of 50 — confirm
            # whether this off-by-one is intentional.
            if (counter + 1) % 50 == 0 and len(track_ids) > 0:
                track_ids_tuple = tuple(track_ids)
                track_ids.clear()
                response = get_multiple_field_information(token, 'tracks', 50, *track_ids_tuple)
                all_songs_played_info.extend(_sort_and_create_required_dataset(response))
                counter = 0
    # Flush whatever is left over after the loop (final partial batch).
    if len(track_ids) > 0:
        track_ids_tuple = tuple(track_ids)
        response = get_multiple_field_information(token, 'tracks', 50, *track_ids_tuple)
        all_songs_played_info.extend(_sort_and_create_required_dataset(response))
    return all_songs_played_info
def _sort_and_create_required_dataset(response) -> dict:
track_list = []
for entry in response['tracks']:
track_data = {
'track_id': entry['id'],
'album_id': entry['album']['id'],
'artist_id': entry['artists'][0]['id']
}
track_list.append(track_data)
return track_list
def _fill_missing_ids(all_songs_played, all_songs_catalogued):
# Create a dictionary to map track_id to artist_id and album_id
track_id_to_artist_album = {data['track_id']: {'album_id': data['album_id'], 'artist_id': data['artist_id']} for data in all_songs_catalogued}
# Now, we will update the original `tracks` list by adding artist_id and album_id
for track in all_songs_played:
track_info = track_id_to_artist_album.get(track['id'])
if track_info:
track['artist_id'] = track_info['artist_id']
track['album_id'] = track_info['album_id']
return all_songs_played
def _insert_data_into_db(db: Database, all_songs_played: list):
    """
    Persist every played-song record into the RECENTLY_PLAYED table.

    Rows that cannot be built or inserted (e.g. a song that never got its
    artist/album IDs filled in) are logged and skipped.

    :param db: open Database handle used for the inserts
    :param all_songs_played: list of dicts carrying 'timestamp', 'id',
        'artist_id' and 'album_id'
    """
    for entry in all_songs_played:
        try:
            # Tuple construction stays inside the try so a missing key is
            # logged rather than raised.
            values = (entry['timestamp'], entry['id'], entry['artist_id'], entry['album_id'])
            db.add_row(Table.RECENTLY_PLAYED, values)
        except Exception as e:
            log.error(f'Failed adding {entry} to database, error {e}')
def export_gdpr_data(db: Database, n_limit: int = 100) -> None:
    """
    Import the user's GDPR streaming-history export into the database.

    Reads the JSON dump, keeps only the most recent n_limit plays, resolves
    album/artist IDs via the Spotify API and inserts the rows.

    :param db: open Database handle to insert into
    :param n_limit: maximum number of most-recent plays to import
    """
    all_songs_played = _read_gdrp_data()
    # _read_gdrp_data sorts ascending, so the slice keeps the newest plays.
    all_songs_played = all_songs_played[-n_limit:]
    all_songs_catalogued = _populate_ids(all_songs_played)
    all_songs_played = _fill_missing_ids(all_songs_played, all_songs_catalogued)
    _insert_data_into_db(db, all_songs_played)
+61
View File
@@ -0,0 +1,61 @@
import logging
import os
from logging.handlers import RotatingFileHandler
from pathlib import Path
class LoggerWrapper():
    """
    Thin wrapper around the stdlib logging module.

    Every instance sharing the same logger_name reuses one underlying logger;
    handlers (rotating file at DEBUG, console at WARNING) are attached only
    once, so multiple instances do not duplicate log lines.
    """

    def __init__(self, logger_name: str = "standard_logger"):
        """Get (or create) the named logger and set it up on first use."""
        self.logger = logging.getLogger(logger_name)
        if not self.logger.handlers:
            self.logger.setLevel(logging.DEBUG)
            self.setup_logger()

    def set_console_handler_to_debug(self):
        """Lower the console handler threshold from WARNING to DEBUG."""
        for handler in self.logger.handlers:
            # Exact type check: RotatingFileHandler is a StreamHandler
            # subclass, so isinstance() would also match the file handler.
            if type(handler) is logging.StreamHandler:
                handler.setLevel(logging.DEBUG)

    def setup_logger(self):
        """Attach a rotating file handler (DEBUG) and a console handler (WARNING)."""
        # Define and create the ../logs folder next to the source tree.
        logs_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'logs')
        Path(logs_folder).mkdir(parents=True, exist_ok=True)

        # Define file path (fixes the duplicated `log_file = log_file = ...`).
        log_file = os.path.join(logs_folder, 'predictify.log')

        # File handler: keep up to 5 rotated files of ~1 MB each.
        handler = RotatingFileHandler(log_file, maxBytes=1000000, backupCount=5)
        handler.setLevel(logging.DEBUG)

        # Console handler: only warnings and above by default.
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.WARNING)

        # One shared format for both handlers.
        formatter = logging.Formatter('%(asctime)s - [%(filename)s:%(lineno)d] - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        console_handler.setFormatter(formatter)

        self.logger.addHandler(handler)
        self.logger.addHandler(console_handler)

    def info(self, message):
        self.logger.info(message)

    def debug(self, message):
        self.logger.debug(message)

    def warning(self, message):
        self.logger.warning(message)

    def error(self, message):
        self.logger.error(message)

    def critical(self, message):
        self.logger.critical(message)
        # Here we can add alerting/handling
+70 -3
View File
@@ -1,8 +1,75 @@
import argparse
import atexit
import os
import sys
import traceback
from time import sleep from time import sleep
from scraper import scraping from database_handler import Database
from gdpr_export import export_gdpr_data
from logger import LoggerWrapper
from scraper import scrape_missing_infos, scraping
log = LoggerWrapper()
def _handle_exit():
"""
Function to log exit information if the script ends unexpectedly.
"""
log.critical("Script terminated unexpectedly.")
def _log_crash_info(exc_type, exc_value, exc_tb):
"""Custom function to log crash info when an exception occurs."""
log.critical("A critical error occurred!", exc_info=(exc_type, exc_value, exc_tb))
log.critical("Exception type: %s", exc_type)
log.critical("Exception message: %s", exc_value)
log.critical("Stack trace:\n%s", ''.join(traceback.format_tb(exc_tb)))
# Register the exit handler and excepthook
atexit.register(_handle_exit)
sys.excepthook = _log_crash_info
# Initialize the parser
parser = argparse.ArgumentParser(description="A python script written in Python3.13 which continuously checks what spotify songs "
"the user is listening to and logging these in a local database. \n"
"The Script also has a export function where it can read out the gdpr data exported by the user.")
# Add optional arguments
parser.add_argument('--verbose', '-v', action='store_true', help="Enable verbose output")
parser.add_argument('--export', type=str, choices=['TEST', 'PRODUCTION'], required=True,
help="Export the gdpr data from spotify if not done already. Choose between TEST and PRODUCTION."
"TEST will export only a small number of songs, PRODUCTION will export all songs.")
# Parse the arguments
args = parser.parse_args()
if args.verbose:
log.set_console_handler_to_debug()
log.info('Enabled verbose mode')
db_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'data', f'spotify_scrape_{args.export}.db')
if args.export == 'TEST':
export_size = 10000
log.info(f'Scraping GDPR Data. Sample size: {export_size}')
db = Database(db_path)
export_gdpr_data(db, export_size)
scrape_missing_infos(db)
elif args.export == 'PRODUCTION':
export_size = 1000000
log.info('Scraping all GDPR Data.')
db = Database(db_path)
export_gdpr_data(db, export_size)
scrape_missing_infos(db)
else:
raise ValueError('Invalid export type. Please choose between TEST and PRODUCTION.')
# Run forever on intervals of 30 minutes
while True: while True:
scraping() log.info('Scraping API...')
scraping(db)
log.info('Done scraping API. Sleeping for 30 minutes...')
sleep(1800) sleep(1800)
+106 -125
View File
@@ -1,150 +1,131 @@
import requests
from auth import authenticate, simple_authenticate from auth import authenticate, simple_authenticate
from database_handler import Database, Table from database_handler import Database, Table
from logger import LoggerWrapper
from spotify_api import get_last_played_track, get_multiple_field_information
db = Database('spotify_scraped.db') log = LoggerWrapper()
def scraping(): def scraping(db: Database) -> None:
""" """
This function is the main function that will be executed when the script is run This function is the main function that will be executed when the script is run
""" """
global db
scope = "user-read-recently-played" scope = "user-read-recently-played"
bearer_token = authenticate(scope) bearer_token = authenticate(scope)
# Once each 30 mins _read_recently_played_page_and_add_to_db(db, bearer_token)
_read_recently_played_page_and_add_to_db(bearer_token=bearer_token) scrape_missing_infos(db)
_scrape_missing_infos()
db.close()
def _read_recently_played_page_and_add_to_db(bearer_token: str): def _read_recently_played_page_and_add_to_db(db: Database, bearer_token: str) -> None:
""" """
""" This function gets a list of song play history and adds it into the database.
global db
last_played_track = _get_last_played_track(bearer_token=bearer_token)
for track in last_played_track['items']:
track_id = track['track']['id']
played_at = track['played_at']
album_id = track['track']['album']['id']
artist_id = track['track']['artists'][0]['id']
db.add_row(Table.RECENTLY_PLAYED, (played_at, track_id, artist_id, album_id))
def _get_last_played_track(url: str = "https://api.spotify.com/v1/me/player/recently-played?limit=50", bearer_token: str = "") -> dict:
"""
This function returns the last played track based on the limit size
:param limit: str
:param bearer_token: str
:return: dict
""" """
header = { last_played_track = get_last_played_track(bearer_token=bearer_token)
'Authorization': f'Bearer {bearer_token}'
}
response = requests.get(url, headers=header) try:
response_json = response.json() for track in reversed(last_played_track['items']):
return response_json track_id = track['track']['id']
played_at = track['played_at']
album_id = track['track']['album']['id']
artist_id = track['track']['artists'][0]['id']
db.add_row(Table.RECENTLY_PLAYED, (played_at, track_id, artist_id, album_id))
except Exception as e:
log.error(f"Failed to add returned play history to database: {e}"
f"\nReturned Value: {last_played_track}")
def _get_track_information(track_id: str, bearer_token: str) -> dict: def scrape_missing_infos(db: Database) -> None:
"""
This function returns the track information based on the track id
:param track_id: str
:param bearer_token: str
:return: dict
""" """
url = f"https://api.spotify.com/v1/tracks/{track_id}"
header = {
'Authorization': f'Bearer {bearer_token}'
}
response = requests.get(url, headers=header)
response_json = response.json()
return response_json
def _get_artist_information(artist_id: str, bearer_token: str) -> dict:
""" """
This function returns the artist information based on the artist id
:param artist_id: str
:param bearer_token: str
:return: dict
"""
url = f"https://api.spotify.com/v1/artists/{artist_id}"
header = {
'Authorization': f'Bearer {bearer_token}'
}
response = requests.get(url, headers=header)
response_json = response.json()
return response_json
def _get_album_information(album_id: str, bearer_token: str) -> dict:
"""
This function returns the album information based on the album id
:param album_id: str
:param bearer_token: str
:return: dict
"""
url = f"https://api.spotify.com/v1/albums/{album_id}"
header = {
'Authorization': f'Bearer {bearer_token}'
}
response = requests.get(url, headers=header)
response_json = response.json()
return response_json
def _scrape_missing_infos():
"""
"""
global db
bearer_token_simple = simple_authenticate() bearer_token_simple = simple_authenticate()
# Track Info _process_missing_info(db, bearer_token_simple, Table.TRACK_INFORMATION, 'track_id', 'tracks')
all_track_ids_recently_played = db.read_all_rows(Table.RECENTLY_PLAYED, 'track_id') _process_missing_info(db, bearer_token_simple, Table.ALBUM_INFORMATION, 'album_id', 'albums')
all_track_ids_saved = db.read_all_rows(Table.TRACK_INFORMATION, 'track_id') _process_missing_info(db, bearer_token_simple, Table.ARTIST_INFORMATION, 'artist_id', 'artists')
all_track_ids_missing = list(set(all_track_ids_recently_played) - set(all_track_ids_saved)) # _process_missing_info(db, bearer_token_simple, Table.TRACK_ATTRIBUTES, 'track_id', 'audio-features')
for track_id in all_track_ids_missing:
response = _get_track_information(track_id=track_id[0], bearer_token=bearer_token_simple)
db.add_row(Table.TRACK_INFORMATION, (response['id'], response['name'], response['duration_ms'], response['explicit'], response['popularity'])) def _process_missing_info(db: Database, bearer_token_simple: str, table_name: Table, id_field_name: str, endpoint_name: str) -> None:
# Album Info
all_album_ids_recently_played = db.read_all_rows(Table.RECENTLY_PLAYED, 'album_id') if endpoint_name == 'albums':
all_album_ids_saved = db.read_all_rows(Table.ALBUM_INFORMATION, 'album_id') limit = 20
all_album_ids_missing = list(set(all_album_ids_recently_played) - set(all_album_ids_saved)) elif endpoint_name == 'audio-features':
for album_id in all_album_ids_missing: limit = 100
response = _get_album_information(album_id=album_id[0], bearer_token=bearer_token_simple) else:
try: limit = 50
release_year = response['release_date'][:4]
except Exception: all_ids_recently_played = db.read_all_rows(Table.RECENTLY_PLAYED, id_field_name)
release_year = "" all_ids_saved = db.read_all_rows(table_name, id_field_name)
db.add_row(Table.ALBUM_INFORMATION, (response['id'], response['name'], response['album_type'], response['total_tracks'], release_year, response['label'])) all_ids_missing = list(set(all_ids_recently_played) - set(all_ids_saved))
# Artist Info
all_artist_ids_recently_played = db.read_all_rows(Table.RECENTLY_PLAYED, 'artist_id') log.debug(f"Number of missing {table_name.name} entries: {len(all_ids_missing)}. Inserting...")
all_artist_ids_saved = db.read_all_rows(Table.ARTIST_INFORMATION, 'artist_id')
all_artist_ids_missing = list(set(all_artist_ids_recently_played) - set(all_artist_ids_saved)) ids = []
for artist_id in all_artist_ids_missing: processed_ids = set()
response = _get_artist_information(artist_id=artist_id[0], bearer_token=bearer_token_simple)
try: counter = 0
genre = response['genres'][0]
except IndexError: for id_value in all_ids_missing:
genre = ""
db.add_row(Table.ARTIST_INFORMATION, (response['id'], response['name'], response['followers']['total'], genre, response['popularity'])) id_value_str = id_value[0]
if id_value_str not in processed_ids:
ids.append(id_value_str)
processed_ids.add(id_value_str)
counter += 1
if (counter + 1) % limit == 0 and len(ids) > 0:
ids_tuple = tuple(ids)
ids.clear()
response = get_multiple_field_information(bearer_token_simple, endpoint_name, limit, *ids_tuple)
_add_data_to_database(db, table_name, response)
counter = 0
if len(ids) > 0:
ids_tuple = tuple(ids)
ids.clear()
response = get_multiple_field_information(bearer_token_simple, endpoint_name, limit, *ids_tuple)
_add_data_to_database(db, table_name, response)
def _add_data_to_database(db: Database, table_name: Table, response) -> None:
    """
    Insert a batched Spotify API response into the matching database table.

    :param db: open Database handle
    :param table_name: target table; selects how the response is unpacked
    :param response: decoded JSON from the corresponding batch endpoint
    """
    if table_name == Table.TRACK_INFORMATION:
        log.debug('Adding track information to database')
        for entry in response['tracks']:
            log.debug(f"Adding track: {entry['name']}")
            db.add_row(table_name, (entry['id'], entry['name'], entry['duration_ms'], entry['explicit'], entry['popularity']))
    elif table_name == Table.ALBUM_INFORMATION:
        log.debug('Adding album information to database')
        for entry in response['albums']:
            log.debug(f"Adding album: {entry['name']}")
            try:
                # Release dates come as "YYYY", "YYYY-MM" or "YYYY-MM-DD"; keep the year only.
                release_year = entry['release_date'][:4]
            except Exception:
                release_year = ""
            db.add_row(table_name, (entry['id'], entry['name'], entry['album_type'], entry['total_tracks'], release_year, entry['label']))
    elif table_name == Table.ARTIST_INFORMATION:
        log.debug('Adding artist information to database')
        for entry in response['artists']:
            log.debug(f"Adding artist: {entry['name']}")
            try:
                # Spotify may return an empty genre list; store the first genre or "".
                genre = entry['genres'][0]
            except IndexError:
                genre = ""
            db.add_row(Table.ARTIST_INFORMATION, (entry['id'], entry['name'], entry['followers']['total'], genre, entry['popularity']))
    elif table_name == Table.TRACK_ATTRIBUTES:
        log.debug('Adding track attributes to database')
        for entry in response['audio_features']:
            log.debug(f"Adding track attributes: {entry['id']}")
            try:
                # BUG FIX: the key was misspelled 'aucousticness', so every insert
                # raised KeyError and was silently swallowed by the except below.
                # 'acousticness' matches the API field and the schema column.
                db.add_row(Table.TRACK_ATTRIBUTES, (entry['id'], entry['acousticness'], entry['danceability'], entry['duration_ms'], entry['energy'], entry['instrumentalness'], entry['key'], entry['liveness'], entry['loudness'], entry['speechiness'], entry['tempo'], entry['time_signature'], entry['valence']))
            except Exception as e:
                log.error(f"Failed to add track attributes to database: {e}"
                          f"\nReturned Value: {response}")
+140
View File
@@ -0,0 +1,140 @@
from typing import Union
import requests
from logger import LoggerWrapper
log = LoggerWrapper()
def get_last_played_track(bearer_token: str, url: str = "https://api.spotify.com/v1/me/player/recently-played?limit=50") -> Union[dict, None]:
    """
    Return the user's recently played tracks from the Spotify Web API.

    :param bearer_token: OAuth bearer token used to authorize the request
    :param url: recently-played endpoint; the item limit is encoded in its
        query string (was previously mis-documented as a `limit` parameter)
    :return: parsed JSON response dict, or None on a request or HTTP error
    """
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }
    try:
        log.debug(f"GET Request: {url}")
        response = requests.get(url, headers=header)
        # Treat HTTP error statuses (401, 429, ...) as failures instead of
        # handing Spotify's error JSON to the caller as if it were data;
        # HTTPError is a RequestException, so the handler below catches it.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        log.error(f"Error in get_last_played_track: {e}")
        return None
def get_track_information(track_id: str, bearer_token: str) -> Union[dict, None]:
    """
    Return the track information for a single Spotify track id.

    :param track_id: Spotify track id
    :param bearer_token: OAuth bearer token used to authorize the request
    :return: parsed JSON response dict, or None on a request or HTTP error
    """
    url = f"https://api.spotify.com/v1/tracks/{track_id}"
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }
    try:
        log.debug(f"GET Request: {url}")
        response = requests.get(url, headers=header)
        # Fail on HTTP error statuses rather than returning the error body.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        log.error(f"Error in get_track_information: {e}")
        return None
def get_artist_information(artist_id: str, bearer_token: str) -> Union[dict, None]:
    """
    Return the artist information for a single Spotify artist id.

    :param artist_id: Spotify artist id
    :param bearer_token: OAuth bearer token used to authorize the request
    :return: parsed JSON response dict, or None on a request or HTTP error
    """
    url = f"https://api.spotify.com/v1/artists/{artist_id}"
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }
    try:
        log.debug(f"GET Request: {url}")
        response = requests.get(url, headers=header)
        # Fail on HTTP error statuses rather than returning the error body.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        log.error(f"Error in get_artist_information: {e}")
        return None
def get_album_information(album_id: str, bearer_token: str) -> Union[dict, None]:
    """
    Return the album information for a single Spotify album id.

    :param album_id: Spotify album id
    :param bearer_token: OAuth bearer token used to authorize the request
    :return: parsed JSON response dict, or None on a request or HTTP error
    """
    url = f"https://api.spotify.com/v1/albums/{album_id}"
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }
    try:
        log.debug(f"GET Request: {url}")
        response = requests.get(url, headers=header)
        # Fail on HTTP error statuses rather than returning the error body.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        log.error(f"Error in get_album_information: {e}")
        return None
def get_multiple_field_information(bearer_token: str, api_type: str, limit: int, *track_ids) -> Union[dict, None]:
    """
    Fetch information for several ids at once from a batched Spotify endpoint.

    :param bearer_token: OAuth bearer token used to authorize the request
    :param api_type: batch endpoint name, e.g. 'tracks', 'albums', 'artists'
    :param limit: maximum number of ids the endpoint accepts per request
    :param track_ids: Spotify ids to query (at most `limit` of them)
    :return: parsed JSON response dict, or None on error or too many ids
    """
    if len(track_ids) > limit:
        # (typo fixed: "limit if ids" -> "limit of ids")
        log.error(f'exceeding the limit of ids {limit} for endpoint {api_type}')
        return None
    try:
        # str.join replaces the manual concat-and-trim loop; like the old
        # loop it raises TypeError if any id is not a string, and it also
        # avoids clipping the '=' of 'ids=' when no ids are passed.
        url_suffix = "ids=" + ",".join(track_ids)
    except Exception as e:
        log.error(f"Failed setting up the url for multiple ids request."
                  f"Error: {e}")
        return None
    url = f"https://api.spotify.com/v1/{api_type}?{url_suffix}"
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }
    try:
        log.debug(f"GET Request: {url}")
        response = requests.get(url, headers=header)
        # Fail on HTTP error statuses rather than returning the error body.
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        log.error(f"Error in get_multiple_field_information: {e}")
        return None
-14
View File
@@ -1,14 +0,0 @@
#!/bin/sh
#
# Startup script for the predictify scraper: create a virtualenv, install
# the pinned requirements and launch the runtime.

# Abort immediately if any setup step fails (e.g. a broken pip install
# previously went unnoticed and the app was launched anyway).
set -e

if test -f ./requirements.txt
then
    python3 -m venv .venv
    .venv/bin/pip install --no-cache-dir -r ./requirements.txt
else
    printf "Missing requirements file! aborting...\n"
    exit 1
fi

# exec so the Python process replaces this shell: as a container
# ENTRYPOINT it becomes PID 1 and receives SIGTERM from `docker stop`.
exec .venv/bin/python3 src/runtime.py