Merge branch 'staging' into feat/containerize_app

This commit is contained in:
Chris Kiriakou
2025-03-19 17:55:35 +01:00
11 changed files with 531 additions and 3 deletions
+13 -2
View File
@@ -1,3 +1,15 @@
# Test running file
main_test.py
# databases
*.db
# Custom Tokens file/rotator
tokens.json
# Visual Studio Code
.vscode/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
@@ -129,10 +141,9 @@ celerybeat.pid
# Environments
.env
!.env.example
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
+49
View File
@@ -0,0 +1,49 @@
# .pre-commit-config.yaml
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: trailing-whitespace # Remove trailing whitespace
exclude: '.*test.*'
files: \.(py)$
- id: end-of-file-fixer # Ensure a single newline at the end of a file
exclude: '.*test.*'
files: \.(py)$
- id: check-yaml # Check if the YAML files are valid
exclude: '.*test.*'
files: \.(yaml|yml)$
- id: check-json # Check if the JSON files are valid
exclude: '.*test.*'
files: \.(json)$
- id: check-added-large-files # Prevent large files from being committed
args: ['--maxkb=1000']
- id: check-ast # Check for parse errors in Python files
exclude: '.*test.*'
files: \.(py)$
- id: debug-statements # Check for debugger imports and breakpoint() calls
exclude: '.*test.*'
files: \.(py)$
- repo: https://github.com/PyCQA/isort
rev: 5.13.2
hooks:
- id: isort
args: ['--profile=black']
files: \.(py)$
exclude: '.*test.*'
- repo: https://github.com/PyCQA/flake8
rev: 7.1.1
hooks:
- id: flake8
args: ['--extend-ignore=E501,E402,W503,E721','--max-line-length=100']
files: \.(py)$
exclude: '.*test.*'
+27 -1
View File
@@ -1 +1,27 @@
# predictify
# Predictify
## Overview
A data analysis tool that scrapes your Spotify listening history and lets an ML model predict your next songs.
## Authentication API
[Official Documentation](https://developer.spotify.com/documentation/web-api/tutorials/getting-started)
[Authorization Code Flow](https://developer.spotify.com/documentation/web-api/tutorials/code-flow)
## Potentially Useful APIs
Recently Played Tracks: /me/player/recently-played [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-recently-played)
Get Track: /tracks/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-track)
Get Track's Audio Features - Deprecated: /audio-features/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-features)
Get Track's Audio Analysis - Deprecated: /audio-analysis/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-analysis)
Get Artist: /artists/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-an-artist)
## Authors
[Chris Kiriakou](https://github.com/ckiri)
[Dominik Agres](https://github.com/agresdominik)
+5
View File
@@ -0,0 +1,5 @@
# pytest.ini
[pytest]
# Set the root directory to the current directory (.)
rootdir = .
pythonpath = .
+6
View File
@@ -0,0 +1,6 @@
python-dotenv==1.0.1
requests==2.32.3
pre-commit==4.1.0
pytest==8.3.5
coverage==7.7.0
pytest-cov==6.0.0
+256
View File
@@ -0,0 +1,256 @@
import base64
import json
import os
import time
from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import parse_qs, urlencode, urlparse
import dotenv
import requests
# Absolute path of the JSON token cache: <directory of this module>/env/tokens.json
TOKEN_FILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'env', 'tokens.json')
def simple_authenticate(grant_type: str = "client_credentials") -> str:
    """
    Request an access token from the Spotify Accounts service without user interaction.

    The client id and client secret are read from the .env file and sent as an
    HTTP Basic ``Authorization`` header to the token endpoint.

    :param grant_type: str, grant type sent in the request body
    :return: str, the access token; None if the token endpoint did not answer with status 200
    """
    # The redirect URI is part of the .env file but is not needed for this request.
    spotify_client_id, spotify_client_secret, _ = _read_env_file()

    token_url = "https://accounts.spotify.com/api/token"
    auth_value = f"{spotify_client_id}:{spotify_client_secret}"
    auth_header = base64.b64encode(auth_value.encode('utf-8')).decode('utf-8')

    headers = {
        "Authorization": f"Basic {auth_header}",
        "Content-Type": "application/x-www-form-urlencoded"
    }
    data = {
        "grant_type": str(grant_type)
    }

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.post(token_url, headers=headers, data=data, timeout=30)

    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.text}")
        return None

    return response.json().get('access_token')
def authenticate(scope: str) -> str:
    """
    Return an access token for the given scope.

    A cached token is returned directly while it has not expired; an expired
    one is refreshed and the token file is updated. If nothing is cached for
    the scope, the user is asked to authorize the app, the authorization code
    is exchanged for tokens, and the tokens are saved.

    :param scope: str
    :return: str
    """
    client_id, client_secret, redirect_uri = _read_env_file()

    cached = _load_tokens(scope)
    if cached:
        access_token, refresh_token, expires_at = cached
        if not (time.time() < expires_at):
            print(f"Token for scope {scope} expired, refreshing...")
            access_token, expires_at = _refresh_access_token(refresh_token, client_id, client_secret)
            _refresh_tokens_file(access_token, scope, expires_at)
        return access_token

    # Nothing cached for this scope: run the interactive authorization.
    auth_url = _get_authorization_url(client_id, redirect_uri, scope)
    print(f'Please go to the following URL to authorize the app: {auth_url}')

    authorization_code = _start_server_and_wait_for_code()
    access_token, refresh_token, expires_at = _exchange_code_for_token(
        authorization_code,
        redirect_uri=redirect_uri,
        client_id=client_id,
        client_secret=client_secret,
    )

    _save_tokens(access_token, refresh_token, scope, expires_at)
    return access_token
def _get_authorization_url(client_id: str, redirect_uri: str, scope: str) -> str:
    """
    Build the URL the user has to open in order to authorize the app.

    :param client_id: str
    :param redirect_uri: str
    :param scope: str
    :return: str
    """
    query = urlencode({
        "response_type": "code",
        "client_id": client_id,
        "scope": scope,
        "redirect_uri": redirect_uri,
        # Current Unix time in whole seconds, sent as the "state" value
        "state": str(int(time.time())),
    })
    return "https://accounts.spotify.com/authorize?" + query
def _read_env_file() -> tuple:
    """
    Read env/.env next to this module and return the Spotify credentials.

    :return: tuple (client_id, client_secret, redirect_uri)
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    env_file = os.path.join(module_dir, 'env', '.env')

    settings = dotenv.dotenv_values(dotenv_path=env_file)

    return (
        settings['SPOTIFY_CLIENT_ID'],
        settings['SPOTIFY_CLIENT_SECRET'],
        settings['SPOTIFY_REDIRECT_URI'],
    )
def _start_server_and_wait_for_code() -> str:
    """
    Start a local HTTP server and wait for the authorization redirect.

    The server listens on localhost:8888 and handles requests one at a time
    until a request carries either a ``code`` or an ``error`` query parameter.
    The listening socket is closed before the function returns.

    :return: str, the authorization code
    :raises Exception: if the redirect carries an ``error`` parameter instead of a code
    """
    class CallbackHandler(BaseHTTPRequestHandler):
        def do_GET(self):
            query_params = parse_qs(urlparse(self.path).query)

            if 'code' in query_params:
                self.server.authorization_code = query_params['code'][0]
                self.send_response(200)
                self.end_headers()
                self.wfile.write(b"Authorization successful! You can close this window.")
            elif 'error' in query_params:
                self.server.authorization_error = query_params['error'][0]
                self.send_response(400)
                self.end_headers()
                self.wfile.write(b"Authorization failed. You can close this window.")
            else:
                # Unrelated request (e.g. a favicon lookup): answer it and keep waiting.
                self.send_response(404)
                self.end_headers()

    # The context manager closes the listening socket even if a handler raises.
    with HTTPServer(('localhost', 8888), CallbackHandler) as server:
        # Initialise both attributes so a request without "code" cannot cause an AttributeError.
        server.authorization_code = None
        server.authorization_error = None

        print("Starting server to capture the authorization code...")
        while server.authorization_code is None and server.authorization_error is None:
            server.handle_request()

    if server.authorization_error is not None:
        raise Exception(f"Authorization failed: {server.authorization_error}")

    return server.authorization_code
def _exchange_code_for_token(code: str, redirect_uri: str, client_id: str, client_secret: str) -> tuple:
    """
    Exchange the authorization code for an access token.

    :param code: str
    :param redirect_uri: str
    :param client_id: str
    :param client_secret: str
    :return: tuple (access_token, refresh_token, expires_at); refresh_token is None
             if the response does not contain one, expires_at is a Unix timestamp
    :raises Exception: if the response does not contain an access token
    """
    token_url = "https://accounts.spotify.com/api/token"
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    data = {
        'grant_type': 'authorization_code',
        'code': code,
        'redirect_uri': redirect_uri,
        'client_id': client_id,
        'client_secret': client_secret,
    }

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.post(token_url, data=data, headers=headers, timeout=30)
    response_data = response.json()

    if 'access_token' not in response_data:
        raise Exception("Failed to get access token")

    # Turn the relative lifetime into an absolute expiry time.
    expires_at = time.time() + response_data['expires_in']

    return response_data['access_token'], response_data.get('refresh_token'), expires_at
def _refresh_access_token(refresh_token: str, client_id: str, client_secret: str) -> tuple:
    """
    Refresh the access token using the refresh token.

    :param refresh_token: str
    :param client_id: str
    :param client_secret: str
    :return: tuple (access_token, expires_at); expires_at is a Unix timestamp
    :raises Exception: if the response does not contain an access token
    """
    token_url = "https://accounts.spotify.com/api/token"
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    data = {
        'grant_type': 'refresh_token',
        'refresh_token': refresh_token,
        'client_id': client_id,
        'client_secret': client_secret,
    }

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.post(token_url, data=data, headers=headers, timeout=30)
    response_data = response.json()

    if 'access_token' not in response_data:
        raise Exception("Failed to refresh access token")

    # Turn the relative lifetime into an absolute expiry time.
    expires_at = time.time() + response_data['expires_in']

    return response_data['access_token'], expires_at
def _load_tokens(scope: str) -> tuple:
    """
    Load the cached tokens for a scope from the local token file.

    Expiry is not checked here; the caller compares ``expires_at`` itself.

    :param scope: str
    :return: tuple (access_token, refresh_token, expires_at), or None if the file
             does not exist or holds no complete entry for the scope
    """
    if not os.path.exists(TOKEN_FILE_PATH):
        return None

    with open(TOKEN_FILE_PATH, 'r') as f:
        tokens = json.load(f)

    entry = tokens.get(scope) if isinstance(tokens, dict) else None
    if not isinstance(entry, dict):
        return None

    # All three fields are returned, so all three must be present.
    required = ('access_token', 'refresh_token', 'expires_at')
    if all(key in entry for key in required):
        return tuple(entry[key] for key in required)

    return None
def _save_tokens(access_token: str, refresh_token: str, scope: str, expires_at) -> None:
    """
    Save the access and refresh tokens for a scope to the local token file.

    Entries already stored for other scopes are kept; the entry for ``scope``
    is created or replaced. The directory of the token file is created if it
    does not exist.

    :param access_token: str
    :param refresh_token: str
    :param scope: str
    :param expires_at: Unix timestamp at which the access token expires
    """
    tokens = {}
    if os.path.exists(TOKEN_FILE_PATH):
        try:
            with open(TOKEN_FILE_PATH, 'r') as f:
                stored = json.load(f)
        except json.JSONDecodeError:
            # An unreadable file is replaced rather than blocking the save.
            stored = None
        if isinstance(stored, dict):
            tokens = stored

    tokens[scope] = {
        'access_token': access_token,
        'refresh_token': refresh_token,
        'expires_at': expires_at
    }

    directory = os.path.dirname(TOKEN_FILE_PATH)
    if directory:
        os.makedirs(directory, exist_ok=True)

    with open(TOKEN_FILE_PATH, 'w') as f:
        json.dump(tokens, f)
def _refresh_tokens_file(access_token: str, scope: str, expires_at) -> None:
    """
    Store a refreshed access token and its expiry for an existing scope entry.

    The token file is only rewritten when it already holds an entry for the
    scope that contains a refresh token; otherwise an error is printed.

    :param access_token: str
    :param scope: str
    :param expires_at: new expiry value stored with the access token
    """
    with open(TOKEN_FILE_PATH, 'r') as source:
        cache = json.load(source)

    if scope not in cache or 'refresh_token' not in cache[scope]:
        print(f"Error: Scope '{scope}' or refresh_token not found in the tokens file.")
        return

    cache[scope]['access_token'] = access_token
    cache[scope]['expires_at'] = expires_at

    with open(TOKEN_FILE_PATH, 'w') as target:
        json.dump(cache, target, indent=4)
+87
View File
@@ -0,0 +1,87 @@
import sqlite3
from enum import Enum
class Table(Enum):
    """Database tables; each member's value is the table name used in SQL."""

    # Tables keyed by an id column
    TRACK_INFORMATION = "track_information"
    ARTIST_INFORMATION = "artist_information"
    ALBUM_INFORMATION = "album_information"
    TRACK_ATTRIBUTES = "track_attributes"

    # Table keyed by the played_at timestamp
    RECENTLY_PLAYED = "recently_played"
class Database:
    """
    Wrapper around a SQLite connection that creates the tables on start-up
    and offers simple insert and read helpers.
    """

    def __init__(self, db_name):
        """Open the connection to the database and make sure the tables exist"""
        self.db_name = db_name
        self.conn = sqlite3.connect(db_name)
        self.cursor = self.conn.cursor()
        self.create_tables()

    def create_tables(self):
        """Create every table that does not exist yet and commit"""
        statements = (
            f'''
            CREATE TABLE IF NOT EXISTS {Table.TRACK_INFORMATION.value} (
                track_id TEXT PRIMARY KEY,
                title TEXT
            );
            ''',
            f'''
            CREATE TABLE IF NOT EXISTS {Table.ARTIST_INFORMATION.value} (
                artist_id TEXT PRIMARY KEY,
                artist_name TEXT
            );
            ''',
            f'''
            CREATE TABLE IF NOT EXISTS {Table.ALBUM_INFORMATION.value} (
                album_id TEXT PRIMARY KEY,
                album_name TEXT
            );
            ''',
            f'''
            CREATE TABLE IF NOT EXISTS {Table.TRACK_ATTRIBUTES.value} (
                track_id TEXT PRIMARY KEY,
                attribute_name TEXT,
                attribute_value TEXT
            );
            ''',
            f'''
            CREATE TABLE IF NOT EXISTS {Table.RECENTLY_PLAYED.value} (
                played_at TIMESTAMP PRIMARY KEY,
                track_id TEXT,
                artist_id TEXT,
                album_id TEXT,
                FOREIGN KEY (track_id) REFERENCES {Table.TRACK_INFORMATION.value}(track_id),
                FOREIGN KEY (artist_id) REFERENCES {Table.ARTIST_INFORMATION.value}(artist_id),
                FOREIGN KEY (album_id) REFERENCES {Table.ALBUM_INFORMATION.value}(album_id)
            );
            ''',
        )

        # The recently_played table references the other tables, so it is created last.
        for ddl in statements:
            self.cursor.execute(ddl)

        self.conn.commit()

    def add_row(self, table: Table, values):
        """Insert one row into the given table and commit; values are bound as parameters"""
        # One "?" per value, e.g. "?, ?, ?" for three values
        marks = ', '.join('?' * len(values))
        self.cursor.execute(f"INSERT INTO {table.value} VALUES ({marks})", values)
        self.conn.commit()

    def read_all_rows(self, table: Table, column: str = "*"):
        """Return all rows of the given table, restricted to `column` (default: every column)"""
        return self.cursor.execute(f"SELECT {column} FROM {table.value}").fetchall()

    def close(self):
        """Close the database connection"""
        self.conn.close()
+3
View File
@@ -0,0 +1,3 @@
SPOTIFY_CLIENT_ID=your_token_here
SPOTIFY_CLIENT_SECRET=your_token_here
SPOTIFY_REDIRECT_URI=http://localhost:8888/callback
+85
View File
@@ -0,0 +1,85 @@
import requests
from auth import authenticate, simple_authenticate
from database_handler import Database, Table
# Module-level database handle used by the functions below; created when this module is loaded.
db = Database('spotify_scraped.db')
def main():
    """
    Entry point of the script.

    Stores the recently played tracks in the database, then fetches and prints
    the track information for every stored track id. The database connection
    is closed at the end, also when one of the steps raises.
    """
    scope = "user-read-recently-played"

    try:
        bearer_token = authenticate(scope)

        # Once each 30 mins
        _read_recently_played_page_and_add_to_db(bearer_token=bearer_token)

        # Once a day
        all_track_ids = db.read_all_rows(Table.RECENTLY_PLAYED, 'track_id')
        bearer_token_simple = simple_authenticate()
        for row in all_track_ids:
            # Each row is a one-element tuple holding the track id
            response = _get_track_information(track_id=row[0], bearer_token=bearer_token_simple)
            print(response)
    finally:
        # Close the database connection
        db.close()
def _read_recently_played_page_and_add_to_db(bearer_token: str):
    """
    Fetch the recently played page and store every new play in the database.

    A play is identified by its ``played_at`` value, which is the primary key
    of the recently played table. Plays that are already stored are skipped,
    so repeated runs do not fail on overlapping pages.

    :param bearer_token: str
    :raises KeyError: if the API response contains no 'items' entry
    """
    recently_played = _get_last_played_track(bearer_token=bearer_token)
    if 'items' not in recently_played:
        raise KeyError(f"'items' missing from recently played response: {recently_played}")

    # Look up the stored primary keys once instead of failing on a duplicate insert.
    known_plays = {row[0] for row in db.read_all_rows(Table.RECENTLY_PLAYED, 'played_at')}

    for item in recently_played['items']:
        played_at = item['played_at']
        if played_at in known_plays:
            continue

        track = item['track']
        track_id = track['id']
        album_id = track['album']['id']
        # Only the first listed artist is stored
        artist_id = track['artists'][0]['id']

        db.add_row(Table.RECENTLY_PLAYED, (played_at, track_id, artist_id, album_id))
        known_plays.add(played_at)
def _get_last_played_track(url: str = "https://api.spotify.com/v1/me/player/recently-played?limit=50", bearer_token: str = "") -> dict:
    """
    Request the recently played tracks and return the decoded JSON response.

    :param url: str, endpoint to query; the default asks for up to 50 items
    :param bearer_token: str
    :return: dict
    """
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, headers=header, timeout=30)
    return response.json()
def _get_track_information(track_id: str, bearer_token: str) -> dict:
    """
    Request the information of one track and return the decoded JSON response.

    :param track_id: str
    :param bearer_token: str
    :return: dict
    """
    url = f"https://api.spotify.com/v1/tracks/{track_id}"
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, headers=header, timeout=30)
    return response.json()
# Run main() only when this file is executed as a script.
if __name__ == '__main__':
    main()
View File
View File