mirror of
https://github.com/agresdominik/predictify.git
synced 2026-04-21 17:55:49 +00:00
Merge branch 'staging' into feat/containerize_app
This commit is contained in:
+13
-2
@@ -1,3 +1,15 @@
|
||||
# Test running file
|
||||
main_test.py
|
||||
|
||||
# databases
|
||||
*.db
|
||||
|
||||
# Custom Tokens file/rotator
|
||||
tokens.json
|
||||
|
||||
# Visual Studio Code
|
||||
.vscode/
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
@@ -129,10 +141,9 @@ celerybeat.pid
|
||||
|
||||
# Environments
|
||||
.env
|
||||
!.env.example
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
# .pre-commit-config.yaml
|
||||
|
||||
repos:
|
||||
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: trailing-whitespace # Remove trailing whitespace
|
||||
exclude: '.*test.*'
|
||||
files: \.(py)$
|
||||
|
||||
- id: end-of-file-fixer # Ensure a single newline at the end of a file
|
||||
exclude: '.*test.*'
|
||||
files: \.(py)$
|
||||
|
||||
- id: check-yaml # Check if the YAML files are valid
|
||||
exclude: '.*test.*'
|
||||
files: \.(yaml|yml)$
|
||||
|
||||
- id: check-json # Check if the JSON files are valid
|
||||
exclude: '.*test.*'
|
||||
files: \.(json)$
|
||||
|
||||
- id: check-added-large-files # Prevent large files from being committed
|
||||
args: ['--maxkb=1000']
|
||||
|
||||
- id: check-ast # Check for parse errors in Python files
|
||||
exclude: '.*test.*'
|
||||
files: \.(py)$
|
||||
|
||||
- id: debug-statements # Check for debugger imports and breakpoint() calls
|
||||
exclude: '.*test.*'
|
||||
files: \.(py)$
|
||||
|
||||
- repo: https://github.com/PyCQA/isort
|
||||
rev: 5.13.2
|
||||
hooks:
|
||||
- id: isort
|
||||
args: ['--profile=black']
|
||||
files: \.(py)$
|
||||
exclude: '.*test.*'
|
||||
|
||||
- repo: https://github.com/PyCQA/flake8
|
||||
rev: 7.1.1
|
||||
hooks:
|
||||
- id: flake8
|
||||
args: ['--extend-ignore=E501,E402,W503,E721','--max-line-length=100']
|
||||
files: \.(py)$
|
||||
exclude: '.*test.*'
|
||||
@@ -1 +1,27 @@
|
||||
# predictify
|
||||
# Predictify
|
||||
|
||||
## Overview
|
||||
|
||||
A data analysis tool that scrapes your Spotify listening history and lets an ML model predict your next songs.
|
||||
|
||||
## Authentication API
|
||||
|
||||
[Official Documentation](https://developer.spotify.com/documentation/web-api/tutorials/getting-started)
|
||||
[Authorization Code Flow](https://developer.spotify.com/documentation/web-api/tutorials/code-flow)
|
||||
|
||||
## Potentially useful API endpoints
|
||||
|
||||
Recently Played Tracks: /me/player/recently-played [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-recently-played)
|
||||
|
||||
Get Track: /tracks/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-track)
|
||||
|
||||
Get Track's Audio Features - Deprecated: /audio-features/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-features)
|
||||
|
||||
Get Track's Audio Analysis - Deprecated: /audio-analysis/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-analysis)
|
||||
|
||||
Get Artist: /artists/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-an-artist)
|
||||
|
||||
## Authors
|
||||
|
||||
[Chris Kiriakou](https://github.com/ckiri)
|
||||
[Dominik Agres](https://github.com/agresdominik)
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
# pytest.ini
|
||||
[pytest]
|
||||
# Set the root directory to the current directory (.)
|
||||
rootdir = .
|
||||
pythonpath = .
|
||||
@@ -0,0 +1,6 @@
|
||||
python-dotenv==1.0.1
|
||||
requests==2.32.3
|
||||
pre-commit==4.1.0
|
||||
pytest==8.3.5
|
||||
coverage==7.7.0
|
||||
pytest-cov==6.0.0
|
||||
+256
@@ -0,0 +1,256 @@
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||
from urllib.parse import parse_qs, urlencode, urlparse
|
||||
|
||||
import dotenv
|
||||
import requests
|
||||
|
||||
TOKEN_FILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'env', 'tokens.json')
|
||||
|
||||
|
||||
def simple_authenticate(grant_type: str = "client_credentials", timeout: float = 10.0) -> str:
    """
    Request an access token from the Spotify token endpoint using only the
    client id and client secret (sent as HTTP Basic credentials).

    :param grant_type: str, value sent as ``grant_type`` in the request body
    :param timeout: float, seconds to wait for the token endpoint
    :return: str, the access token on HTTP 200; None otherwise (the error is printed)
    """
    # The redirect URI is not needed for this flow.
    spotify_client_id, spotify_client_secret, _ = _read_env_file()
    token_url = "https://accounts.spotify.com/api/token"
    auth_value = f"{spotify_client_id}:{spotify_client_secret}"
    auth_header = base64.b64encode(auth_value.encode('utf-8')).decode('utf-8')

    headers = {
        "Authorization": f"Basic {auth_header}",
        "Content-Type": "application/x-www-form-urlencoded"
    }

    data = {
        "grant_type": grant_type
    }

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.post(token_url, headers=headers, data=data, timeout=timeout)

    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.text}")
        return None

    return response.json().get('access_token')
|
||||
|
||||
|
||||
def authenticate(scope: str) -> str:
    """
    Return an access token for the given scope.

    A token stored for the scope is reused while it has not expired and is
    refreshed otherwise. When nothing is stored, the authorization URL is
    printed, the local callback server waits for the authorization code, and
    the code is exchanged for tokens, which are then saved.

    :param scope: str
    :return: str
    """
    spotify_client_id, spotify_client_secret, spotify_redirect_uri = _read_env_file()

    cached = _load_tokens(scope)
    if cached:
        access_token, refresh_token, expires_at = cached
        still_valid = time.time() < expires_at
        if not still_valid:
            print(f"Token for scope {scope} expired, refreshing...")
            access_token, expires_at = _refresh_access_token(refresh_token, spotify_client_id, spotify_client_secret)
            _refresh_tokens_file(access_token, scope, expires_at)
        return access_token

    # Nothing stored for this scope yet: run the interactive authorization flow.
    auth_url = _get_authorization_url(spotify_client_id, spotify_redirect_uri, scope)
    print(f'Please go to the following URL to authorize the app: {auth_url}')

    authorization_code = _start_server_and_wait_for_code()

    access_token, refresh_token, expires_at = _exchange_code_for_token(
        authorization_code,
        redirect_uri=spotify_redirect_uri,
        client_id=spotify_client_id,
        client_secret=spotify_client_secret,
    )
    _save_tokens(access_token, refresh_token, scope, expires_at)

    return access_token
|
||||
|
||||
|
||||
def _get_authorization_url(client_id: str, redirect_uri: str, scope: str) -> str:
    """
    This function generates the URL that the user needs to visit to authorize the app

    :param client_id: str
    :param redirect_uri: str
    :param scope: str
    :return: str
    """
    import secrets  # local import: only this function needs it

    auth_params = {
        "response_type": "code",
        "client_id": client_id,
        "scope": scope,
        "redirect_uri": redirect_uri,
        # The state value should not be guessable; a timestamp is.
        "state": secrets.token_urlsafe(16)
    }
    return "https://accounts.spotify.com/authorize?" + urlencode(auth_params)
|
||||
|
||||
|
||||
def _read_env_file() -> tuple:
    """
    Load the Spotify settings from the ``.env`` file in the ``env`` folder
    located next to this module.

    A missing key raises KeyError.

    :return: tuple of (client_id, client_secret, redirect_uri)
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    env_file = os.path.join(module_dir, 'env', '.env')
    settings = dotenv.dotenv_values(dotenv_path=env_file)
    return (
        settings['SPOTIFY_CLIENT_ID'],
        settings['SPOTIFY_CLIENT_SECRET'],
        settings['SPOTIFY_REDIRECT_URI'],
    )
|
||||
|
||||
|
||||
def _start_server_and_wait_for_code() -> str:
    """
    This function starts a server on localhost:8888 and waits for the redirect
    that carries the authorization code.

    Requests without a ``code`` query parameter (for example a browser's
    favicon lookup) are answered with 400 and the server keeps waiting. The
    listening socket is always closed before returning.

    :return: str, the authorization code
    :raises Exception: if the redirect carries an ``error`` parameter instead of a code
    """
    class CallbackHandler(BaseHTTPRequestHandler):
        def do_GET(self):
            query_params = parse_qs(urlparse(self.path).query)
            if 'code' in query_params:
                self.server.authorization_code = query_params['code'][0]
                self.send_response(200)
                self.end_headers()
                self.wfile.write(b"Authorization successful! You can close this window.")
            elif 'error' in query_params:
                self.server.authorization_error = query_params['error'][0]
                self.send_response(400)
                self.end_headers()
                self.wfile.write(b"Authorization failed. You can close this window.")
            else:
                # Unrelated request: answer it so the client does not hang.
                self.send_response(400)
                self.end_headers()
                self.wfile.write(b"Missing authorization code.")

    server = HTTPServer(('localhost', 8888), CallbackHandler)
    # Pre-set both attributes so reading them never raises AttributeError.
    server.authorization_code = None
    server.authorization_error = None
    print("Starting server to capture the authorization code...")
    try:
        while server.authorization_code is None and server.authorization_error is None:
            server.handle_request()
    finally:
        server.server_close()

    if server.authorization_error is not None:
        raise Exception(f"Authorization failed: {server.authorization_error}")
    return server.authorization_code
|
||||
|
||||
|
||||
def _exchange_code_for_token(code: str, redirect_uri: str, client_id: str, client_secret: str,
                             timeout: float = 10.0) -> tuple:
    """
    This function exchanges the authorization code for an access token

    :param code: str
    :param redirect_uri: str
    :param client_id: str
    :param client_secret: str
    :param timeout: float, seconds to wait for the token endpoint
    :return: tuple of (access_token, refresh_token or None, expires_at), where
        expires_at is the current time plus the returned ``expires_in``
    :raises Exception: if the response does not contain an access token
    """
    token_url = "https://accounts.spotify.com/api/token"
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
    }

    data = {
        'grant_type': 'authorization_code',
        'code': code,
        'redirect_uri': redirect_uri,
        'client_id': client_id,
        'client_secret': client_secret,
    }

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.post(token_url, data=data, headers=headers, timeout=timeout)
    try:
        response_data = response.json()
    except ValueError:
        # Non-JSON body: fall through to the error below.
        response_data = {}

    if 'access_token' not in response_data:
        raise Exception(f"Failed to get access token (HTTP {response.status_code}): {response.text}")

    access_token = response_data['access_token']
    refresh_token = response_data.get('refresh_token', None)
    expires_at = time.time() + response_data['expires_in']
    return access_token, refresh_token, expires_at
|
||||
|
||||
|
||||
def _refresh_access_token(refresh_token: str, client_id: str, client_secret: str,
                          timeout: float = 10.0) -> tuple:
    """
    Refreshes the access token using the refresh token.

    :param refresh_token: str
    :param client_id: str
    :param client_secret: str
    :param timeout: float, seconds to wait for the token endpoint
    :return: tuple of (access_token, expires_at), where expires_at is the
        current time plus the returned ``expires_in``
    :raises Exception: if the response does not contain an access token
    """
    token_url = "https://accounts.spotify.com/api/token"
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
    }

    data = {
        'grant_type': 'refresh_token',
        'refresh_token': refresh_token,
        'client_id': client_id,
        'client_secret': client_secret,
    }

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.post(token_url, data=data, headers=headers, timeout=timeout)
    try:
        response_data = response.json()
    except ValueError:
        # Non-JSON body: fall through to the error below.
        response_data = {}

    if 'access_token' not in response_data:
        raise Exception(f"Failed to refresh access token (HTTP {response.status_code}): {response.text}")

    access_token = response_data['access_token']
    expires_at = time.time() + response_data['expires_in']
    return access_token, expires_at
|
||||
|
||||
|
||||
def _load_tokens(scope: str) -> tuple:
    """
    Loads the tokens stored for a scope from the local tokens file.

    Expiry is not checked here; the caller compares ``expires_at`` itself.

    :param scope: str
    :return: tuple of (access_token, refresh_token, expires_at), or None when
        the file does not exist or holds no complete entry for the scope
    """
    if not os.path.exists(TOKEN_FILE_PATH):
        return None

    with open(TOKEN_FILE_PATH, 'r') as f:
        tokens = json.load(f)

    entry = tokens.get(scope) if isinstance(tokens, dict) else None
    if not isinstance(entry, dict):
        return None

    # All three keys are read below, so all three must be present.
    required_keys = ('access_token', 'refresh_token', 'expires_at')
    if not all(key in entry for key in required_keys):
        return None

    return entry['access_token'], entry['refresh_token'], entry['expires_at']
|
||||
|
||||
|
||||
def _save_tokens(access_token: str, refresh_token: str, scope: str, expires_at) -> None:
    """
    Saves the access and refresh tokens for a scope to the local tokens file.

    Entries already stored for other scopes are kept; an entry for the same
    scope is replaced.

    :param access_token: str
    :param refresh_token: str
    :param scope: str
    :param expires_at: expiry time stored alongside the tokens
    """
    tokens = {}
    if os.path.exists(TOKEN_FILE_PATH):
        try:
            with open(TOKEN_FILE_PATH, 'r') as f:
                existing = json.load(f)
        except json.JSONDecodeError:
            # An unreadable file is overwritten, as it was before.
            existing = None
        if isinstance(existing, dict):
            tokens = existing

    tokens[scope] = {
        'access_token': access_token,
        'refresh_token': refresh_token,
        'expires_at': expires_at
    }
    with open(TOKEN_FILE_PATH, 'w') as f:
        json.dump(tokens, f)
|
||||
|
||||
|
||||
def _refresh_tokens_file(access_token: str, scope: str, expires_at) -> None:
    """
    Writes a refreshed access token and its expiry into the existing entry of
    a scope in the local tokens file.

    If the scope is not in the file or its entry has no refresh token, an
    error is printed and the file is left untouched.

    :param access_token: str
    :param scope: str
    :param expires_at: new expiry time stored for the scope
    """
    with open(TOKEN_FILE_PATH, 'r') as file:
        tokens = json.load(file)

    entry_is_usable = scope in tokens and 'refresh_token' in tokens[scope]
    if not entry_is_usable:
        print(f"Error: Scope '{scope}' or refresh_token not found in the tokens file.")
        return

    stored = tokens[scope]
    stored['access_token'] = access_token
    stored['expires_at'] = expires_at
    with open(TOKEN_FILE_PATH, 'w') as file:
        json.dump(tokens, file, indent=4)
|
||||
@@ -0,0 +1,87 @@
|
||||
import sqlite3
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class Table(Enum):
    """
    Tables of the scraper database; each member's value is the SQL table name.
    """
    # The values are interpolated directly into the SQL statements of Database.
    TRACK_INFORMATION = "track_information"
    ARTIST_INFORMATION = "artist_information"
    ALBUM_INFORMATION = "album_information"
    TRACK_ATTRIBUTES = "track_attributes"
    RECENTLY_PLAYED = "recently_played"
|
||||
|
||||
|
||||
class Database:
    """
    Wrapper around a single SQLite connection and cursor that creates the
    scraper's tables and offers simple insert and read helpers.
    """

    def __init__(self, db_name):
        """Connect to ``db_name`` and make sure all tables exist."""
        self.db_name = db_name
        self.conn = sqlite3.connect(db_name)
        self.cursor = self.conn.cursor()
        self.create_tables()

    def create_tables(self):
        """Create every table that does not exist yet, then commit."""
        schema_statements = (
            f'''
            CREATE TABLE IF NOT EXISTS {Table.TRACK_INFORMATION.value} (
                track_id TEXT PRIMARY KEY,
                title TEXT
            );
            ''',
            f'''
            CREATE TABLE IF NOT EXISTS {Table.ARTIST_INFORMATION.value} (
                artist_id TEXT PRIMARY KEY,
                artist_name TEXT
            );
            ''',
            f'''
            CREATE TABLE IF NOT EXISTS {Table.ALBUM_INFORMATION.value} (
                album_id TEXT PRIMARY KEY,
                album_name TEXT
            );
            ''',
            f'''
            CREATE TABLE IF NOT EXISTS {Table.TRACK_ATTRIBUTES.value} (
                track_id TEXT PRIMARY KEY,
                attribute_name TEXT,
                attribute_value TEXT
            );
            ''',
            f'''
            CREATE TABLE IF NOT EXISTS {Table.RECENTLY_PLAYED.value} (
                played_at TIMESTAMP PRIMARY KEY,
                track_id TEXT,
                artist_id TEXT,
                album_id TEXT,
                FOREIGN KEY (track_id) REFERENCES {Table.TRACK_INFORMATION.value}(track_id),
                FOREIGN KEY (artist_id) REFERENCES {Table.ARTIST_INFORMATION.value}(artist_id),
                FOREIGN KEY (album_id) REFERENCES {Table.ALBUM_INFORMATION.value}(album_id)
            );
            ''',
        )
        for statement in schema_statements:
            self.cursor.execute(statement)

        # Persist the schema changes
        self.conn.commit()

    def add_row(self, table: Table, values):
        """Insert ``values`` as one row into ``table`` (one value per column, in order) and commit."""
        marks = ', '.join('?' for _ in range(len(values)))
        self.cursor.execute(f"INSERT INTO {table.value} VALUES ({marks})", values)
        self.conn.commit()

    def read_all_rows(self, table: Table, column: str = "*"):
        """Return all rows of ``table`` as a list of tuples, limited to ``column`` (default: all columns)."""
        # ``column`` is placed into the SQL text as-is.
        return self.cursor.execute(f"SELECT {column} FROM {table.value}").fetchall()

    def close(self):
        """Close the underlying connection."""
        self.conn.close()
|
||||
Vendored
+3
@@ -0,0 +1,3 @@
|
||||
SPOTIFY_CLIENT_ID=your_token_here
|
||||
SPOTIFY_CLIENT_SECRET=your_token_here
|
||||
SPOTIFY_REDIRECT_URI=http://localhost:8888/callback
|
||||
@@ -0,0 +1,85 @@
|
||||
import requests
|
||||
|
||||
from auth import authenticate, simple_authenticate
|
||||
from database_handler import Database, Table
|
||||
|
||||
# Shared database handle used by the functions below. It is created at import
# time, which connects to the SQLite file and creates any missing tables.
db = Database('spotify_scraped.db')
|
||||
|
||||
|
||||
def main():
    """
    This function is the main function that will be executed when the script is run

    It stores the recently played tracks, then looks up and prints the track
    information of every stored play. The database connection is closed at the
    end, even when one of the steps raises.
    """
    scope = "user-read-recently-played"

    try:
        bearer_token = authenticate(scope)

        # Once each 30 mins
        _read_recently_played_page_and_add_to_db(bearer_token=bearer_token)

        # Once a day
        all_track_ids = db.read_all_rows(Table.RECENTLY_PLAYED, 'track_id')
        bearer_token_simple = simple_authenticate()
        for row in all_track_ids:
            # Each row is a tuple; the selected track_id is its first element.
            response = _get_track_information(track_id=row[0], bearer_token=bearer_token_simple)
            print(response)
    finally:
        # Close the database connection
        db.close()
|
||||
|
||||
|
||||
def _read_recently_played_page_and_add_to_db(bearer_token: str):
    """
    This function fetches the recently played tracks and stores each play in
    the recently_played table.

    A play whose ``played_at`` is already stored is skipped, so the function
    can be run repeatedly over overlapping history windows.

    :param bearer_token: str
    """
    import sqlite3  # local import: only needed to recognise duplicate-key errors

    last_played_track = _get_last_played_track(bearer_token=bearer_token)

    for item in last_played_track['items']:
        track = item['track']
        # Column order of the table: played_at, track_id, artist_id, album_id.
        # Only the first listed artist is stored.
        row = (item['played_at'], track['id'], track['artists'][0]['id'], track['album']['id'])
        try:
            db.add_row(Table.RECENTLY_PLAYED, row)
        except sqlite3.IntegrityError:
            # played_at is the primary key: this play was stored by an earlier run.
            continue
|
||||
|
||||
|
||||
def _get_last_played_track(url: str = "https://api.spotify.com/v1/me/player/recently-played?limit=50",
                           bearer_token: str = "", timeout: float = 10.0) -> dict:
    """
    This function returns the recently played tracks; the number of items is
    set by the ``limit`` query parameter of the url

    :param url: str, full request URL including the ``limit`` query parameter
    :param bearer_token: str
    :param timeout: float, seconds to wait for the API
    :return: dict, the decoded JSON response body
    """
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, headers=header, timeout=timeout)
    return response.json()
|
||||
|
||||
|
||||
def _get_track_information(track_id: str, bearer_token: str, timeout: float = 10.0) -> dict:
    """
    This function returns the track information based on the track id

    :param track_id: str
    :param bearer_token: str
    :param timeout: float, seconds to wait for the API
    :return: dict, the decoded JSON response body
    """
    url = f"https://api.spotify.com/v1/tracks/{track_id}"
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, headers=header, timeout=timeout)
    return response.json()
|
||||
|
||||
|
||||
# Run the scraper only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user