From 1d6f07ef18910ad2f9b122aa82e67cfac15565c4 Mon Sep 17 00:00:00 2001 From: agres Date: Tue, 18 Mar 2025 17:16:18 +0100 Subject: [PATCH 01/17] Initial setup for scraper --- requirements.txt | 1 + src/scraper.py | 0 test/test_scraper.py | 0 3 files changed, 1 insertion(+) create mode 100644 requirements.txt create mode 100644 src/scraper.py create mode 100644 test/test_scraper.py diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..3324f44 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +python-dotenv==1.0.1 diff --git a/src/scraper.py b/src/scraper.py new file mode 100644 index 0000000..e69de29 diff --git a/test/test_scraper.py b/test/test_scraper.py new file mode 100644 index 0000000..e69de29 From e6a1562155d6341bd41e79087d1c077c3a8a169d Mon Sep 17 00:00:00 2001 From: agres Date: Tue, 18 Mar 2025 17:26:50 +0100 Subject: [PATCH 02/17] Readme start --- README.md | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c934cae..0918419 100644 --- a/README.md +++ b/README.md @@ -1 +1,19 @@ -# predictify \ No newline at end of file +# Predictify + +## Overview + +A Data analysis tool to scrape your Spotify History usage and let a ML-Model predict your next songs + +## Usable possible APIs: + +Recently Played Tracks: /me/player/recently-played [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-recently-played) + +Get Track: /tracks/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-track) + +Get Track's Audio Features - Deprecated: /audio-features/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-features) + +Get Track's Audio Analysis - Deprecated: /audio-analysis/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-analysis) + +Get Artist: /artists/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-an-artist) + + From 34c601986c4f6adb06f54f4e804732c7a06221c7 Mon Sep 17 00:00:00 2001 From: agres Date: Tue, 18 Mar 2025 17:27:58 +0100 Subject: [PATCH 03/17] Added vscode folder to gitignore and fixed md --- .gitignore | 3 +++ README.md | 4 +--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 0a19790..6d92dc7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Visual Stidio Code +.vscode/ + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/README.md b/README.md index 0918419..0ad606d 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ A Data analysis tool to scrape your Spotify History usage and let a ML-Model predict your next songs -## Usable possible APIs: +## Usable possible APIs Recently Played Tracks: /me/player/recently-played [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-recently-played) @@ -15,5 +15,3 @@ Get Track's Audio Features - Deprecated: /audio-features/{id} [Official Spotify Get Track's Audio Analysis - Deprecated: /audio-analysis/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-analysis) Get Artist: /artists/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-an-artist) - - From ee76aabe581c2dfe2ce44c99e3ab6bce478c296f Mon Sep 17 00:00:00 2001 From: agres Date: Tue, 18 Mar 2025 17:28:24 +0100 Subject: [PATCH 04/17] Typo --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 6d92dc7..bfec2cd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -# Visual Stidio Code +# Visual Studio Code .vscode/ # Byte-compiled / optimized / DLL files From 5b1be7bbe5374f0ed9ca29a87359a307e208cbd4 Mon Sep 17 00:00:00 2001 From: agres Date: Tue, 18 Mar 2025 19:43:56 +0100 Subject: [PATCH 05/17] A Example env file --- src/env/.env.example | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 src/env/.env.example diff --git a/src/env/.env.example b/src/env/.env.example new file mode 100644 index 0000000..545e283 --- /dev/null +++ b/src/env/.env.example @@ -0,0 +1,3 @@ +SPOTIFY_CLIENT_ID=your_token_here +SPOTIFY_CLIENT_SECRET=your_token_here +SPOTIFY_REDIRECT_URI=http://localhost:8888/callback From fb0911d8938a20dfbc917e8803ab65a4b14e5f7c Mon Sep 17 00:00:00 2001 From: agres Date: Tue, 18 Mar 2025 19:44:36 +0100 Subject: [PATCH 06/17] A basic auth framework --- .gitignore | 3 +- requirements.txt | 1 + src/scraper.py | 144 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index bfec2cd..60a5680 100644 --- a/.gitignore +++ b/.gitignore @@ -132,10 +132,9 @@ celerybeat.pid # Environments .env +!.env.example .venv -env/ venv/ -ENV/ env.bak/ venv.bak/ diff --git a/requirements.txt b/requirements.txt index 3324f44..6580c39 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ python-dotenv==1.0.1 +requests==2.32.3 diff --git a/src/scraper.py b/src/scraper.py index e69de29..b50b8c7 100644 --- a/src/scraper.py +++ b/src/scraper.py @@ -0,0 +1,144 @@ +import dotenv +import time +from urllib.parse import urlencode, urlparse, parse_qs +from http.server import BaseHTTPRequestHandler, HTTPServer +import requests +import os + +def main(): + + recently_played_access_token = authenticate() + last_played_track = get_last_played_track(limit=1, bearer_token=recently_played_access_token) + + +def authenticate() -> str: + """ + This function authenticates the user and returns the access token + """ + spotify_client_id, spotify_client_secret, spotify_redirect_uri = _read_env_file() + + auth_url = _get_authorization_url(spotify_client_id, spotify_redirect_uri) + print(f'Please go to the following URL to authorize the app: {auth_url}') + + authorization_code = _start_server_and_wait_for_code() + + access_token, refresh_token = _exchange_code_for_token(authorization_code, redirect_uri=spotify_redirect_uri, + client_id=spotify_client_id, client_secret=spotify_client_secret) + + return access_token + + +def get_last_played_track(limit: str = "1", bearer_token: str = "") -> dict: + """ + This function returns the last played track based on the limit size + + :param limit: str + :param bearer_token: str + :return: dict + """ + + header = { + 'Authorization': f'Bearer {bearer_token}' + } + + response = requests.get(f'https://api.spotify.com/v1/me/player/recently-played?limit={limit}', headers=header) + response_json = response.json() + return response_json + + +def _get_authorization_url(client_id: str, redirect_uri: str) -> str: + """ + This function generates the URL that the user needs to visit to authorize the app + + :param client_id: str + :param redirect_uri: str + :return: str + """ + + auth_params = { + "response_type": "code", + "client_id": client_id, + "scope": "user-read-recently-played", + "redirect_uri": redirect_uri, + "state": str(int(time.time())) + } + auth_url = "https://accounts.spotify.com/authorize?" + urlencode(auth_params) + return auth_url + + +def _read_env_file() -> tuple: + """ + This function reads the .env file and returns the client_id, client_secret and redirect_uri + + :return: tuple + """ + current_dir = os.path.dirname(os.path.abspath(__file__)) + dotenv_folder_path = os.path.join(current_dir, 'env') + dotenv_path = os.path.join(dotenv_folder_path, '.env') + contents = dotenv.dotenv_values(dotenv_path=dotenv_path) + spotify_client_id = contents['SPOTIFY_CLIENT_ID'] + spotify_client_secret = contents['SPOTIFY_CLIENT_SECRET'] + spotify_redirect_uri = contents['SPOTIFY_REDIRECT_URI'] + return spotify_client_id, spotify_client_secret, spotify_redirect_uri + + +def _start_server_and_wait_for_code() -> any: + """ + This function starts a server and waits for the user to visit the authorization URL + and get the authorization code + + :return: any + """ + class CallbackHandler(BaseHTTPRequestHandler): + def do_GET(self): + parsed_url = urlparse(self.path) + query_params = parse_qs(parsed_url.query) + if 'code' in query_params: + self.server.authorization_code = query_params['code'][0] + self.send_response(200) + self.end_headers() + self.wfile.write(b"Authorization successful! You can close this window.") + + server = HTTPServer(('localhost', 8888), CallbackHandler) + print("Starting server to capture the authorization code...") + server.handle_request() + return server.authorization_code + + +def _exchange_code_for_token(code: str, redirect_uri: str, client_id: str, client_secret: str) -> tuple: + """ + This function exchanges the authorization code for an access token + + :param code: str + :param redirect_uri: str + :param client_id: str + :param client_secret: str + :return: tuple + """ + + token_url = "https://accounts.spotify.com/api/token" + headers = { + 'Content-Type': 'application/x-www-form-urlencoded', + } + + data = { + 'grant_type': 'authorization_code', + 'code': code, + 'redirect_uri': redirect_uri, + 'client_id': client_id, + 'client_secret': client_secret, + } + + response = requests.post(token_url, data=data, headers=headers) + response_data = response.json() + + if 'access_token' not in response_data: + raise Exception("Failed to get access token") + + access_token = response_data['access_token'] + refresh_token = response_data.get('refresh_token', None) + return access_token, refresh_token + + +if __name__ == '__main__': + main() \ No newline at end of file From 5091319f27fbb83d73e799400f1b397cc980a746 Mon Sep 17 00:00:00 2001 From: agres Date: Tue, 18 Mar 2025 19:45:52 +0100 Subject: [PATCH 07/17] Readme extended with auth documentation --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 0ad606d..e08039d 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,11 @@ A Data analysis tool to scrape your Spotify History usage and let a ML-Model predict your next songs +## Authentication API + +[Official Documentation](https://developer.spotify.com/documentation/web-api/tutorials/getting-started) +[Authorization Code Flow](https://developer.spotify.com/documentation/web-api/tutorials/code-flow) + ## Usable possible APIs Recently Played Tracks: /me/player/recently-played [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-recently-played) From 668f840fff69d9f6b4b1d3d3545c55b25e390c60 Mon Sep 17 00:00:00 2001 From: agres Date: Tue, 18 Mar 2025 19:48:46 +0100 Subject: [PATCH 08/17] Authors Added --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index e08039d..f0a6c4a 100644 --- a/README.md +++ b/README.md @@ -20,3 +20,8 @@ Get Track's Audio Features - Deprecated: /audio-features/{id} [Official Spotify Get Track's Audio Analysis - Deprecated: /audio-analysis/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-analysis) Get Artist: /artists/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-an-artist) + +## Authors + +[Chris Kiriakou](https://github.com/ckiri) +[Dominik Agres](https://github.com/agresdominik) From eb6a44a9b698930c30e11101e49a434e3b6ef344 Mon Sep 17 00:00:00 2001 From: agres Date: Tue, 18 Mar 2025 21:43:40 +0100 Subject: [PATCH 09/17] Implemented a token refresher/rotator and a token file so the user does not have to auth each time the code is ran --- src/auth.py | 203 +++++++++++++++++++++++++++++++++++++++++++++++++ src/scraper.py | 126 ++---------------------------- 2 files changed, 209 insertions(+), 120 deletions(-) create mode 100644 src/auth.py diff --git a/src/auth.py b/src/auth.py new file mode 100644 index 0000000..a6a0015 --- /dev/null +++ b/src/auth.py @@ -0,0 +1,203 @@ +import dotenv +import time +from urllib.parse import urlencode, urlparse, parse_qs +from http.server import BaseHTTPRequestHandler, HTTPServer +import requests +import os +import json + + +TOKEN_FILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'env', 'tokens.json') + + +def authenticate(scope: str) -> tuple: + """ + This function authenticates the user and returns the access token + + :param scope: str + :return: str + """ + spotify_client_id, spotify_client_secret, spotify_redirect_uri = _read_env_file() + + tokens = _load_tokens() + if tokens: + access_token, refresh_token, expires_at = tokens + if time.time() < expires_at: + return access_token + else: + print("Token expired, refreshing...") + access_token, refresh_token = _refresh_access_token(refresh_token, spotify_client_id, spotify_client_secret) + _save_tokens(access_token, refresh_token) + return access_token + + auth_url = _get_authorization_url(spotify_client_id, spotify_redirect_uri, scope) + print(f'Please go to the following URL to authorize the app: {auth_url}') + + authorization_code = _start_server_and_wait_for_code() + + access_token, refresh_token = _exchange_code_for_token(authorization_code, redirect_uri=spotify_redirect_uri, + client_id=spotify_client_id, client_secret=spotify_client_secret) + + _save_tokens(access_token, refresh_token) + + return access_token + + +def _get_authorization_url(client_id: str, redirect_uri: str, scope: str) -> str: + """ + This function generates the URL that the user needs to visit to authorize the app + + :param client_id: str + :param redirect_uri: str + :param scope: str + :return: str + """ + + auth_params = { + "response_type": "code", + "client_id": client_id, + "scope": scope, + "redirect_uri": redirect_uri, + "state": str(int(time.time())) + } + auth_url = "https://accounts.spotify.com/authorize?" + urlencode(auth_params) + return auth_url + + +def _read_env_file() -> tuple: + """ + This function reads the .env file and returns the client_id, client_secret and redirect_uri + + :return: tuple + """ + current_dir = os.path.dirname(os.path.abspath(__file__)) + dotenv_folder_path = os.path.join(current_dir, 'env') + dotenv_path = os.path.join(dotenv_folder_path, '.env') + contents = dotenv.dotenv_values(dotenv_path=dotenv_path) + spotify_client_id = contents['SPOTIFY_CLIENT_ID'] + spotify_client_secret = contents['SPOTIFY_CLIENT_SECRET'] + spotify_redirect_uri = contents['SPOTIFY_REDIRECT_URI'] + return spotify_client_id, spotify_client_secret, spotify_redirect_uri + + +def _start_server_and_wait_for_code() -> any: + """ + This function starts a server and waits for the user to visit the authorization URL + and get the authorization code + + :return: any + """ + class CallbackHandler(BaseHTTPRequestHandler): + def do_GET(self): + parsed_url = urlparse(self.path) + query_params = parse_qs(parsed_url.query) + if 'code' in query_params: + self.server.authorization_code = query_params['code'][0] + self.send_response(200) + self.end_headers() + self.wfile.write(b"Authorization successful! You can close this window.") + + server = HTTPServer(('localhost', 8888), CallbackHandler) + print("Starting server to capture the authorization code...") + server.handle_request() + return server.authorization_code + + +def _exchange_code_for_token(code: str, redirect_uri: str, client_id: str, client_secret: str) -> tuple: + """ + This function exchanges the authorization code for an access token + + :param code: str + :param redirect_uri: str + :param client_id: str + :param client_secret: str + :return: tuple + """ + + token_url = "https://accounts.spotify.com/api/token" + headers = { + 'Content-Type': 'application/x-www-form-urlencoded', + } + + data = { + 'grant_type': 'authorization_code', + 'code': code, + 'redirect_uri': redirect_uri, + 'client_id': client_id, + 'client_secret': client_secret, + } + + response = requests.post(token_url, data=data, headers=headers) + response_data = response.json() + + if 'access_token' not in response_data: + raise Exception("Failed to get access token") + + access_token = response_data['access_token'] + refresh_token = response_data.get('refresh_token', None) + return access_token, refresh_token + + +def _refresh_access_token(refresh_token: str, client_id: str, client_secret: str) -> tuple: + """ + Refreshes the access token using the refresh token. + + :param refresh_token: str + :param client_id: str + :param client_secret: str + :return: tuple + """ + token_url = "https://accounts.spotify.com/api/token" + headers = { + 'Content-Type': 'application/x-www-form-urlencoded', + } + + data = { + 'grant_type': 'refresh_token', + 'refresh_token': refresh_token, + 'client_id': client_id, + 'client_secret': client_secret, + } + + response = requests.post(token_url, data=data, headers=headers) + response_data = response.json() + + if 'access_token' not in response_data: + raise Exception("Failed to refresh access token") + + access_token = response_data['access_token'] + expires_in = response_data['expires_in'] + expires_at = time.time() + expires_in + return access_token, expires_at + + +def _load_tokens() -> tuple: + """ + Loads the tokens from the local file if they exist and are still valid. + + :return: tuple or None + """ + if os.path.exists(TOKEN_FILE_PATH): + with open(TOKEN_FILE_PATH, 'r') as f: + tokens = json.load(f) + if 'access_token' in tokens and 'expires_at' in tokens and time.time() < tokens['expires_at']: + return tokens['access_token'], tokens['refresh_token'], tokens['expires_at'] + return None + + +def _save_tokens(access_token: str, refresh_token: str) -> None: + """ + Saves the access and refresh tokens to a local file. + + :param access_token: str + :param refresh_token: str + """ + expires_in = 3600 # Default expiration time, adjust as needed + expires_at = time.time() + expires_in + tokens = { + 'access_token': access_token, + 'refresh_token': refresh_token, + 'expires_at': expires_at + } + with open(TOKEN_FILE_PATH, 'w') as f: + json.dump(tokens, f) \ No newline at end of file diff --git a/src/scraper.py b/src/scraper.py index b50b8c7..02ef9b5 100644 --- a/src/scraper.py +++ b/src/scraper.py @@ -1,34 +1,14 @@ -import dotenv -import time -from urllib.parse import urlencode, urlparse, parse_qs -from http.server import BaseHTTPRequestHandler, HTTPServer import requests -import os +from auth import authenticate def main(): - - recently_played_access_token = authenticate() - last_played_track = get_last_played_track(limit=1, bearer_token=recently_played_access_token) + scope = "user-read-recently-played" + bearer_token = authenticate(scope) + last_played_track = _get_last_played_track(bearer_token=bearer_token) + print(last_played_track) -def authenticate() -> str: - """ - This function authenticates the user and returns the access token - """ - spotify_client_id, spotify_client_secret, spotify_redirect_uri = _read_env_file() - - auth_url = _get_authorization_url(spotify_client_id, spotify_redirect_uri) - print(f'Please go to the following URL to authorize the app: {auth_url}') - - authorization_code = _start_server_and_wait_for_code() - - access_token, refresh_token = _exchange_code_for_token(authorization_code, redirect_uri=spotify_redirect_uri, - client_id=spotify_client_id, client_secret=spotify_client_secret) - - return access_token - - -def get_last_played_track(limit: str = "1", bearer_token: str = "") -> dict: +def _get_last_played_track(limit: str = "1", bearer_token: str = "") -> dict: """ This function returns the last played track based on the limit size @@ -46,99 +26,5 @@ def get_last_played_track(limit: str = "1", bearer_token: str = "") -> dict: return response_json -def _get_authorization_url(client_id: str, redirect_uri: str) -> str: - """ - This function generates the URL that the user needs to visit to authorize the app - - :param client_id: str - :param redirect_uri: str - :return: str - """ - - auth_params = { - "response_type": "code", - "client_id": client_id, - "scope": "user-read-recently-played", - "redirect_uri": redirect_uri, - "state": str(int(time.time())) - } - auth_url = "https://accounts.spotify.com/authorize?" + urlencode(auth_params) - return auth_url - - -def _read_env_file() -> tuple: - """ - This function reads the .env file and returns the client_id, client_secret and redirect_uri - - :return: tuple - """ - current_dir = os.path.dirname(os.path.abspath(__file__)) - dotenv_folder_path = os.path.join(current_dir, 'env') - dotenv_path = os.path.join(dotenv_folder_path, '.env') - contents = dotenv.dotenv_values(dotenv_path=dotenv_path) - spotify_client_id = contents['SPOTIFY_CLIENT_ID'] - spotify_client_secret = contents['SPOTIFY_CLIENT_SECRET'] - spotify_redirect_uri = contents['SPOTIFY_REDIRECT_URI'] - return spotify_client_id, spotify_client_secret, spotify_redirect_uri - - -def _start_server_and_wait_for_code() -> any: - """ - This function starts a server and waits for the user to visit the authorization URL - and get the authorization code - - :return: any - """ - class CallbackHandler(BaseHTTPRequestHandler): - def do_GET(self): - parsed_url = urlparse(self.path) - query_params = parse_qs(parsed_url.query) - if 'code' in query_params: - self.server.authorization_code = query_params['code'][0] - self.send_response(200) - self.end_headers() - self.wfile.write(b"Authorization successful! You can close this window.") - - server = HTTPServer(('localhost', 8888), CallbackHandler) - print("Starting server to capture the authorization code...") - server.handle_request() - return server.authorization_code - - -def _exchange_code_for_token(code: str, redirect_uri: str, client_id: str, client_secret: str) -> tuple: - """ - This function exchanges the authorization code for an access token - - :param code: str - :param redirect_uri: str - :param client_id: str - :param client_secret: str - :return: tuple - """ - - token_url = "https://accounts.spotify.com/api/token" - headers = { - 'Content-Type': 'application/x-www-form-urlencoded', - } - - data = { - 'grant_type': 'authorization_code', - 'code': code, - 'redirect_uri': redirect_uri, - 'client_id': client_id, - 'client_secret': client_secret, - } - - response = requests.post(token_url, data=data, headers=headers) - response_data = response.json() - - if 'access_token' not in response_data: - raise Exception("Failed to get access token") - - access_token = response_data['access_token'] - refresh_token = response_data.get('refresh_token', None) - return access_token, refresh_token - - if __name__ == '__main__': main() \ No newline at end of file From e6e788861348b98e68de633a0338dd0c3559a035 Mon Sep 17 00:00:00 2001 From: agres Date: Tue, 18 Mar 2025 21:44:22 +0100 Subject: [PATCH 10/17] Added tokens file to gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 60a5680..81dd2a1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Custom Tokens file/rotator +tokens.json + # Visual Studio Code .vscode/ From bb3c37ca488980c82817369e6d1b5d7903793403 Mon Sep 17 00:00:00 2001 From: agres Date: Wed, 19 Mar 2025 01:05:51 +0100 Subject: [PATCH 11/17] Pre commit hook --- .pre-commit-config.yaml | 28 ++++++++++++++++++++++++++++ requirements.txt | 1 + 2 files changed, 29 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..074425f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,28 @@ +# .pre-commit-config.yaml + +repos: + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace # Remove trailing whitespace + - id: end-of-file-fixer # Ensure a single newline at the end of a file + - id: check-yaml # Check if the YAML files are valid + - id: check-json # Check if the JSON files are valid + - id: check-added-large-files # Prevent large files from being committed + args: ['--maxkb=1000'] + - id: check-ast # Check for parse errors in Python files + - id: debug-statements # Check for print statements and pdb calls + - id: end-of-file-fixer # Ensure a single newline at the end of a file + + - repo: https://github.com/PyCQA/isort + rev: 5.13.2 + hooks: + - id: isort + args: ['--profile=black'] + + - repo: https://github.com/PyCQA/flake8 + rev: 7.1.1 + hooks: + - id: flake8 + args: ['--extend-ignore=E501,E402,W503,E721','--max-line-length=100'] diff --git a/requirements.txt b/requirements.txt index 6580c39..84d2db1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ python-dotenv==1.0.1 requests==2.32.3 +pre-commit==4.1.0 From c3e3db87cccbc21e45d05c6397cafb8fbf06ad2e Mon Sep 17 00:00:00 2001 From: agres Date: Wed, 19 Mar 2025 01:06:52 +0100 Subject: [PATCH 12/17] Added test newline --- src/scraper.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/scraper.py b/src/scraper.py index 02ef9b5..fabb82c 100644 --- a/src/scraper.py +++ b/src/scraper.py @@ -1,7 +1,12 @@ import requests + from auth import authenticate + def main(): + """ + This function is the main function that will be executed when the script is run + """ scope = "user-read-recently-played" bearer_token = authenticate(scope) last_played_track = _get_last_played_track(bearer_token=bearer_token) @@ -27,4 +32,4 @@ def _get_last_played_track(limit: str = "1", bearer_token: str = "") -> dict: if __name__ == '__main__': - main() \ No newline at end of file + main() From 12096b2ec0c3f4566a949b920f9fb91457b16170 Mon Sep 17 00:00:00 2001 From: agres Date: Wed, 19 Mar 2025 01:07:42 +0100 Subject: [PATCH 13/17] Fixed doc --- src/auth.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/auth.py b/src/auth.py index a6a0015..abfc986 100644 --- a/src/auth.py +++ b/src/auth.py @@ -1,16 +1,16 @@ -import dotenv -import time -from urllib.parse import urlencode, urlparse, parse_qs -from http.server import BaseHTTPRequestHandler, HTTPServer -import requests -import os import json +import os +import time +from http.server import BaseHTTPRequestHandler, HTTPServer +from urllib.parse import parse_qs, urlencode, urlparse +import dotenv +import requests TOKEN_FILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'env', 'tokens.json') -def authenticate(scope: str) -> tuple: +def authenticate(scope: str) -> str: """ This function authenticates the user and returns the access token @@ -29,14 +29,14 @@ def authenticate(scope: str) -> tuple: access_token, refresh_token = _refresh_access_token(refresh_token, spotify_client_id, spotify_client_secret) _save_tokens(access_token, refresh_token) return access_token - + auth_url = _get_authorization_url(spotify_client_id, spotify_redirect_uri, scope) print(f'Please go to the following URL to authorize the app: {auth_url}') authorization_code = _start_server_and_wait_for_code() access_token, refresh_token = _exchange_code_for_token(authorization_code, redirect_uri=spotify_redirect_uri, - client_id=spotify_client_id, client_secret=spotify_client_secret) + client_id=spotify_client_id, client_secret=spotify_client_secret) _save_tokens(access_token, refresh_token) @@ -96,7 +96,7 @@ def _start_server_and_wait_for_code() -> any: self.send_response(200) self.end_headers() self.wfile.write(b"Authorization successful! You can close this window.") - + server = HTTPServer(('localhost', 8888), CallbackHandler) print("Starting server to capture the authorization code...") server.handle_request() @@ -200,4 +200,4 @@ def _save_tokens(access_token: str, refresh_token: str) -> None: 'expires_at': expires_at } with open(TOKEN_FILE_PATH, 'w') as f: - json.dump(tokens, f) \ No newline at end of file + json.dump(tokens, f) From 8d407d3d522a5467af53e4a56e85210a7cea4916 Mon Sep 17 00:00:00 2001 From: agres Date: Wed, 19 Mar 2025 01:22:38 +0100 Subject: [PATCH 14/17] Added pytest, excluded tests from precommit --- .pre-commit-config.yaml | 23 ++++++++++++++++++++++- requirements.txt | 1 + 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 074425f..f748997 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,23 +6,44 @@ repos: rev: v5.0.0 hooks: - id: trailing-whitespace # Remove trailing whitespace + exclude: '.*test.*' + files: \.(py)$ + - id: end-of-file-fixer # Ensure a single newline at the end of a file + exclude: '.*test.*' + files: \.(py)$ + - id: check-yaml # Check if the YAML files are valid + exclude: '.*test.*' + files: \.(yaml|yml)$ + - id: check-json # Check if the JSON files are valid + exclude: '.*test.*' + files: \.(json)$ + - id: check-added-large-files # Prevent large files from being committed args: ['--maxkb=1000'] + - id: check-ast # Check for parse errors in Python files + exclude: '.*test.*' + files: \.(py)$ + - id: debug-statements # Check for print statements and pdb calls - - id: end-of-file-fixer # Ensure a single newline at the end of a file + exclude: '.*test.*' + files: \.(py)$ - repo: https://github.com/PyCQA/isort rev: 5.13.2 hooks: - id: isort args: ['--profile=black'] + files: \.(py)$ + exclude: '.*test.*' - repo: https://github.com/PyCQA/flake8 rev: 7.1.1 hooks: - id: flake8 args: ['--extend-ignore=E501,E402,W503,E721','--max-line-length=100'] + files: \.(py)$ + exclude: '.*test.*' diff --git a/requirements.txt b/requirements.txt index 84d2db1..5bd2999 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ python-dotenv==1.0.1 requests==2.32.3 pre-commit==4.1.0 +pytest==8.3.5 From 55eede61ccc7ce7a375b71cc0da9e83ade427812 Mon Sep 17 00:00:00 2001 From: agres Date: Wed, 19 Mar 2025 01:36:50 +0100 Subject: [PATCH 15/17] Added pytest and config file --- pytest.ini | 5 +++++ requirements.txt | 2 ++ test/test_auth.py | 0 3 files changed, 7 insertions(+) create mode 100644 pytest.ini create mode 100644 test/test_auth.py diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..46f840e --- /dev/null +++ b/pytest.ini @@ -0,0 +1,5 @@ +# pytest.ini +[pytest] +# Set the root directory to the current directory (.) +rootdir = . +pythonpath = . diff --git a/requirements.txt b/requirements.txt index 5bd2999..89f3571 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,5 @@ python-dotenv==1.0.1 requests==2.32.3 pre-commit==4.1.0 pytest==8.3.5 +coverage==7.7.0 +pytest-cov==6.0.0 diff --git a/test/test_auth.py b/test/test_auth.py new file mode 100644 index 0000000..e69de29 From 77654f13c94ffcf815a71956531e516ac247b7c7 Mon Sep 17 00:00:00 2001 From: agres Date: Wed, 19 Mar 2025 16:27:25 +0100 Subject: [PATCH 16/17] Big commit --- src/database_handler.py | 87 +++++++++++++++++++++++++++++++++++++++++ src/scraper.py | 55 ++++++++++++++++++++++++-- 2 files changed, 139 insertions(+), 3 deletions(-) create mode 100644 src/database_handler.py diff --git a/src/database_handler.py b/src/database_handler.py new file mode 100644 index 0000000..ce0478e --- /dev/null +++ b/src/database_handler.py @@ -0,0 +1,87 @@ +import sqlite3 +from enum import Enum + + +class Table(Enum): + TRACK_INFORMATION = "track_information" + ARTIST_INFORMATION = "artist_information" + ALBUM_INFORMATION = "album_information" + TRACK_ATTRIBUTES = "track_attributes" + RECENTLY_PLAYED = "recently_played" + + +class Database: + """ + A class to handle the database connection and operations + """ + + def __init__(self, db_name): + """Initialize the connection to the database""" + self.db_name = db_name + self.conn = sqlite3.connect(db_name) + self.cursor = self.conn.cursor() + self.create_tables() + + def create_tables(self): + """Create the tables in the database""" + + self.cursor.execute(f''' + CREATE TABLE IF NOT EXISTS {Table.TRACK_INFORMATION.value} ( + track_id TEXT PRIMARY KEY, + title TEXT + ); + ''') + + self.cursor.execute(f''' + CREATE TABLE IF NOT EXISTS {Table.ARTIST_INFORMATION.value} ( + artist_id TEXT PRIMARY KEY, + artist_name TEXT + ); + ''') + + self.cursor.execute(f''' + CREATE TABLE IF NOT EXISTS {Table.ALBUM_INFORMATION.value} ( + album_id TEXT PRIMARY KEY, + album_name TEXT + ); + ''') + + self.cursor.execute(f''' + CREATE TABLE IF NOT EXISTS {Table.TRACK_ATTRIBUTES.value} ( + track_id TEXT PRIMARY KEY, + attribute_name TEXT, + attribute_value TEXT + ); + ''') + + self.cursor.execute(f''' + CREATE TABLE IF NOT EXISTS {Table.RECENTLY_PLAYED.value} ( + played_at TIMESTAMP PRIMARY KEY, + track_id TEXT, + artist_id TEXT, + album_id TEXT, + FOREIGN KEY (track_id) REFERENCES {Table.TRACK_INFORMATION.value}(track_id), + FOREIGN KEY (artist_id) REFERENCES {Table.ARTIST_INFORMATION.value}(artist_id), + FOREIGN KEY (album_id) REFERENCES {Table.ALBUM_INFORMATION.value}(album_id) + ); + ''') + + # Commit the changes + self.conn.commit() + + def add_row(self, table: Table, values): + """Add a new row into the specified table""" + placeholders = ', '.join(['?'] * len(values)) + query = f"INSERT INTO {table.value} VALUES ({placeholders})" + self.cursor.execute(query, values) + self.conn.commit() + + def read_all_rows(self, table: Table, column: str = "*"): + """Read all rows from the specified table""" + self.cursor.execute(f"SELECT {column} FROM {table.value}") + rows = self.cursor.fetchall() + return rows + + def close(self): + """Close the database connection""" + self.conn.close() diff --git a/src/scraper.py b/src/scraper.py index fabb82c..c2144ce 100644 --- a/src/scraper.py +++ b/src/scraper.py @@ -1,19 +1,49 @@ import requests from auth import authenticate +from database_handler import Database, Table + +db = Database('spotify_scraped.db') def main(): """ This function is the main function that will be executed when the script is run """ + global db + scope = "user-read-recently-played" bearer_token = authenticate(scope) + + # Once each 30 mins + _read_recently_played_page_and_add_to_db(bearer_token=bearer_token) + + # Once a day + all_track_ids = db.read_all_rows(Table.RECENTLY_PLAYED, 'track_id') + for track_id in all_track_ids: + response = _get_track_information(track_id=track_id, bearer_token=bearer_token) + print(response) + + # Close the database connection + db.close() + + +def _read_recently_played_page_and_add_to_db(bearer_token: str): + """ + """ + global db + last_played_track = _get_last_played_track(bearer_token=bearer_token) - print(last_played_track) + + for track in last_played_track['items']: + track_id = track['track']['id'] + played_at = track['played_at'] + album_id = track['track']['album']['id'] + artist_id = track['track']['artists'][0]['id'] + db.add_row(Table.RECENTLY_PLAYED, (played_at, track_id, artist_id, album_id)) -def _get_last_played_track(limit: str = "1", bearer_token: str = "") -> dict: +def _get_last_played_track(url: str = "https://api.spotify.com/v1/me/player/recently-played?limit=50", bearer_token: str = "") -> dict: """ This function returns the last played track based on the limit size @@ -26,7 +56,26 @@ def _get_last_played_track(limit: str = "1", bearer_token: str = "") -> dict: 'Authorization': f'Bearer {bearer_token}' } - response = requests.get(f'https://api.spotify.com/v1/me/player/recently-played?limit={limit}', headers=header) + response = requests.get(url, headers=header) + response_json = response.json() + return response_json + + +def _get_track_information(track_id: str, bearer_token: str) -> dict: + """ + This function returns the track information based on the track id + + :param track_id: str + :param bearer_token: str + :return: dict + """ + + url = f"https://api.spotify.com/v1/tracks/{track_id}" + header = { + 'Authorization': f'Bearer {bearer_token}' + } + + response = requests.get(url, headers=header) response_json = response.json() return response_json From 9235b5a844923d2bd7f9a4e763609ccfbd0488e4 Mon Sep 17 00:00:00 2001 From: agres Date: Wed, 19 Mar 2025 16:27:48 +0100 Subject: [PATCH 17/17] Gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 81dd2a1..2bb4134 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# databases +*.db + # Custom Tokens file/rotator tokens.json