mirror of
https://github.com/agresdominik/predictify.git
synced 2026-04-21 17:55:49 +00:00
+13
-2
@@ -1,3 +1,15 @@
|
|||||||
|
# Test running file
|
||||||
|
main_test.py
|
||||||
|
|
||||||
|
# databases
|
||||||
|
*.db
|
||||||
|
|
||||||
|
# Custom Tokens file/rotator
|
||||||
|
tokens.json
|
||||||
|
|
||||||
|
# Visual Studio Code
|
||||||
|
.vscode/
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
@@ -129,10 +141,9 @@ celerybeat.pid
|
|||||||
|
|
||||||
# Environments
|
# Environments
|
||||||
.env
|
.env
|
||||||
|
!.env.example
|
||||||
.venv
|
.venv
|
||||||
env/
|
|
||||||
venv/
|
venv/
|
||||||
ENV/
|
|
||||||
env.bak/
|
env.bak/
|
||||||
venv.bak/
|
venv.bak/
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,49 @@
|
|||||||
|
# .pre-commit-config.yaml
|
||||||
|
|
||||||
|
repos:
|
||||||
|
|
||||||
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
|
rev: v5.0.0
|
||||||
|
hooks:
|
||||||
|
- id: trailing-whitespace # Remove trailing whitespace
|
||||||
|
exclude: '.*test.*'
|
||||||
|
files: \.(py)$
|
||||||
|
|
||||||
|
- id: end-of-file-fixer # Ensure a single newline at the end of a file
|
||||||
|
exclude: '.*test.*'
|
||||||
|
files: \.(py)$
|
||||||
|
|
||||||
|
- id: check-yaml # Check if the YAML files are valid
|
||||||
|
exclude: '.*test.*'
|
||||||
|
files: \.(yaml|yml)$
|
||||||
|
|
||||||
|
- id: check-json # Check if the JSON files are valid
|
||||||
|
exclude: '.*test.*'
|
||||||
|
files: \.(json)$
|
||||||
|
|
||||||
|
- id: check-added-large-files # Prevent large files from being committed
|
||||||
|
args: ['--maxkb=1000']
|
||||||
|
|
||||||
|
- id: check-ast # Check for parse errors in Python files
|
||||||
|
exclude: '.*test.*'
|
||||||
|
files: \.(py)$
|
||||||
|
|
||||||
|
- id: debug-statements # Check for debugger imports and breakpoint() calls
|
||||||
|
exclude: '.*test.*'
|
||||||
|
files: \.(py)$
|
||||||
|
|
||||||
|
- repo: https://github.com/PyCQA/isort
|
||||||
|
rev: 5.13.2
|
||||||
|
hooks:
|
||||||
|
- id: isort
|
||||||
|
args: ['--profile=black']
|
||||||
|
files: \.(py)$
|
||||||
|
exclude: '.*test.*'
|
||||||
|
|
||||||
|
- repo: https://github.com/PyCQA/flake8
|
||||||
|
rev: 7.1.1
|
||||||
|
hooks:
|
||||||
|
- id: flake8
|
||||||
|
args: ['--extend-ignore=E501,E402,W503,E721','--max-line-length=100']
|
||||||
|
files: \.(py)$
|
||||||
|
exclude: '.*test.*'
|
||||||
+24
@@ -0,0 +1,24 @@
|
|||||||
|
FROM alpine:latest
|
||||||
|
|
||||||
|
WORKDIR /root
|
||||||
|
|
||||||
|
RUN apk update && \
|
||||||
|
apk add --no-cache \
|
||||||
|
openssh \
|
||||||
|
python3 \
|
||||||
|
py3-pip \
|
||||||
|
sqlite
|
||||||
|
|
||||||
|
EXPOSE 22
|
||||||
|
|
||||||
|
RUN mkdir /root/src
|
||||||
|
|
||||||
|
COPY ./startup.sh /root
|
||||||
|
COPY ./requirements.txt /root
|
||||||
|
COPY ./src/ /root/src/
|
||||||
|
|
||||||
|
RUN ls -la
|
||||||
|
|
||||||
|
VOLUME /root
|
||||||
|
|
||||||
|
ENTRYPOINT ["/bin/sh", "/root/startup.sh"]
|
||||||
@@ -1 +1,27 @@
|
|||||||
# predictify
|
# Predictify
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
A data analysis tool that scrapes your Spotify listening history and lets an ML model predict your next songs.
|
||||||
|
|
||||||
|
## Authentication API
|
||||||
|
|
||||||
|
[Official Documentation](https://developer.spotify.com/documentation/web-api/tutorials/getting-started)
|
||||||
|
[Authorization Code Flow](https://developer.spotify.com/documentation/web-api/tutorials/code-flow)
|
||||||
|
|
||||||
|
## Potentially Useful APIs
|
||||||
|
|
||||||
|
Recently Played Tracks: /me/player/recently-played [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-recently-played)
|
||||||
|
|
||||||
|
Get Track: /tracks/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-track)
|
||||||
|
|
||||||
|
Get Track's Audio Features - Deprecated: /audio-features/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-features)
|
||||||
|
|
||||||
|
Get Track's Audio Analysis - Deprecated: /audio-analysis/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-analysis)
|
||||||
|
|
||||||
|
Get Artist: /artists/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-an-artist)
|
||||||
|
|
||||||
|
## Authors
|
||||||
|
|
||||||
|
[Chris Kiriakou](https://github.com/ckiri)
|
||||||
|
[Dominik Agres](https://github.com/agresdominik)
|
||||||
|
|||||||
@@ -0,0 +1,5 @@
|
|||||||
|
# pytest.ini
|
||||||
|
[pytest]
|
||||||
|
# Set the root directory to the current directory (.)
|
||||||
|
rootdir = .
|
||||||
|
pythonpath = .
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
python-dotenv==1.0.1
|
||||||
|
requests==2.32.3
|
||||||
|
pre-commit==4.1.0
|
||||||
|
pytest==8.3.5
|
||||||
|
coverage==7.7.0
|
||||||
|
pytest-cov==6.0.0
|
||||||
+257
@@ -0,0 +1,257 @@
|
|||||||
|
import base64
|
||||||
|
import json
|
||||||
|
import logging as log
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||||
|
from urllib.parse import parse_qs, urlencode, urlparse
|
||||||
|
|
||||||
|
import dotenv
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# Absolute path of the token file: 'tokens.json' inside the 'env' folder
# that sits next to this module
TOKEN_FILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'env', 'tokens.json')
|
||||||
|
|
||||||
|
|
||||||
|
def simple_authenticate(grant_type: str = "client_credentials") -> str:
    """
    Request an access token from the Spotify token endpoint.

    The client id and secret returned by _read_env_file() are sent as an
    HTTP Basic authorization header together with the given grant type.

    :param grant_type: str, value of the "grant_type" form field
    :return: str, the access token; when the response status is not 200 the
        status and body are logged and None is returned
    """
    client_id, client_secret, _ = _read_env_file()

    credentials = f"{client_id}:{client_secret}".encode('utf-8')
    basic_auth = base64.b64encode(credentials).decode('utf-8')

    response = requests.post(
        "https://accounts.spotify.com/api/token",
        headers={
            "Authorization": f"Basic {basic_auth}",
            "Content-Type": "application/x-www-form-urlencoded"
        },
        data={"grant_type": f"{grant_type}"},
    )

    # Guard clause: anything other than 200 is logged and yields no token
    if response.status_code != 200:
        log.error(f"Error {response.status_code}: {response.text}")
        return None

    return response.json().get('access_token')
|
||||||
|
|
||||||
|
|
||||||
|
def authenticate(scope: str) -> str:
    """
    Return an access token for the given scope.

    Tokens stored for the scope are reused: a token that has not expired is
    returned as is, an expired one is refreshed and the token file updated.
    When nothing is stored, the authorization URL is printed, a local server
    waits for the callback, the received code is exchanged for tokens and
    those are saved.

    :param scope: str
    :return: str
    """
    client_id, client_secret, redirect_uri = _read_env_file()

    stored = _load_tokens(scope)
    if not stored:
        # Nothing stored for this scope: run the interactive authorization flow
        auth_url = _get_authorization_url(client_id, redirect_uri, scope)
        print(f'Please go to the following URL to authorize the app: {auth_url}')

        authorization_code = _start_server_and_wait_for_code()
        access_token, refresh_token, expires_at = _exchange_code_for_token(
            authorization_code,
            redirect_uri=redirect_uri,
            client_id=client_id,
            client_secret=client_secret,
        )
        _save_tokens(access_token, refresh_token, scope, expires_at)
        return access_token

    access_token, refresh_token, expires_at = stored
    if time.time() < expires_at:
        return access_token

    log.info(f"Token for scope {scope} expired, refreshing...")
    access_token, expires_at = _refresh_access_token(refresh_token, client_id, client_secret)
    _refresh_tokens_file(access_token, scope, expires_at)
    return access_token
|
||||||
|
|
||||||
|
|
||||||
|
def _get_authorization_url(client_id: str, redirect_uri: str, scope: str) -> str:
    """
    Build the URL the user has to open to authorize the app.

    The "state" query parameter is the current Unix time in whole seconds.

    :param client_id: str
    :param redirect_uri: str
    :param scope: str
    :return: str
    """
    query = urlencode({
        "response_type": "code",
        "client_id": client_id,
        "scope": scope,
        "redirect_uri": redirect_uri,
        "state": str(int(time.time())),
    })
    return "https://accounts.spotify.com/authorize?" + query
|
||||||
|
|
||||||
|
|
||||||
|
def _read_env_file() -> tuple:
    """
    Read the Spotify credentials from the '.env' file in the 'env' folder
    next to this module.

    :return: tuple (client_id, client_secret, redirect_uri)
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    env_file = os.path.join(module_dir, 'env', '.env')
    values = dotenv.dotenv_values(dotenv_path=env_file)
    return (
        values['SPOTIFY_CLIENT_ID'],
        values['SPOTIFY_CLIENT_SECRET'],
        values['SPOTIFY_REDIRECT_URI'],
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _start_server_and_wait_for_code() -> str:
    """
    Run a temporary HTTP server on localhost:8888 and return the
    authorization code delivered to it.

    Requests are handled one at a time until one carries a "code" query
    parameter. A request carrying an "error" parameter stops the wait and
    raises. Any other request (for example a favicon lookup) is answered
    with 400 and ignored. The listening socket is always closed before the
    function returns, so a later call can bind the port again.

    :return: str, the authorization code
    :raises Exception: if the callback carries an "error" parameter
    """
    class CallbackHandler(BaseHTTPRequestHandler):
        def do_GET(self):
            query_params = parse_qs(urlparse(self.path).query)
            if 'code' in query_params:
                self.server.authorization_code = query_params['code'][0]
                self.send_response(200)
                self.end_headers()
                self.wfile.write(b"Authorization successful! You can close this window.")
            elif 'error' in query_params:
                self.server.authorization_error = query_params['error'][0]
                self.send_response(400)
                self.end_headers()
                self.wfile.write(b"Authorization failed. You can close this window.")
            else:
                # Unrelated request: answer it so the client is not left hanging
                self.send_response(400)
                self.end_headers()

    server = HTTPServer(('localhost', 8888), CallbackHandler)
    # Set up front so both attributes exist whatever the first request looks like
    server.authorization_code = None
    server.authorization_error = None
    log.info("Starting server to capture the authorization code...")
    try:
        while server.authorization_code is None and server.authorization_error is None:
            server.handle_request()
    finally:
        # Release the port even when handling a request raised
        server.server_close()

    if server.authorization_error is not None:
        raise Exception(f"Authorization failed: {server.authorization_error}")
    return server.authorization_code
|
||||||
|
|
||||||
|
|
||||||
|
def _exchange_code_for_token(code: str, redirect_uri: str, client_id: str, client_secret: str) -> tuple:
    """
    Exchange an authorization code for tokens at the Spotify token endpoint.

    :param code: str
    :param redirect_uri: str
    :param client_id: str
    :param client_secret: str
    :return: tuple (access_token, refresh_token, expires_at); refresh_token
        is None when the response has none, expires_at is the current time
        plus the "expires_in" value of the response
    :raises Exception: if the response contains no access token
    """
    form = {
        'grant_type': 'authorization_code',
        'code': code,
        'redirect_uri': redirect_uri,
        'client_id': client_id,
        'client_secret': client_secret,
    }
    response = requests.post(
        "https://accounts.spotify.com/api/token",
        data=form,
        headers={'Content-Type': 'application/x-www-form-urlencoded'},
    )
    payload = response.json()

    if 'access_token' not in payload:
        raise Exception("Failed to get access token")

    access_token = payload['access_token']
    refresh_token = payload.get('refresh_token', None)
    return access_token, refresh_token, time.time() + payload['expires_in']
|
||||||
|
|
||||||
|
|
||||||
|
def _refresh_access_token(refresh_token: str, client_id: str, client_secret: str) -> tuple:
    """
    Obtain a new access token from the Spotify token endpoint using a
    refresh token.

    :param refresh_token: str
    :param client_id: str
    :param client_secret: str
    :return: tuple (access_token, expires_at); expires_at is the current
        time plus the "expires_in" value of the response
    :raises Exception: if the response contains no access token
    """
    form = {
        'grant_type': 'refresh_token',
        'refresh_token': refresh_token,
        'client_id': client_id,
        'client_secret': client_secret,
    }
    response = requests.post(
        "https://accounts.spotify.com/api/token",
        data=form,
        headers={'Content-Type': 'application/x-www-form-urlencoded'},
    )
    payload = response.json()

    if 'access_token' not in payload:
        raise Exception("Failed to refresh access token")

    access_token = payload['access_token']
    return access_token, time.time() + payload['expires_in']
|
||||||
|
|
||||||
|
|
||||||
|
def _load_tokens(scope: str) -> tuple:
    """
    Load the tokens stored for a scope from the token file.

    Expiry is not checked here; the caller compares expires_at itself.

    :param scope: str
    :return: tuple (access_token, refresh_token, expires_at), or None when
        the token file does not exist, holds no entry for the scope, or the
        entry lacks one of the three fields
    """
    if not os.path.exists(TOKEN_FILE_PATH):
        return None

    with open(TOKEN_FILE_PATH, 'r') as f:
        tokens = json.load(f)

    if scope not in tokens:
        return None

    entry = tokens[scope]
    # All three fields are read below, so all three must be present
    required = ('access_token', 'refresh_token', 'expires_at')
    if not all(key in entry for key in required):
        return None
    return entry['access_token'], entry['refresh_token'], entry['expires_at']
|
||||||
|
|
||||||
|
|
||||||
|
def _save_tokens(access_token: str, refresh_token: str, scope: str, expires_at) -> None:
    """
    Store the tokens for a scope in the token file.

    Entries already stored for other scopes are kept; an existing entry for
    the same scope is replaced. A token file that is missing, not valid
    JSON, or not a JSON object is treated as empty.

    :param access_token: str
    :param refresh_token: str
    :param scope: str
    :param expires_at: expiry time stored alongside the tokens
    """
    tokens = {}
    if os.path.exists(TOKEN_FILE_PATH):
        try:
            with open(TOKEN_FILE_PATH, 'r') as f:
                loaded = json.load(f)
        except json.JSONDecodeError:
            # Unreadable content: start from an empty mapping instead of failing
            loaded = None
        if isinstance(loaded, dict):
            tokens = loaded

    tokens[scope] = {
        'access_token': access_token,
        'refresh_token': refresh_token,
        'expires_at': expires_at,
    }
    with open(TOKEN_FILE_PATH, 'w') as f:
        json.dump(tokens, f)
|
||||||
|
|
||||||
|
|
||||||
|
def _refresh_tokens_file(access_token: str, scope: str, expires_at) -> None:
    """
    Update the stored access token and expiry time of a scope in the token
    file, leaving its refresh token untouched.

    If the file has no entry for the scope, or the entry has no refresh
    token, an error is logged and the file is not rewritten.

    :param access_token: str
    :param scope: str
    :param expires_at: new expiry time to store
    """
    with open(TOKEN_FILE_PATH, 'r') as file:
        tokens = json.load(file)

    if scope not in tokens or 'refresh_token' not in tokens[scope]:
        log.error(f"Error: Scope '{scope}' or refresh_token not found in the tokens file.")
        return

    entry = tokens[scope]
    entry['access_token'] = access_token
    entry['expires_at'] = expires_at
    with open(TOKEN_FILE_PATH, 'w') as file:
        json.dump(tokens, file, indent=4)
|
||||||
@@ -0,0 +1,126 @@
|
|||||||
|
import logging as log
|
||||||
|
import sqlite3
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class Table(Enum):
    """Tables of the scraper database; each member's value is the SQL table name."""
    TRACK_INFORMATION = "track_information"
    ARTIST_INFORMATION = "artist_information"
    ALBUM_INFORMATION = "album_information"
    TRACK_ATTRIBUTES = "track_attributes"
    RECENTLY_PLAYED = "recently_played"


class Database:
    """
    A class to handle the database connection and operations
    """

    def __init__(self, db_name):
        """
        Open the SQLite database and make sure all tables exist.

        :param db_name: database file name passed to sqlite3.connect
        """
        self.db_name = db_name
        self.conn = sqlite3.connect(db_name)
        self.cursor = self.conn.cursor()
        self.create_tables()

    def create_tables(self):
        """Create every table that does not exist yet and commit."""
        statements = (
            f'''
            CREATE TABLE IF NOT EXISTS {Table.TRACK_INFORMATION.value} (
                track_id TEXT PRIMARY KEY,
                title TEXT,
                duration_ms INTEGER,
                explicit BOOLEAN,
                popularity INTEGER
            );
            ''',
            f'''
            CREATE TABLE IF NOT EXISTS {Table.ARTIST_INFORMATION.value} (
                artist_id TEXT PRIMARY KEY,
                artist_name TEXT,
                followers INTEGER,
                genres TEXT,
                popularity INTEGER
            );
            ''',
            f'''
            CREATE TABLE IF NOT EXISTS {Table.ALBUM_INFORMATION.value} (
                album_id TEXT PRIMARY KEY,
                album_name TEXT,
                album_type TEXT,
                total_tracks INTEGER,
                release_date TEXT,
                label TEXT
            );
            ''',
            f'''
            CREATE TABLE IF NOT EXISTS {Table.TRACK_ATTRIBUTES.value} (
                track_id TEXT PRIMARY KEY,
                attribute_name TEXT,
                attribute_value TEXT
            );
            ''',
            f'''
            CREATE TABLE IF NOT EXISTS {Table.RECENTLY_PLAYED.value} (
                played_at TIMESTAMP PRIMARY KEY,
                track_id TEXT,
                artist_id TEXT,
                album_id TEXT,
                FOREIGN KEY (track_id) REFERENCES {Table.TRACK_INFORMATION.value}(track_id),
                FOREIGN KEY (artist_id) REFERENCES {Table.ARTIST_INFORMATION.value}(artist_id),
                FOREIGN KEY (album_id) REFERENCES {Table.ALBUM_INFORMATION.value}(album_id)
            );
            ''',
        )
        for statement in statements:
            self.cursor.execute(statement)

        # Commit the changes
        self.conn.commit()

    def add_row(self, table: Table, values):
        """
        Add a new row into the specified table and commit.

        This method never raises. A constraint violation (for example a row
        whose primary key is already stored) is logged at debug level; any
        other failure is logged at error level so it is not lost.

        :param table: Table
        :param values: sequence with one value per table column, in column order
        """
        try:
            placeholders = ', '.join(['?'] * len(values))
            query = f"INSERT INTO {table.value} VALUES ({placeholders})"
            self.cursor.execute(query, values)
            self.conn.commit()
        except sqlite3.IntegrityError as e:
            log.debug(f"Error: {e}")
        except Exception as e:
            log.error(f"Error adding row to {table}: {e}")

    def read_all_rows(self, table: Table, column: str = "*"):
        """
        Read all rows from the specified table.

        The column text is placed into the SQL statement as is, so it must
        come from trusted code only.

        :param table: Table
        :param column: str, column expression to select (default: all columns)
        :return: list of row tuples
        """
        self.cursor.execute(f"SELECT {column} FROM {table.value}")
        return self.cursor.fetchall()

    def close(self):
        """Close the database connection"""
        self.conn.close()

    def get_total_overview(self) -> list:
        """
        Retrieve all recently played songs joined with their track, artist
        and album rows, newest first.

        :return: list of tuples (played_at, track_id, title, artist_id,
            artist_name, album_id, album_name); an empty list if the query
            fails (the error is logged)
        """
        try:
            # Inner joins: plays without a stored track, artist or album row are left out
            query = f'''
                SELECT rp.played_at,
                       ti.track_id,
                       ti.title,
                       ai.artist_id,
                       ai.artist_name,
                       al.album_id,
                       al.album_name
                FROM {Table.RECENTLY_PLAYED.value} rp
                JOIN {Table.TRACK_INFORMATION.value} ti ON rp.track_id = ti.track_id
                JOIN {Table.ARTIST_INFORMATION.value} ai ON rp.artist_id = ai.artist_id
                JOIN {Table.ALBUM_INFORMATION.value} al ON rp.album_id = al.album_id
                ORDER BY rp.played_at DESC
            '''
            self.cursor.execute(query)
            return self.cursor.fetchall()
        except Exception as e:
            log.error(f"Error retrieving total overview: {e}")
            return []
|
||||||
Vendored
+3
@@ -0,0 +1,3 @@
|
|||||||
|
SPOTIFY_CLIENT_ID=your_token_here
|
||||||
|
SPOTIFY_CLIENT_SECRET=your_token_here
|
||||||
|
SPOTIFY_REDIRECT_URI=http://localhost:8888/callback
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
from time import sleep
|
||||||
|
|
||||||
|
from scraper import scraping
|
||||||
|
|
||||||
|
# Scrape, then pause for 30 minutes (1800 seconds), and repeat forever
SCRAPE_INTERVAL_SECONDS = 1800

while True:
    scraping()
    sleep(SCRAPE_INTERVAL_SECONDS)
|
||||||
+150
@@ -0,0 +1,150 @@
|
|||||||
|
import requests
|
||||||
|
|
||||||
|
from auth import authenticate, simple_authenticate
|
||||||
|
from database_handler import Database, Table
|
||||||
|
|
||||||
|
# Module-wide database handle; the functions in this module access it via `global db`
db = Database('spotify_scraped.db')
|
||||||
|
|
||||||
|
|
||||||
|
def scraping():
    """
    Run one scraping pass: store the recently played tracks, then fetch the
    track, album and artist details that are still missing.

    The function may be called repeatedly. Each pass opens a fresh database
    connection and closes it when the pass ends, also when it fails.
    """
    global db

    # The previous pass closed the connection, so open a new one for this
    # pass. Closing first is harmless if the handle is already closed.
    db.close()
    db = Database(db.db_name)

    try:
        scope = "user-read-recently-played"
        bearer_token = authenticate(scope)

        # Once each 30 mins
        _read_recently_played_page_and_add_to_db(bearer_token=bearer_token)
        _scrape_missing_infos()
    finally:
        db.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _read_recently_played_page_and_add_to_db(bearer_token: str):
    """
    Fetch the recently played page and add one row per item to the
    recently_played table.

    Only the first artist of each track is stored.

    :param bearer_token: str
    """
    global db

    page = _get_last_played_track(bearer_token=bearer_token)

    for item in page['items']:
        track = item['track']
        track_id = track['id']
        played_at = item['played_at']
        album_id = track['album']['id']
        artist_id = track['artists'][0]['id']
        row = (played_at, track_id, artist_id, album_id)
        db.add_row(Table.RECENTLY_PLAYED, row)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_last_played_track(url: str = "https://api.spotify.com/v1/me/player/recently-played?limit=50", bearer_token: str = "") -> dict:
    """
    Send a GET request to the recently-played endpoint and return the
    decoded JSON body.

    :param url: str, endpoint to query (the default asks for up to 50 items)
    :param bearer_token: str
    :return: dict
    """
    auth_header = {'Authorization': f'Bearer {bearer_token}'}
    return requests.get(url, headers=auth_header).json()
|
||||||
|
|
||||||
|
|
||||||
|
def _get_track_information(track_id: str, bearer_token: str) -> dict:
    """
    Request a track from the Spotify tracks endpoint and return the decoded
    JSON body.

    :param track_id: str
    :param bearer_token: str
    :return: dict
    """
    auth_header = {'Authorization': f'Bearer {bearer_token}'}
    endpoint = f"https://api.spotify.com/v1/tracks/{track_id}"
    return requests.get(endpoint, headers=auth_header).json()
|
||||||
|
|
||||||
|
|
||||||
|
def _get_artist_information(artist_id: str, bearer_token: str) -> dict:
    """
    Request an artist from the Spotify artists endpoint and return the
    decoded JSON body.

    :param artist_id: str
    :param bearer_token: str
    :return: dict
    """
    auth_header = {'Authorization': f'Bearer {bearer_token}'}
    endpoint = f"https://api.spotify.com/v1/artists/{artist_id}"
    return requests.get(endpoint, headers=auth_header).json()
|
||||||
|
|
||||||
|
|
||||||
|
def _get_album_information(album_id: str, bearer_token: str) -> dict:
    """
    Request an album from the Spotify albums endpoint and return the
    decoded JSON body.

    :param album_id: str
    :param bearer_token: str
    :return: dict
    """
    auth_header = {'Authorization': f'Bearer {bearer_token}'}
    endpoint = f"https://api.spotify.com/v1/albums/{album_id}"
    return requests.get(endpoint, headers=auth_header).json()
|
||||||
|
|
||||||
|
|
||||||
|
def _scrape_missing_infos():
    """
    Fetch and store details for every track, album and artist that appears
    in recently_played but has no row in its information table yet.

    A token from simple_authenticate() is used for all lookups. For albums
    only the first four characters of the release date are stored (empty
    string if it cannot be read); for artists only the first genre is
    stored (empty string if the artist has none).
    """
    global db

    token = simple_authenticate()

    # Track Info
    played_tracks = db.read_all_rows(Table.RECENTLY_PLAYED, 'track_id')
    known_tracks = db.read_all_rows(Table.TRACK_INFORMATION, 'track_id')
    for row in list(set(played_tracks) - set(known_tracks)):
        info = _get_track_information(track_id=row[0], bearer_token=token)
        track_row = (info['id'], info['name'], info['duration_ms'], info['explicit'], info['popularity'])
        db.add_row(Table.TRACK_INFORMATION, track_row)

    # Album Info
    played_albums = db.read_all_rows(Table.RECENTLY_PLAYED, 'album_id')
    known_albums = db.read_all_rows(Table.ALBUM_INFORMATION, 'album_id')
    for row in list(set(played_albums) - set(known_albums)):
        info = _get_album_information(album_id=row[0], bearer_token=token)
        try:
            release_year = info['release_date'][:4]
        except Exception:
            release_year = ""
        album_row = (info['id'], info['name'], info['album_type'], info['total_tracks'], release_year, info['label'])
        db.add_row(Table.ALBUM_INFORMATION, album_row)

    # Artist Info
    played_artists = db.read_all_rows(Table.RECENTLY_PLAYED, 'artist_id')
    known_artists = db.read_all_rows(Table.ARTIST_INFORMATION, 'artist_id')
    for row in list(set(played_artists) - set(known_artists)):
        info = _get_artist_information(artist_id=row[0], bearer_token=token)
        try:
            genre = info['genres'][0]
        except IndexError:
            genre = ""
        artist_row = (info['id'], info['name'], info['followers']['total'], genre, info['popularity'])
        db.add_row(Table.ARTIST_INFORMATION, artist_row)
|
||||||
Executable
+14
@@ -0,0 +1,14 @@
|
|||||||
|
#!/bin/sh
#
# Start up the predictify scraper
#
# Creates a virtual environment in .venv, installs the packages listed in
# requirements.txt into it and then runs src/scraper.py. All paths are
# relative, so the script must be started from the directory that holds
# requirements.txt.
#
# NOTE(review): src/scraper.py appears to only define functions; the
# 30-minute loop lives in the separate runner script. Confirm that
# scraper.py is the intended entry point.

if test -f ./requirements.txt
then
    # Stop if the environment cannot be prepared instead of starting the
    # scraper without its dependencies
    python3 -m venv .venv || exit 1
    .venv/bin/pip install -r ./requirements.txt || exit 1
else
    printf "Missing requirements file! aborting...\n"
    exit 1
fi

# exec replaces the shell, so signals sent to this process reach Python directly
exec .venv/bin/python3 src/scraper.py
|
||||||
Reference in New Issue
Block a user