mirror of
https://github.com/agresdominik/predictify.git
synced 2026-04-21 09:51:56 +00:00
+13
-2
@@ -1,3 +1,15 @@
|
||||
# Test running file
|
||||
main_test.py
|
||||
|
||||
# databases
|
||||
*.db
|
||||
|
||||
# Custom Tokens file/rotator
|
||||
tokens.json
|
||||
|
||||
# Visual Studio Code
|
||||
.vscode/
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
@@ -129,10 +141,9 @@ celerybeat.pid
|
||||
|
||||
# Environments
|
||||
.env
|
||||
!.env.example
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
# .pre-commit-config.yaml
|
||||
|
||||
repos:
|
||||
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: trailing-whitespace # Remove trailing whitespace
|
||||
exclude: '.*test.*'
|
||||
files: \.(py)$
|
||||
|
||||
- id: end-of-file-fixer # Ensure a single newline at the end of a file
|
||||
exclude: '.*test.*'
|
||||
files: \.(py)$
|
||||
|
||||
- id: check-yaml # Check if the YAML files are valid
|
||||
exclude: '.*test.*'
|
||||
files: \.(yaml|yml)$
|
||||
|
||||
- id: check-json # Check if the JSON files are valid
|
||||
exclude: '.*test.*'
|
||||
files: \.(json)$
|
||||
|
||||
- id: check-added-large-files # Prevent large files from being committed
|
||||
args: ['--maxkb=1000']
|
||||
|
||||
- id: check-ast # Check for parse errors in Python files
|
||||
exclude: '.*test.*'
|
||||
files: \.(py)$
|
||||
|
||||
- id: debug-statements # Check for print statements and pdb calls
|
||||
exclude: '.*test.*'
|
||||
files: \.(py)$
|
||||
|
||||
- repo: https://github.com/PyCQA/isort
|
||||
rev: 5.13.2
|
||||
hooks:
|
||||
- id: isort
|
||||
args: ['--profile=black']
|
||||
files: \.(py)$
|
||||
exclude: '.*test.*'
|
||||
|
||||
- repo: https://github.com/PyCQA/flake8
|
||||
rev: 7.1.1
|
||||
hooks:
|
||||
- id: flake8
|
||||
args: ['--extend-ignore=E501,E402,W503,E721','--max-line-length=100']
|
||||
files: \.(py)$
|
||||
exclude: '.*test.*'
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
# Image for the predictify scraper: Alpine with Python, pip, sqlite and openssh.
FROM alpine:latest

WORKDIR /root

# `apk add --no-cache` fetches a fresh package index on its own, so a separate
# `apk update` (which would leave an index cache in the layer) is not needed.
RUN apk add --no-cache \
    openssh \
    python3 \
    py3-pip \
    sqlite

EXPOSE 22

RUN mkdir /root/src

# Copy the entry script, the dependency list and the sources into the image.
COPY ./startup.sh /root
COPY ./requirements.txt /root
COPY ./src/ /root/src/

VOLUME /root

ENTRYPOINT ["/bin/sh", "/root/startup.sh"]
|
||||
@@ -1 +1,27 @@
|
||||
# predictify
|
||||
# Predictify
|
||||
|
||||
## Overview
|
||||
|
||||
A data analysis tool that scrapes your Spotify listening history and lets an ML model predict your next songs.
|
||||
|
||||
## Authentication API
|
||||
|
||||
[Official Documentation](https://developer.spotify.com/documentation/web-api/tutorials/getting-started)
|
||||
[Authorization Code Flow](https://developer.spotify.com/documentation/web-api/tutorials/code-flow)
|
||||
|
||||
## Potentially Useful APIs
|
||||
|
||||
Recently Played Tracks: /me/player/recently-played [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-recently-played)
|
||||
|
||||
Get Track: /tracks/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-track)
|
||||
|
||||
Get Track's Audio Features - Deprecated: /audio-features/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-features)
|
||||
|
||||
Get Track's Audio Analysis - Deprecated: /audio-analysis/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-audio-analysis)
|
||||
|
||||
Get Artist: /artists/{id} [Official Spotify Documentation](https://developer.spotify.com/documentation/web-api/reference/get-an-artist)
|
||||
|
||||
## Authors
|
||||
|
||||
[Chris Kiriakou](https://github.com/ckiri)
|
||||
[Dominik Agres](https://github.com/agresdominik)
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
# pytest.ini
[pytest]
# pytest derives the rootdir from the location of this file; `rootdir` is not
# an ini option, so it is not set here.
# Put the project root on sys.path so tests can import the project modules.
pythonpath = .
|
||||
@@ -0,0 +1,6 @@
|
||||
python-dotenv==1.0.1
|
||||
requests==2.32.3
|
||||
pre-commit==4.1.0
|
||||
pytest==8.3.5
|
||||
coverage==7.7.0
|
||||
pytest-cov==6.0.0
|
||||
+257
@@ -0,0 +1,257 @@
|
||||
import base64
|
||||
import json
|
||||
import logging as log
|
||||
import os
|
||||
import time
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||
from urllib.parse import parse_qs, urlencode, urlparse
|
||||
|
||||
import dotenv
|
||||
import requests
|
||||
|
||||
TOKEN_FILE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'env', 'tokens.json')
|
||||
|
||||
|
||||
def simple_authenticate(grant_type: str = "client_credentials") -> str:
    """
    Request an access token from the Spotify accounts service without user interaction.

    The client id and secret returned by ``_read_env_file`` are sent as an HTTP
    Basic ``Authorization`` header together with the requested grant type.

    :param grant_type: str, value sent as ``grant_type`` in the request body
    :return: str, the access token on HTTP 200; None otherwise (the error is logged)
    """
    # The redirect URI is part of the env tuple but is not used by this request.
    spotify_client_id, spotify_client_secret, _ = _read_env_file()
    token_url = "https://accounts.spotify.com/api/token"
    auth_value = f"{spotify_client_id}:{spotify_client_secret}"
    auth_header = base64.b64encode(auth_value.encode('utf-8')).decode('utf-8')

    headers = {
        "Authorization": f"Basic {auth_header}",
        "Content-Type": "application/x-www-form-urlencoded"
    }

    data = {
        "grant_type": f"{grant_type}"
    }

    # Without a timeout a stalled connection would block the caller indefinitely.
    response = requests.post(token_url, headers=headers, data=data, timeout=30)

    if response.status_code == 200:
        return response.json().get('access_token')

    log.error(f"Error {response.status_code}: {response.text}")
    # Make the failure result explicit instead of falling off the end.
    return None
|
||||
|
||||
|
||||
def authenticate(scope: str) -> str:
    """
    Return an access token for the given scope.

    A token stored for the scope is reused while it is still valid and refreshed
    once it has expired. If nothing is stored, the user is asked to open the
    authorization URL, the resulting code is exchanged for tokens and the
    tokens are saved.

    :param scope: str
    :return: str
    """
    client_id, client_secret, redirect_uri = _read_env_file()

    cached = _load_tokens(scope)
    if cached:
        cached_access, cached_refresh, cached_expiry = cached
        if time.time() < cached_expiry:
            return cached_access

        # Stored token is past its expiry: obtain a new one and persist it.
        log.info(f"Token for scope {scope} expired, refreshing...")
        new_access, new_expiry = _refresh_access_token(cached_refresh, client_id, client_secret)
        _refresh_tokens_file(new_access, scope, new_expiry)
        return new_access

    # No stored tokens for this scope: run the interactive authorization.
    auth_url = _get_authorization_url(client_id, redirect_uri, scope)
    print(f'Please go to the following URL to authorize the app: {auth_url}')

    code = _start_server_and_wait_for_code()

    fresh_access, fresh_refresh, fresh_expiry = _exchange_code_for_token(
        code,
        redirect_uri=redirect_uri,
        client_id=client_id,
        client_secret=client_secret,
    )

    _save_tokens(fresh_access, fresh_refresh, scope, fresh_expiry)
    return fresh_access
|
||||
|
||||
|
||||
def _get_authorization_url(client_id: str, redirect_uri: str, scope: str) -> str:
|
||||
"""
|
||||
This function generates the URL that the user needs to visit to authorize the app
|
||||
|
||||
:param client_id: str
|
||||
:param redirect_uri: str
|
||||
:param scope: str
|
||||
:return: str
|
||||
"""
|
||||
|
||||
auth_params = {
|
||||
"response_type": "code",
|
||||
"client_id": client_id,
|
||||
"scope": scope,
|
||||
"redirect_uri": redirect_uri,
|
||||
"state": str(int(time.time()))
|
||||
}
|
||||
auth_url = "https://accounts.spotify.com/authorize?" + urlencode(auth_params)
|
||||
return auth_url
|
||||
|
||||
|
||||
def _read_env_file() -> tuple:
    """
    Read the ``env/.env`` file located next to this module.

    :return: tuple of (client_id, client_secret, redirect_uri)
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    env_file = os.path.join(module_dir, 'env', '.env')
    settings = dotenv.dotenv_values(dotenv_path=env_file)
    return (
        settings['SPOTIFY_CLIENT_ID'],
        settings['SPOTIFY_CLIENT_SECRET'],
        settings['SPOTIFY_REDIRECT_URI'],
    )
|
||||
|
||||
|
||||
def _start_server_and_wait_for_code() -> str:
    """
    Run a temporary local HTTP server and return the authorization code sent to it.

    The server listens on localhost:8888 and keeps handling requests until one
    arrives whose query string contains a ``code`` or an ``error`` parameter.
    Any other request is answered with status 400 and ignored, so a stray
    request no longer ends the wait without a code.

    :return: str, the authorization code
    :raises RuntimeError: if the callback carries an ``error`` parameter
    """
    class CallbackHandler(BaseHTTPRequestHandler):
        def do_GET(self):
            query_params = parse_qs(urlparse(self.path).query)
            if 'code' in query_params:
                self.server.authorization_code = query_params['code'][0]
                self.send_response(200)
                self.end_headers()
                self.wfile.write(b"Authorization successful! You can close this window.")
            elif 'error' in query_params:
                self.server.authorization_error = query_params['error'][0]
                self.send_response(400)
                self.end_headers()
                self.wfile.write(b"Authorization failed. You can close this window.")
            else:
                # Not the callback we are waiting for; always send a response.
                self.send_response(400)
                self.end_headers()

    server = HTTPServer(('localhost', 8888), CallbackHandler)
    # Initialise the result slots so reading them never raises AttributeError.
    server.authorization_code = None
    server.authorization_error = None
    log.info("Starting server to capture the authorization code...")
    try:
        while server.authorization_code is None and server.authorization_error is None:
            server.handle_request()
    finally:
        # Release the listening socket so the port can be bound again later.
        server.server_close()

    if server.authorization_error is not None:
        raise RuntimeError(f"Authorization failed: {server.authorization_error}")
    return server.authorization_code
|
||||
|
||||
|
||||
def _exchange_code_for_token(code: str, redirect_uri: str, client_id: str, client_secret: str) -> tuple:
    """
    Exchange the authorization code for an access token.

    :param code: str
    :param redirect_uri: str
    :param client_id: str
    :param client_secret: str
    :return: tuple of (access_token, refresh_token or None, expires_at), where
        expires_at is the current Unix time plus the ``expires_in`` value of the response
    :raises Exception: if the response does not contain an access token
    """
    token_url = "https://accounts.spotify.com/api/token"
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
    }

    data = {
        'grant_type': 'authorization_code',
        'code': code,
        'redirect_uri': redirect_uri,
        'client_id': client_id,
        'client_secret': client_secret,
    }

    # Without a timeout a stalled connection would block the caller indefinitely.
    response = requests.post(token_url, data=data, headers=headers, timeout=30)
    response_data = response.json()

    if 'access_token' not in response_data:
        # Include the server's reply so the cause of the failure is visible.
        raise Exception(f"Failed to get access token (HTTP {response.status_code}): {response.text}")

    access_token = response_data['access_token']
    refresh_token = response_data.get('refresh_token', None)
    expires_at = time.time() + response_data['expires_in']
    return access_token, refresh_token, expires_at
|
||||
|
||||
|
||||
def _refresh_access_token(refresh_token: str, client_id: str, client_secret: str) -> tuple:
    """
    Refreshes the access token using the refresh token.

    :param refresh_token: str
    :param client_id: str
    :param client_secret: str
    :return: tuple of (access_token, expires_at), where expires_at is the
        current Unix time plus the ``expires_in`` value of the response
    :raises Exception: if the response does not contain an access token
    """
    token_url = "https://accounts.spotify.com/api/token"
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
    }

    data = {
        'grant_type': 'refresh_token',
        'refresh_token': refresh_token,
        'client_id': client_id,
        'client_secret': client_secret,
    }

    # Without a timeout a stalled connection would block the caller indefinitely.
    response = requests.post(token_url, data=data, headers=headers, timeout=30)
    response_data = response.json()

    if 'access_token' not in response_data:
        # Include the server's reply so the cause of the failure is visible.
        raise Exception(f"Failed to refresh access token (HTTP {response.status_code}): {response.text}")

    access_token = response_data['access_token']
    expires_at = time.time() + response_data['expires_in']
    return access_token, expires_at
|
||||
|
||||
|
||||
def _load_tokens(scope: str) -> tuple:
|
||||
"""
|
||||
Loads the tokens from the local file if they exist and are still valid.
|
||||
|
||||
:return: tuple or None
|
||||
"""
|
||||
if os.path.exists(TOKEN_FILE_PATH):
|
||||
with open(TOKEN_FILE_PATH, 'r') as f:
|
||||
tokens = json.load(f)
|
||||
if scope in tokens:
|
||||
if 'access_token' in tokens[scope] and 'expires_at' in tokens[scope] and 'expires_at' in tokens[scope]:
|
||||
return tokens[scope]['access_token'], tokens[scope]['refresh_token'], tokens[scope]['expires_at']
|
||||
return None
|
||||
|
||||
|
||||
def _save_tokens(access_token: str, refresh_token: str, scope: str, expires_at) -> None:
|
||||
"""
|
||||
Saves the access and refresh tokens to a local file.
|
||||
|
||||
:param access_token: str
|
||||
:param refresh_token: str
|
||||
:param scope: str
|
||||
"""
|
||||
tokens = {
|
||||
scope: {
|
||||
'access_token': access_token,
|
||||
'refresh_token': refresh_token,
|
||||
'expires_at': expires_at
|
||||
},
|
||||
}
|
||||
with open(TOKEN_FILE_PATH, 'w') as f:
|
||||
json.dump(tokens, f)
|
||||
|
||||
|
||||
def _refresh_tokens_file(access_token: str, scope: str, expires_at) -> None:
|
||||
"""
|
||||
Saves the access and refresh tokens to a local file.
|
||||
|
||||
:param access_token: str
|
||||
:param scope: str
|
||||
"""
|
||||
with open(TOKEN_FILE_PATH, 'r') as file:
|
||||
tokens = json.load(file)
|
||||
|
||||
if scope in tokens and 'refresh_token' in tokens[scope]:
|
||||
tokens[scope]['access_token'] = access_token
|
||||
tokens[scope]['expires_at'] = expires_at
|
||||
with open(TOKEN_FILE_PATH, 'w') as file:
|
||||
json.dump(tokens, file, indent=4)
|
||||
else:
|
||||
log.error(f"Error: Scope '{scope}' or refresh_token not found in the tokens file.")
|
||||
@@ -0,0 +1,126 @@
|
||||
import logging as log
|
||||
import sqlite3
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class Table(Enum):
    """
    Names of the database tables; each member's value is the SQL table name.
    """

    # Lookup tables keyed by the respective id
    TRACK_INFORMATION = "track_information"
    ARTIST_INFORMATION = "artist_information"
    ALBUM_INFORMATION = "album_information"
    TRACK_ATTRIBUTES = "track_attributes"

    # Play history
    RECENTLY_PLAYED = "recently_played"
|
||||
|
||||
|
||||
class Database:
    """
    A class to handle the database connection and operations
    """

    def __init__(self, db_name):
        """
        Initialize the connection to the database and make sure all tables exist.

        :param db_name: path of the SQLite database file passed to sqlite3.connect
        """
        self.db_name = db_name
        self.conn = sqlite3.connect(db_name)
        self.cursor = self.conn.cursor()
        self.create_tables()

    def create_tables(self):
        """Create the tables in the database if they do not exist yet"""

        self.cursor.execute(f'''
            CREATE TABLE IF NOT EXISTS {Table.TRACK_INFORMATION.value} (
                track_id TEXT PRIMARY KEY,
                title TEXT,
                duration_ms INTEGER,
                explicit BOOLEAN,
                popularity INTEGER
            );
        ''')

        self.cursor.execute(f'''
            CREATE TABLE IF NOT EXISTS {Table.ARTIST_INFORMATION.value} (
                artist_id TEXT PRIMARY KEY,
                artist_name TEXT,
                followers INTEGER,
                genres TEXT,
                popularity INTEGER
            );
        ''')

        self.cursor.execute(f'''
            CREATE TABLE IF NOT EXISTS {Table.ALBUM_INFORMATION.value} (
                album_id TEXT PRIMARY KEY,
                album_name TEXT,
                album_type TEXT,
                total_tracks INTEGER,
                release_date TEXT,
                label TEXT
            );
        ''')

        self.cursor.execute(f'''
            CREATE TABLE IF NOT EXISTS {Table.TRACK_ATTRIBUTES.value} (
                track_id TEXT PRIMARY KEY,
                attribute_name TEXT,
                attribute_value TEXT
            );
        ''')

        self.cursor.execute(f'''
            CREATE TABLE IF NOT EXISTS {Table.RECENTLY_PLAYED.value} (
                played_at TIMESTAMP PRIMARY KEY,
                track_id TEXT,
                artist_id TEXT,
                album_id TEXT,
                FOREIGN KEY (track_id) REFERENCES {Table.TRACK_INFORMATION.value}(track_id),
                FOREIGN KEY (artist_id) REFERENCES {Table.ARTIST_INFORMATION.value}(artist_id),
                FOREIGN KEY (album_id) REFERENCES {Table.ALBUM_INFORMATION.value}(album_id)
            );
        ''')

        # Commit the changes
        self.conn.commit()

    def add_row(self, table: Table, values):
        """
        Add a new row into the specified table.

        Errors never propagate to the caller. A constraint violation (for
        example a row with the same primary key already exists) is logged at
        debug level; any other failure is logged as an error so it is not
        silently lost.

        :param table: Table member naming the target table
        :param values: sequence with one value per column of the table
        """
        try:
            placeholders = ', '.join(['?'] * len(values))
            query = f"INSERT INTO {table.value} VALUES ({placeholders})"
            self.cursor.execute(query, values)
            self.conn.commit()
        except sqlite3.IntegrityError as e:
            log.debug(f"Error: {e}")
        except Exception as e:
            log.error(f"Error adding row to {table}: {e}")

    def read_all_rows(self, table: Table, column: str = "*"):
        """
        Read all rows from the specified table.

        :param table: Table member naming the table to read
        :param column: column expression placed verbatim into the SELECT
            statement; must come from trusted code, never from external input
        :return: list of row tuples
        """
        self.cursor.execute(f"SELECT {column} FROM {table.value}")
        return self.cursor.fetchall()

    def close(self):
        """Close the database connection"""
        self.conn.close()

    def get_total_overview(self) -> list:
        """
        Retrieve a total overview of all recently played songs with full details.

        :return: list of (played_at, track_id, title, artist_id, artist_name,
            album_id, album_name) tuples, newest first; an empty list if the
            query fails (the error is logged)
        """
        try:
            # Join recently_played with track_information, artist_information, and album_information
            query = f'''
                SELECT rp.played_at,
                       ti.track_id,
                       ti.title,
                       ai.artist_id,
                       ai.artist_name,
                       al.album_id,
                       al.album_name
                FROM {Table.RECENTLY_PLAYED.value} rp
                JOIN {Table.TRACK_INFORMATION.value} ti ON rp.track_id = ti.track_id
                JOIN {Table.ARTIST_INFORMATION.value} ai ON rp.artist_id = ai.artist_id
                JOIN {Table.ALBUM_INFORMATION.value} al ON rp.album_id = al.album_id
                ORDER BY rp.played_at DESC
            '''
            self.cursor.execute(query)
            return self.cursor.fetchall()
        except Exception as e:
            log.error(f"Error retrieving total overview: {e}")
            return []
|
||||
Vendored
+3
@@ -0,0 +1,3 @@
|
||||
SPOTIFY_CLIENT_ID=your_token_here
|
||||
SPOTIFY_CLIENT_SECRET=your_token_here
|
||||
SPOTIFY_REDIRECT_URI=http://localhost:8888/callback
|
||||
@@ -0,0 +1,8 @@
|
||||
import logging as log
from time import sleep

from scraper import scraping

# Pause between two scraping passes, in seconds (30 minutes).
SCRAPE_INTERVAL_SECONDS = 1800

# Run forever on intervals of 30 minutes
while True:
    try:
        scraping()
    except Exception:
        # One failed pass must not end the loop; log it and try again after
        # the next interval.
        log.exception("Scraping pass failed; retrying after the next interval")
    sleep(SCRAPE_INTERVAL_SECONDS)
|
||||
+150
@@ -0,0 +1,150 @@
|
||||
import requests
|
||||
|
||||
from auth import authenticate, simple_authenticate
|
||||
from database_handler import Database, Table
|
||||
|
||||
db = Database('spotify_scraped.db')
|
||||
|
||||
|
||||
def scraping():
    """
    Run one scraping pass.

    Authenticates for the ``user-read-recently-played`` scope, stores the
    recently played page and then fetches the details that are still missing.
    The shared database connection is reopened at the start and closed at the
    end, so the function can be called repeatedly.
    """
    global db

    # A previous pass closes the shared connection when it finishes, so open
    # a fresh one. Closing the old connection first is harmless and avoids
    # leaking it.
    db.close()
    db = Database(db.db_name)

    try:
        scope = "user-read-recently-played"
        bearer_token = authenticate(scope)

        # Once each 30 mins
        _read_recently_played_page_and_add_to_db(bearer_token=bearer_token)
        _scrape_missing_infos()
    finally:
        # Close the connection even when a step above raises.
        db.close()
|
||||
|
||||
|
||||
def _read_recently_played_page_and_add_to_db(bearer_token: str):
    """
    Fetch the recently played page and store one row per play in the database.

    Each row holds the play time, the track id, the id of the track's first
    artist and the album id.

    :param bearer_token: str
    :raises RuntimeError: if the response contains no ``items`` list
    """
    last_played_track = _get_last_played_track(bearer_token=bearer_token)

    if 'items' not in last_played_track:
        # Fail with the payload instead of a bare KeyError on 'items'.
        raise RuntimeError(f"Unexpected response from recently-played endpoint: {last_played_track}")

    for item in last_played_track['items']:
        track = item['track']
        row = (
            item['played_at'],
            track['id'],
            # Only the first listed artist is stored.
            track['artists'][0]['id'],
            track['album']['id'],
        )
        db.add_row(Table.RECENTLY_PLAYED, row)
|
||||
|
||||
|
||||
def _get_last_played_track(url: str = "https://api.spotify.com/v1/me/player/recently-played?limit=50", bearer_token: str = "") -> dict:
    """
    This function returns the decoded JSON response of the recently-played endpoint

    :param url: str, full request URL; the default asks for the last 50 plays
    :param bearer_token: str
    :return: dict
    """
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }

    # Without a timeout a stalled connection would block the caller indefinitely.
    response = requests.get(url, headers=header, timeout=30)
    return response.json()
|
||||
|
||||
|
||||
def _get_track_information(track_id: str, bearer_token: str) -> dict:
    """
    This function returns the track information based on the track id

    :param track_id: str
    :param bearer_token: str
    :return: dict, the decoded JSON response of the tracks endpoint
    """
    url = f"https://api.spotify.com/v1/tracks/{track_id}"
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }

    # Without a timeout a stalled connection would block the caller indefinitely.
    response = requests.get(url, headers=header, timeout=30)
    return response.json()
|
||||
|
||||
|
||||
def _get_artist_information(artist_id: str, bearer_token: str) -> dict:
    """
    This function returns the artist information based on the artist id

    :param artist_id: str
    :param bearer_token: str
    :return: dict, the decoded JSON response of the artists endpoint
    """
    url = f"https://api.spotify.com/v1/artists/{artist_id}"
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }

    # Without a timeout a stalled connection would block the caller indefinitely.
    response = requests.get(url, headers=header, timeout=30)
    return response.json()
|
||||
|
||||
|
||||
def _get_album_information(album_id: str, bearer_token: str) -> dict:
    """
    This function returns the album information based on the album id

    :param album_id: str
    :param bearer_token: str
    :return: dict, the decoded JSON response of the albums endpoint
    """
    url = f"https://api.spotify.com/v1/albums/{album_id}"
    header = {
        'Authorization': f'Bearer {bearer_token}'
    }

    # Without a timeout a stalled connection would block the caller indefinitely.
    response = requests.get(url, headers=header, timeout=30)
    return response.json()
|
||||
|
||||
|
||||
def _scrape_missing_infos():
    """
    Fetch and store details for every track, album and artist that appears in
    the recently played table but has no row in its information table yet.

    Ids that are NULL are skipped, and a response without an ``id`` field
    (for example an error payload) is logged and skipped instead of aborting
    the whole pass.
    """
    # Imported locally because this function is the only user in the module.
    import logging as log

    bearer_token_simple = simple_authenticate()

    def _missing_ids(info_table, column):
        """Return the ids found in recently_played but not in info_table."""
        played = db.read_all_rows(Table.RECENTLY_PLAYED, column)
        saved = db.read_all_rows(info_table, column)
        # Rows are 1-tuples; a NULL id cannot be looked up, so drop it.
        return [row[0] for row in set(played) - set(saved) if row[0] is not None]

    # Track Info
    for track_id in _missing_ids(Table.TRACK_INFORMATION, 'track_id'):
        response = _get_track_information(track_id=track_id, bearer_token=bearer_token_simple)
        if 'id' not in response:
            log.error(f"Could not fetch track {track_id}: {response}")
            continue
        db.add_row(Table.TRACK_INFORMATION, (response['id'], response['name'], response['duration_ms'], response['explicit'], response['popularity']))

    # Album Info
    for album_id in _missing_ids(Table.ALBUM_INFORMATION, 'album_id'):
        response = _get_album_information(album_id=album_id, bearer_token=bearer_token_simple)
        if 'id' not in response:
            log.error(f"Could not fetch album {album_id}: {response}")
            continue
        try:
            # Only the first four characters of the release date are stored.
            release_year = response['release_date'][:4]
        except (KeyError, TypeError):
            release_year = ""
        db.add_row(Table.ALBUM_INFORMATION, (response['id'], response['name'], response['album_type'], response['total_tracks'], release_year, response['label']))

    # Artist Info
    for artist_id in _missing_ids(Table.ARTIST_INFORMATION, 'artist_id'):
        response = _get_artist_information(artist_id=artist_id, bearer_token=bearer_token_simple)
        if 'id' not in response:
            log.error(f"Could not fetch artist {artist_id}: {response}")
            continue
        # Only the first genre is stored; an artist without genres gets "".
        genres = response.get('genres') or []
        genre = genres[0] if genres else ""
        db.add_row(Table.ARTIST_INFORMATION, (response['id'], response['name'], response['followers']['total'], genre, response['popularity']))
|
||||
Executable
+14
@@ -0,0 +1,14 @@
|
||||
#!/bin/sh
#
# Start up the predictify scraper:
# create a virtual environment, install the requirements, then run the scraper.

if test -f ./requirements.txt
then
    # Stop here if the environment cannot be created or populated; running
    # the scraper without its dependencies would only fail later.
    python3 -m venv .venv || exit 1
    .venv/bin/pip install -r ./requirements.txt || exit 1
else
    printf "Missing requirements file! aborting...\n"
    exit 1
fi

# NOTE(review): confirm src/scraper.py is the intended entry point; it looks
# like the module that only defines scraping(), not the script that loops.
.venv/bin/python3 src/scraper.py
|
||||
Reference in New Issue
Block a user