mirror of
https://github.com/agresdominik/predictify.git
synced 2026-04-21 17:55:49 +00:00
Logging and documentation in scraper
This commit is contained in:
@@ -0,0 +1,56 @@
|
||||
import logging
|
||||
import os
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class LoggerWrapper():
    """
    Thin wrapper around a named ``logging.Logger``.

    The wrapped logger writes DEBUG and above to a rotating file
    (``../logs/predictify.log`` relative to this module) and WARNING and
    above to the console.
    """

    def __init__(self, logger_name: str = "standard_logger"):
        """
        Fetch the logger called ``logger_name`` and make sure it is configured.

        :param logger_name: Name handed to ``logging.getLogger``.
        """
        self.logger = logging.getLogger(logger_name)
        self.logger.setLevel(logging.DEBUG)
        self.setup_logger()

    def setup_logger(self):
        """
        Attach the rotating file handler and the console handler.

        ``logging.getLogger`` returns the same logger object for the same
        name, so this method only adds a handler when an equivalent one is
        not attached yet. Without that check every additional
        ``LoggerWrapper`` instance would duplicate each log line.
        """
        # Define and create folder
        logs_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'logs')
        Path(logs_folder).mkdir(parents=True, exist_ok=True)

        # Define file path
        log_file = os.path.join(logs_folder, 'predictify.log')

        # Setup Formatter
        formatter = logging.Formatter('%(asctime)s - [%(filename)s:%(lineno)d] - %(levelname)s - %(message)s')

        existing_handlers = list(self.logger.handlers)

        # Setup File Handler (skipped when this log file is already attached)
        # RotatingFileHandler stores its target as an absolute path in
        # ``baseFilename``, so compare against the absolute form.
        target_file = os.path.abspath(log_file)
        has_file_handler = any(
            isinstance(existing, RotatingFileHandler) and existing.baseFilename == target_file
            for existing in existing_handlers
        )
        if not has_file_handler:
            handler = RotatingFileHandler(log_file, maxBytes=1000000, backupCount=5)
            handler.setLevel(logging.DEBUG)
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)

        # Setup Console Handler (skipped when a console handler is already attached)
        # File handlers subclass StreamHandler, so they are excluded explicitly.
        has_console_handler = any(
            isinstance(existing, logging.StreamHandler) and not isinstance(existing, logging.FileHandler)
            for existing in existing_handlers
        )
        if not has_console_handler:
            console_handler = logging.StreamHandler()
            console_handler.setLevel(logging.WARNING)
            console_handler.setFormatter(formatter)
            self.logger.addHandler(console_handler)

    # NOTE: every method below passes ``stacklevel=2`` so that the
    # ``%(filename)s:%(lineno)d`` part of the format points at the code that
    # called the wrapper instead of at this wrapper module.

    def info(self, message):
        """Log ``message`` with level INFO."""
        self.logger.info(message, stacklevel=2)

    def debug(self, message):
        """Log ``message`` with level DEBUG."""
        self.logger.debug(message, stacklevel=2)

    def warning(self, message):
        """Log ``message`` with level WARNING."""
        self.logger.warning(message, stacklevel=2)

    def error(self, message):
        """Log ``message`` with level ERROR."""
        self.logger.error(message, stacklevel=2)

    def critical(self, message):
        """Log ``message`` with level CRITICAL."""
        self.logger.critical(message, stacklevel=2)
        # Here we can add alerting/handling
|
||||
+22
-9
@@ -1,16 +1,17 @@
|
||||
from auth import authenticate, simple_authenticate
|
||||
from database_handler import Database, Table
|
||||
from logger import LoggerWrapper
|
||||
from spotify_api import get_last_played_track, get_multiple_field_information
|
||||
|
||||
# Define DB
|
||||
db = Database()
|
||||
log = LoggerWrapper()
|
||||
|
||||
|
||||
def scraping():
|
||||
def scraping() -> None:
|
||||
"""
|
||||
This function is the main function that will be executed when the script is run
|
||||
"""
|
||||
global db
|
||||
|
||||
scope = "user-read-recently-played"
|
||||
bearer_token = authenticate(scope)
|
||||
@@ -19,7 +20,7 @@ def scraping():
|
||||
scrape_missing_infos()
|
||||
|
||||
|
||||
def _read_recently_played_page_and_add_to_db(bearer_token: str):
|
||||
def _read_recently_played_page_and_add_to_db(bearer_token: str) -> None:
|
||||
"""
|
||||
This function gets a list of song play history and adds it into the database.
|
||||
"""
|
||||
@@ -27,26 +28,30 @@ def _read_recently_played_page_and_add_to_db(bearer_token: str):
|
||||
|
||||
last_played_track = get_last_played_track(bearer_token=bearer_token)
|
||||
|
||||
try:
|
||||
for track in reversed(last_played_track['items']):
|
||||
track_id = track['track']['id']
|
||||
played_at = track['played_at']
|
||||
album_id = track['track']['album']['id']
|
||||
artist_id = track['track']['artists'][0]['id']
|
||||
db.add_row(Table.RECENTLY_PLAYED, (played_at, track_id, artist_id, album_id))
|
||||
except Exception as e:
|
||||
log.error(f"Failed to add returned play history to database: {e}"
|
||||
f"\nReturned Value: {last_played_track}")
|
||||
|
||||
|
||||
def scrape_missing_infos():
|
||||
def scrape_missing_infos() -> None:
|
||||
"""
|
||||
|
||||
"""
|
||||
bearer_token_simple = simple_authenticate()
|
||||
|
||||
_scrape_missing_info(bearer_token_simple, Table.TRACK_INFORMATION, 'track_id', 'tracks')
|
||||
_scrape_missing_info(bearer_token_simple, Table.ALBUM_INFORMATION, 'album_id', 'albums')
|
||||
_scrape_missing_info(bearer_token_simple, Table.ARTIST_INFORMATION, 'artist_id', 'artists')
|
||||
_process_missing_info(bearer_token_simple, Table.TRACK_INFORMATION, 'track_id', 'tracks')
|
||||
_process_missing_info(bearer_token_simple, Table.ALBUM_INFORMATION, 'album_id', 'albums')
|
||||
_process_missing_info(bearer_token_simple, Table.ARTIST_INFORMATION, 'artist_id', 'artists')
|
||||
|
||||
|
||||
def _scrape_missing_info(bearer_token_simple: str, table_name: Table, id_field_name: str, endpoint_name: str):
|
||||
def _process_missing_info(bearer_token_simple: str, table_name: Table, id_field_name: str, endpoint_name: str) -> None:
|
||||
|
||||
if endpoint_name == 'albums':
|
||||
limit = 20
|
||||
@@ -57,6 +62,8 @@ def _scrape_missing_info(bearer_token_simple: str, table_name: Table, id_field_n
|
||||
all_ids_saved = db.read_all_rows(table_name, id_field_name)
|
||||
all_ids_missing = list(set(all_ids_recently_played) - set(all_ids_saved))
|
||||
|
||||
log.debug(f"Number of missing {table_name.name} entries: {len(all_ids_missing)}. Inserting...")
|
||||
|
||||
ids = []
|
||||
processed_ids = set()
|
||||
|
||||
@@ -85,16 +92,20 @@ def _scrape_missing_info(bearer_token_simple: str, table_name: Table, id_field_n
|
||||
_add_data_to_database(table_name, response)
|
||||
|
||||
|
||||
def _add_data_to_database(table_name: Table, response):
|
||||
def _add_data_to_database(table_name: Table, response) -> None:
|
||||
|
||||
global db
|
||||
|
||||
if table_name == Table.TRACK_INFORMATION:
|
||||
log.debug('Adding track information to database')
|
||||
for entry in response['tracks']:
|
||||
log.debug(f"Adding track: {entry['name']}")
|
||||
db.add_row(table_name, (entry['id'], entry['name'], entry['duration_ms'], entry['explicit'], entry['popularity']))
|
||||
|
||||
elif table_name == Table.ALBUM_INFORMATION:
|
||||
log.debug('Adding album information to database')
|
||||
for entry in response['albums']:
|
||||
log.debug(f"Adding album: {entry['name']}")
|
||||
try:
|
||||
release_year = entry['release_date'][:4]
|
||||
except Exception:
|
||||
@@ -102,7 +113,9 @@ def _add_data_to_database(table_name: Table, response):
|
||||
db.add_row(table_name, (entry['id'], entry['name'], entry['album_type'], entry['total_tracks'], release_year, entry['label']))
|
||||
|
||||
elif table_name == Table.ARTIST_INFORMATION:
|
||||
log.debug('Adding artist information to database')
|
||||
for entry in response['artists']:
|
||||
log.debug(f"Adding artist: {entry['name']}")
|
||||
try:
|
||||
genre = entry['genres'][0]
|
||||
except IndexError:
|
||||
|
||||
Reference in New Issue
Block a user