Logging and documentation in scraper

This commit is contained in:
agres
2025-03-23 23:16:26 +01:00
parent f714200225
commit 87f84b250e
2 changed files with 84 additions and 15 deletions
+56
View File
@@ -0,0 +1,56 @@
import logging
import os
from logging.handlers import RotatingFileHandler
from pathlib import Path
class LoggerWrapper():
    """Thin convenience wrapper around a named :class:`logging.Logger`.

    On construction the underlying logger is set to ``DEBUG`` and, if it has
    no handlers yet, receives two handlers sharing one format:

    * a rotating file handler writing ``predictify.log`` into a ``logs``
      folder located one directory above this file (``DEBUG`` and up),
    * a console handler (``WARNING`` and up).

    The ``info``/``debug``/``warning``/``error``/``critical`` methods forward
    to the wrapped logger.
    """

    # The log format prints "[filename:lineno]". Because every call passes
    # through a method of this class, the logging machinery has to skip one
    # extra stack frame to report the real call site instead of this wrapper.
    _CALLER_STACKLEVEL = 2

    def __init__(self, logger_name: str = "standard_logger"):
        """Fetch (or create) the logger called *logger_name* and configure it."""
        self.logger = logging.getLogger(logger_name)
        self.logger.setLevel(logging.DEBUG)
        self.setup_logger()

    def setup_logger(self):
        """Attach the file and console handlers to the wrapped logger.

        ``logging.getLogger`` hands out the same logger object for the same
        name, so the handlers must only be attached once. If the logger
        already has handlers this method does nothing; otherwise every
        additional ``LoggerWrapper`` instance would duplicate each log line.
        """
        if self.logger.handlers:
            return

        # Define and create folder
        logs_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'logs')
        Path(logs_folder).mkdir(parents=True, exist_ok=True)

        # Define file path
        log_file = os.path.join(logs_folder, 'predictify.log')

        # Setup File Handler (rolls over at maxBytes, keeps backupCount old files)
        file_handler = RotatingFileHandler(log_file, maxBytes=1000000, backupCount=5)
        file_handler.setLevel(logging.DEBUG)

        # Setup Console Handler
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.WARNING)

        # Setup Formatter
        formatter = logging.Formatter('%(asctime)s - [%(filename)s:%(lineno)d] - %(levelname)s - %(message)s')

        # Add Formatter to Handlers, then Handlers to Logger
        for handler in (file_handler, console_handler):
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)

    def info(self, message):
        """Log *message* with level INFO."""
        self.logger.info(message, stacklevel=self._CALLER_STACKLEVEL)

    def debug(self, message):
        """Log *message* with level DEBUG."""
        self.logger.debug(message, stacklevel=self._CALLER_STACKLEVEL)

    def warning(self, message):
        """Log *message* with level WARNING."""
        self.logger.warning(message, stacklevel=self._CALLER_STACKLEVEL)

    def error(self, message):
        """Log *message* with level ERROR."""
        self.logger.error(message, stacklevel=self._CALLER_STACKLEVEL)

    def critical(self, message):
        """Log *message* with level CRITICAL."""
        self.logger.critical(message, stacklevel=self._CALLER_STACKLEVEL)
        # Here we can add alerting/handling
+22 -9
View File
@@ -1,16 +1,17 @@
from auth import authenticate, simple_authenticate from auth import authenticate, simple_authenticate
from database_handler import Database, Table from database_handler import Database, Table
from logger import LoggerWrapper
from spotify_api import get_last_played_track, get_multiple_field_information from spotify_api import get_last_played_track, get_multiple_field_information
# Define DB # Define DB
db = Database() db = Database()
log = LoggerWrapper()
def scraping(): def scraping() -> None:
""" """
This function is the main function that will be executed when the script is run This function is the main function that will be executed when the script is run
""" """
global db
scope = "user-read-recently-played" scope = "user-read-recently-played"
bearer_token = authenticate(scope) bearer_token = authenticate(scope)
@@ -19,7 +20,7 @@ def scraping():
scrape_missing_infos() scrape_missing_infos()
def _read_recently_played_page_and_add_to_db(bearer_token: str): def _read_recently_played_page_and_add_to_db(bearer_token: str) -> None:
""" """
This function gets a list of song play history and adds it into the database. This function gets a list of song play history and adds it into the database.
""" """
@@ -27,26 +28,30 @@ def _read_recently_played_page_and_add_to_db(bearer_token: str):
last_played_track = get_last_played_track(bearer_token=bearer_token) last_played_track = get_last_played_track(bearer_token=bearer_token)
try:
for track in reversed(last_played_track['items']): for track in reversed(last_played_track['items']):
track_id = track['track']['id'] track_id = track['track']['id']
played_at = track['played_at'] played_at = track['played_at']
album_id = track['track']['album']['id'] album_id = track['track']['album']['id']
artist_id = track['track']['artists'][0]['id'] artist_id = track['track']['artists'][0]['id']
db.add_row(Table.RECENTLY_PLAYED, (played_at, track_id, artist_id, album_id)) db.add_row(Table.RECENTLY_PLAYED, (played_at, track_id, artist_id, album_id))
except Exception as e:
log.error(f"Failed to add returned play history to database: {e}"
f"\nReturned Value: {last_played_track}")
def scrape_missing_infos(): def scrape_missing_infos() -> None:
""" """
""" """
bearer_token_simple = simple_authenticate() bearer_token_simple = simple_authenticate()
_scrape_missing_info(bearer_token_simple, Table.TRACK_INFORMATION, 'track_id', 'tracks') _process_missing_info(bearer_token_simple, Table.TRACK_INFORMATION, 'track_id', 'tracks')
_scrape_missing_info(bearer_token_simple, Table.ALBUM_INFORMATION, 'album_id', 'albums') _process_missing_info(bearer_token_simple, Table.ALBUM_INFORMATION, 'album_id', 'albums')
_scrape_missing_info(bearer_token_simple, Table.ARTIST_INFORMATION, 'artist_id', 'artists') _process_missing_info(bearer_token_simple, Table.ARTIST_INFORMATION, 'artist_id', 'artists')
def _scrape_missing_info(bearer_token_simple: str, table_name: Table, id_field_name: str, endpoint_name: str): def _process_missing_info(bearer_token_simple: str, table_name: Table, id_field_name: str, endpoint_name: str) -> None:
if endpoint_name == 'albums': if endpoint_name == 'albums':
limit = 20 limit = 20
@@ -57,6 +62,8 @@ def _scrape_missing_info(bearer_token_simple: str, table_name: Table, id_field_n
all_ids_saved = db.read_all_rows(table_name, id_field_name) all_ids_saved = db.read_all_rows(table_name, id_field_name)
all_ids_missing = list(set(all_ids_recently_played) - set(all_ids_saved)) all_ids_missing = list(set(all_ids_recently_played) - set(all_ids_saved))
log.debug(f"Number of missing {table_name.name} entries: {len(all_ids_missing)}. Inserting...")
ids = [] ids = []
processed_ids = set() processed_ids = set()
@@ -85,16 +92,20 @@ def _scrape_missing_info(bearer_token_simple: str, table_name: Table, id_field_n
_add_data_to_database(table_name, response) _add_data_to_database(table_name, response)
def _add_data_to_database(table_name: Table, response): def _add_data_to_database(table_name: Table, response) -> None:
global db global db
if table_name == Table.TRACK_INFORMATION: if table_name == Table.TRACK_INFORMATION:
log.debug('Adding track information to database')
for entry in response['tracks']: for entry in response['tracks']:
log.debug(f"Adding track: {entry['name']}")
db.add_row(table_name, (entry['id'], entry['name'], entry['duration_ms'], entry['explicit'], entry['popularity'])) db.add_row(table_name, (entry['id'], entry['name'], entry['duration_ms'], entry['explicit'], entry['popularity']))
elif table_name == Table.ALBUM_INFORMATION: elif table_name == Table.ALBUM_INFORMATION:
log.debug('Adding album information to database')
for entry in response['albums']: for entry in response['albums']:
log.debug(f"Adding album: {entry['name']}")
try: try:
release_year = entry['release_date'][:4] release_year = entry['release_date'][:4]
except Exception: except Exception:
@@ -102,7 +113,9 @@ def _add_data_to_database(table_name: Table, response):
db.add_row(table_name, (entry['id'], entry['name'], entry['album_type'], entry['total_tracks'], release_year, entry['label'])) db.add_row(table_name, (entry['id'], entry['name'], entry['album_type'], entry['total_tracks'], release_year, entry['label']))
elif table_name == Table.ARTIST_INFORMATION: elif table_name == Table.ARTIST_INFORMATION:
log.debug('Adding artist information to database')
for entry in response['artists']: for entry in response['artists']:
log.debug(f"Adding artist: {entry['name']}")
try: try:
genre = entry['genres'][0] genre = entry['genres'][0]
except IndexError: except IndexError: