diff --git a/src/database_handler.py b/src/database_handler.py
new file mode 100644
index 0000000..ce0478e
--- /dev/null
+++ b/src/database_handler.py
@@ -0,0 +1,112 @@
+import sqlite3
+from enum import Enum
+
+
+class Table(Enum):
+    """Names of the tables created by the Database class"""
+
+    TRACK_INFORMATION = "track_information"
+    ARTIST_INFORMATION = "artist_information"
+    ALBUM_INFORMATION = "album_information"
+    TRACK_ATTRIBUTES = "track_attributes"
+    RECENTLY_PLAYED = "recently_played"
+
+
+class Database:
+    """
+    A class to handle the database connection and operations
+    """
+
+    def __init__(self, db_name):
+        """
+        Initialize the connection to the database and create the tables
+
+        :param db_name: database file handed to sqlite3.connect
+        """
+        self.db_name = db_name
+        self.conn = sqlite3.connect(db_name)
+        self.cursor = self.conn.cursor()
+        self.create_tables()
+
+    def create_tables(self):
+        """Create the tables in the database if they do not exist yet"""
+
+        self.cursor.execute(f'''
+            CREATE TABLE IF NOT EXISTS {Table.TRACK_INFORMATION.value} (
+                track_id TEXT PRIMARY KEY,
+                title TEXT
+            );
+        ''')
+
+        self.cursor.execute(f'''
+            CREATE TABLE IF NOT EXISTS {Table.ARTIST_INFORMATION.value} (
+                artist_id TEXT PRIMARY KEY,
+                artist_name TEXT
+            );
+        ''')
+
+        self.cursor.execute(f'''
+            CREATE TABLE IF NOT EXISTS {Table.ALBUM_INFORMATION.value} (
+                album_id TEXT PRIMARY KEY,
+                album_name TEXT
+            );
+        ''')
+
+        # NOTE(review): track_id alone is the primary key, so this table can hold
+        # at most one attribute row per track - confirm that is intended
+        self.cursor.execute(f'''
+            CREATE TABLE IF NOT EXISTS {Table.TRACK_ATTRIBUTES.value} (
+                track_id TEXT PRIMARY KEY,
+                attribute_name TEXT,
+                attribute_value TEXT
+            );
+        ''')
+
+        # NOTE: nothing here runs PRAGMA foreign_keys = ON, and SQLite leaves
+        # foreign keys unenforced by default, so these references are not checked
+        self.cursor.execute(f'''
+            CREATE TABLE IF NOT EXISTS {Table.RECENTLY_PLAYED.value} (
+                played_at TIMESTAMP PRIMARY KEY,
+                track_id TEXT,
+                artist_id TEXT,
+                album_id TEXT,
+                FOREIGN KEY (track_id) REFERENCES {Table.TRACK_INFORMATION.value}(track_id),
+                FOREIGN KEY (artist_id) REFERENCES {Table.ARTIST_INFORMATION.value}(artist_id),
+                FOREIGN KEY (album_id) REFERENCES {Table.ALBUM_INFORMATION.value}(album_id)
+            );
+        ''')
+
+        # Commit the changes
+        self.conn.commit()
+
+    def add_row(self, table: Table, values, ignore_duplicates: bool = False):
+        """
+        Add a new row into the specified table and commit it
+
+        :param table: Table to insert into
+        :param values: sequence with one value per column of the table, in column order
+        :param ignore_duplicates: bool, when True a row that conflicts with a stored
+            one (e.g. same primary key) is skipped instead of raising
+        """
+        placeholders = ', '.join(['?'] * len(values))
+        # OR IGNORE makes SQLite skip the conflicting row rather than raise IntegrityError
+        insert = "INSERT OR IGNORE" if ignore_duplicates else "INSERT"
+        query = f"{insert} INTO {table.value} VALUES ({placeholders})"
+        self.cursor.execute(query, values)
+        self.conn.commit()
+
+    def read_all_rows(self, table: Table, column: str = "*"):
+        """
+        Read all rows from the specified table
+
+        :param table: Table to read from
+        :param column: str, placed in the query as-is (not escaped), so never pass untrusted text
+        :return: list of tuples, one per row, also when a single column is selected
+        """
+        self.cursor.execute(f"SELECT {column} FROM {table.value}")
+        rows = self.cursor.fetchall()
+        return rows
+
+    def close(self):
+        """Close the database connection"""
+        self.conn.close()
diff --git a/src/scraper.py b/src/scraper.py
index fabb82c..c2144ce 100644
--- a/src/scraper.py
+++ b/src/scraper.py
@@ -1,19 +1,56 @@
 import requests
 from auth import authenticate
+from database_handler import Database, Table
+
+db = Database('spotify_scraped.db')
 
 
 def main():
     """
     This function is the main function that will be executed when the script is run
     """
 
+    global db
+
     scope = "user-read-recently-played"
     bearer_token = authenticate(scope)
+
+    try:
+        # Once each 30 mins
+        _read_recently_played_page_and_add_to_db(bearer_token=bearer_token)
+
+        # Once a day
+        # read_all_rows returns one-element tuples, so unpack the id before using it in a URL
+        all_track_ids = db.read_all_rows(Table.RECENTLY_PLAYED, 'track_id')
+        for (track_id,) in all_track_ids:
+            response = _get_track_information(track_id=track_id, bearer_token=bearer_token)
+            print(response)
+    finally:
+        # Close the database connection even when a request or an insert fails
+        db.close()
+
+
+def _read_recently_played_page_and_add_to_db(bearer_token: str):
+    """
+    This function reads one page of recently played tracks and stores every play in the database
+
+    :param bearer_token: str
+    """
+    global db
+
     last_played_track = _get_last_played_track(bearer_token=bearer_token)
-    print(last_played_track)
+
+    for track in last_played_track['items']:
+        track_id = track['track']['id']
+        played_at = track['played_at']
+        album_id = track['track']['album']['id']
+        # Only the first listed artist is stored
+        artist_id = track['track']['artists'][0]['id']
+        # played_at is the primary key: plays already stored by an earlier run are skipped
+        db.add_row(Table.RECENTLY_PLAYED, (played_at, track_id, artist_id, album_id), ignore_duplicates=True)
 
 
-def _get_last_played_track(limit: str = "1", bearer_token: str = "") -> dict:
+def _get_last_played_track(url: str = "https://api.spotify.com/v1/me/player/recently-played?limit=50", bearer_token: str = "") -> dict:
     """
     This function returns the last played track based on the limit size
 
@@ -26,7 +63,26 @@ def _get_last_played_track(limit: str = "1", bearer_token: str = "") -> dict:
         'Authorization': f'Bearer {bearer_token}'
     }
 
-    response = requests.get(f'https://api.spotify.com/v1/me/player/recently-played?limit={limit}', headers=header)
+    response = requests.get(url, headers=header)
+    response_json = response.json()
+    return response_json
+
+
+def _get_track_information(track_id: str, bearer_token: str) -> dict:
+    """
+    This function returns the track information based on the track id
+
+    :param track_id: str
+    :param bearer_token: str
+    :return: dict
+    """
+
+    url = f"https://api.spotify.com/v1/tracks/{track_id}"
+    header = {
+        'Authorization': f'Bearer {bearer_token}'
+    }
+
+    response = requests.get(url, headers=header)
     response_json = response.json()
     return response_json
 