13.14. Soccer Database#
import pandas as pd
import sqlite3
df = pd.read_csv('all_players.csv')
df['Club'].isna().sum()
np.int64(5493)
conn = sqlite3.connect('all_players.db')
cur = conn.cursor()
df.to_sql('players', conn, if_exists='replace', index=False) # - writes the all_players.df to SQLIte DB
conn.commit()
conn.close()
import pandas as pd
import sqlite3
from sqlite3 import Error
def create_connection(db_file, delete_db=False):
import os
if delete_db and os.path.exists(db_file):
os.remove(db_file)
conn = None
try:
conn = sqlite3.connect(db_file)
conn.execute("PRAGMA foreign_keys = 1")
except Error as e:
print(e)
return conn
def create_table(conn, create_table_sql):
try:
c = conn.cursor()
c.execute(create_table_sql)
except Error as e:
print(e)
def execute_sql_statement(sql_statement, conn):
cur = conn.cursor()
cur.execute(sql_statement)
rows = cur.fetchall()
return rows
display(df)
Player | Club | POS | GP | GS | MINS | Year | Season | |
---|---|---|---|---|---|---|---|---|
0 | Roy Lassiter | TB | F | 30 | 30 | 2580 | 1996 | reg |
1 | Raul Diaz Arce | DC | F | 28 | 28 | 2351 | 1996 | reg |
2 | Eduardo Hurtado | LA | F | 26 | 26 | 2323 | 1996 | reg |
3 | Preki | KC | M | 32 | 32 | 2880 | 1996 | reg |
4 | Brian McBride | CLB | F | 28 | 28 | 2307 | 1996 | reg |
... | ... | ... | ... | ... | ... | ... | ... | ... |
15762 | Alejandro Pozuelo | NaN | M | 1 | 1 | 120 | 2020 | post |
15763 | Franco Jara | NaN | F | 2 | 2 | 210 | 2020 | post |
15764 | Andy Polo | NaN | M | 1 | 1 | 105 | 2020 | post |
15765 | Cristian Roldan | NaN | M | 4 | 4 | 360 | 2020 | post |
15766 | Joevin Jones | NaN | D-M | 4 | 4 | 260 | 2020 | post |
15767 rows × 8 columns
13.15. Tables#
Club
Position
Year
Season
Player
PlayerStats
conn = sqlite3.connect('all_players.db')
sql_statement = "SELECT DISTINCT CLUB FROM players"
rows = execute_sql_statement(sql_statement, conn)
rows = filter(lambda row: row[0], rows)
clubs = list(map(lambda row: row[0].strip(), rows))
clubs = list(set(clubs))
from collections import Counter
counter = Counter(clubs)
print(counter)
Counter({'ECU': 1, 'VAN': 1, 'COL': 1, 'ROC': 1, 'CHI': 1, 'LAFC': 1, 'USA': 1, 'HAI': 1, 'RSL': 1, 'SJ': 1, 'CHV': 1, 'MTL': 1, 'PAN': 1, 'CIN': 1, 'NSH': 1, 'POR': 1, 'DC': 1, 'MIN': 1, 'HOU': 1, 'SEA': 1, 'KC': 1, 'NYC': 1, 'TOR': 1, 'LFC': 1, 'ORL': 1, 'RBNY': 1, 'MET': 1, 'SKC': 1, 'GHA': 1, 'NYR': 1, 'TB': 1, 'LA': 1, 'PHI': 1, 'CIV': 1, 'SLV': 1, 'MCF': 1, 'CAN': 1, 'MTQ': 1, 'MIA': 1, 'JAM': 1, 'DAL': 1, 'NE': 1, 'HON': 1, 'CLB': 1, 'ATL': 1, 'NY': 1})
# write create statement
# execute create statement
# fetch unique values
# write insert function
# feed unique values into insert function
conn_norm.close()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[9], line 1
----> 1 conn_norm.close()
NameError: name 'conn_norm' is not defined
conn_norm = create_connection('players_norm.db', True)
create_table_sql = """CREATE TABLE IF NOT EXISTS[Clubs] (
[Club] TEXT NOT NULL PRIMARY KEY
);
"""
create_table(conn_norm, create_table_sql) # doesn't require commit
def insert_club(conn, values):
sql = """INSERT INTO Clubs(Club)
VALUES(?)"""
cur = conn.cursor()
cur.execute(sql, values)
return cur.lastrowid
with conn_norm:
for club in clubs:
insert_club(conn_norm, (club,))
sql_statement = "SELECT DISTINCT CLUB FROM Clubs"
rows = execute_sql_statement(sql_statement, conn_norm)
print(rows)
[('ECU',), ('VAN',), ('COL',), ('ROC',), ('CHI',), ('LAFC',), ('USA',), ('HAI',), ('RSL',), ('SJ',), ('CHV',), ('MTL',), ('PAN',), ('CIN',), ('NSH',), ('POR',), ('DC',), ('MIN',), ('HOU',), ('SEA',), ('KC',), ('NYC',), ('TOR',), ('LFC',), ('ORL',), ('RBNY',), ('MET',), ('SKC',), ('GHA',), ('NYR',), ('TB',), ('LA',), ('PHI',), ('CIV',), ('SLV',), ('MCF',), ('CAN',), ('MTQ',), ('MIA',), ('JAM',), ('DAL',), ('NE',), ('HON',), ('CLB',), ('ATL',), ('NY',)]
sql_statement = "SELECT DISTINCT POS FROM players"
rows = execute_sql_statement(sql_statement, conn)
rows = filter(lambda row: row[0], rows)
positions = list(map(lambda row: row[0].strip(), rows))
positions = list(set(positions))
create_table_sql = """CREATE TABLE IF NOT EXISTS[Positions] (
[Position] TEXT NOT NULL PRIMARY KEY
);
"""
create_table(conn_norm, create_table_sql) # doesn't require commit
def insert_position(conn, values):
sql = """INSERT INTO Positions(Position)
VALUES(?)"""
cur = conn.cursor()
cur.execute(sql, values)
return cur.lastrowid
with conn_norm:
for position in positions:
insert_position(conn_norm, (position,))
sql_statement = "SELECT DISTINCT Year FROM players"
rows = execute_sql_statement(sql_statement, conn)
rows = filter(lambda row: row[0], rows)
years = list(map(lambda row: int(row[0]), rows))
years = list(set(years))
create_table_sql = """CREATE TABLE IF NOT EXISTS[Years] (
[Year] INTEGER NOT NULL PRIMARY KEY
);
"""
create_table(conn_norm, create_table_sql) # doesn't require commit
def insert_year(conn, values):
sql = """INSERT INTO Years(Year)
VALUES(?)"""
cur = conn.cursor()
cur.execute(sql, values)
return cur.lastrowid
with conn_norm:
for year in years:
insert_year(conn_norm, (year,))
sql_statement = "SELECT DISTINCT season FROM players"
rows = execute_sql_statement(sql_statement, conn)
rows = filter(lambda row: row[0], rows)
seasons = list(map(lambda row: row[0].strip(), rows))
seasons = list(set(seasons))
create_table_sql = """CREATE TABLE IF NOT EXISTS[Seasons] (
[Season] TEXT NOT NULL PRIMARY KEY
);
"""
create_table(conn_norm, create_table_sql) # doesn't require commit
def insert_season(conn, values):
sql = """INSERT INTO Seasons(season)
VALUES(?)"""
cur = conn.cursor()
cur.execute(sql, values)
return cur.lastrowid
with conn_norm:
for season in seasons:
insert_season(conn_norm, (season,))
sql_statement = "SELECT DISTINCT player FROM players"
rows = execute_sql_statement(sql_statement, conn)
players = []
for row in rows:
row = row[0].strip()
name = row.split(' ', 1)
if len(name) == 2:
first, last = name
first = first.strip()
last = last.strip()
players.append((first, last))
elif len(name) == 1:
first = name[0]
players.append((first, None))
create_table_sql = """CREATE TABLE IF NOT EXISTS[Players] (
[PlayerID] INTEGER NOT NULL PRIMARY KEY,
[FirstName] TEXT NOT NULL,
[LastName] TEXT
);
"""
create_table(conn_norm, create_table_sql) # doesn't require commit
def insert_player(conn, values):
sql = """INSERT INTO Players(FirstName, LastName)
VALUES(?, ?)"""
cur = conn.cursor()
cur.execute(sql, values)
return cur.lastrowid
with conn_norm:
for player in players:
insert_player(conn_norm, player)
cur = conn_norm.cursor()
cur.execute('DROP TABLE PlayerStats')
---------------------------------------------------------------------------
OperationalError Traceback (most recent call last)
Cell In[26], line 2
1 cur = conn_norm.cursor()
----> 2 cur.execute('DROP TABLE PlayerStats')
OperationalError: no such table: PlayerStats
create_table_sql = """CREATE TABLE IF NOT EXISTS[PlayerStats] (
[StatID] INTEGER NOT NULL PRIMARY KEY,
[PlayerID] INTEGER NOT NULL,
[Club] TEXT,
[Position] TEXT NOT NULL,
[GP] INTEGER NOT NULL,
[GS] INTEGER NOT NULL,
[MIN] INTEGER NOT NULL,
[Year] INTEGER NOT NULL,
[Season] TEXT NOT NULL,
FOREIGN KEY(PlayerID) REFERENCES Players(PlayerID),
FOREIGN KEY(Club) REFERENCES Clubs(Club),
FOREIGN KEY(Year) REFERENCES Years(Year),
FOREIGN KEY(Season) REFERENCES Seasons(Season)
);
"""
create_table(conn_norm, create_table_sql) # doesn't require commit
sql_statement = "SELECT DISTINCT * FROM players"
rows = execute_sql_statement(sql_statement, conn)
rows[:10]
[('Roy Lassiter', 'TB ', 'F', 30, 30, 2580, 1996, 'reg'),
('Raul Diaz Arce', 'DC ', 'F', 28, 28, 2351, 1996, 'reg'),
('Eduardo Hurtado', 'LA ', 'F', 26, 26, 2323, 1996, 'reg'),
('Preki', 'KC ', 'M', 32, 32, 2880, 1996, 'reg'),
('Brian McBride', 'CLB', 'F', 28, 28, 2307, 1996, 'reg'),
('Steve Rammel', 'DC ', 'F', 26, 20, 1649, 1996, 'reg'),
('Vitalis Takawira', 'KC ', 'F', 28, 25, 2233, 1996, 'reg'),
('Paul Bravo', 'SJ ', 'M', 31, 31, 2654, 1996, 'reg'),
('Jason Kreis', 'DAL', 'M', 31, 29, 2564, 1996, 'reg'),
('Giovanni Savarese', 'MET', 'F', 26, 15, 1576, 1996, 'reg')]
sql_statement = "SELECT * FROM players"
rows = execute_sql_statement(sql_statement, conn_norm)
player_lookup_dict = {}
for row in rows:
pid, first, last = row
if last:
name = f'{first} {last}'
player_lookup_dict[name] = pid
else:
player_lookup_dict[first] = pid
sql_statement = "SELECT DISTINCT * FROM players"
rows = execute_sql_statement(sql_statement, conn)
def insert_playerstats(conn, values):
sql = """INSERT INTO PlayerStats(PlayerID, Club, position, gp, gs, min, year, season)
VALUES(?, ?, ?, ?, ?, ?, ?,?)"""
cur = conn.cursor()
cur.execute(sql, values)
return cur.lastrowid
with conn_norm:
for row in rows:
row = list(row)
pid = player_lookup_dict[row[0].strip()]
row[0] = pid
if not row[1]:
continue
row[1] = row[1].strip()
insert_playerstats(conn_norm, row)