13.14. Soccer Database#
import pandas as pd
import sqlite3
df = pd.read_csv('all_players.csv')
df['Club'].isna().sum()
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[1], line 1
----> 1 import pandas as pd
2 import sqlite3
5 df = pd.read_csv('all_players.csv')
ModuleNotFoundError: No module named 'pandas'
conn = sqlite3.connect('all_players.db')
cur = conn.cursor()
df.to_sql('players', conn, if_exists='replace', index=False) # - writes the all_players.df to SQLIte DB
conn.commit()
conn.close()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[2], line 1
----> 1 conn = sqlite3.connect('all_players.db')
2 cur = conn.cursor()
3 df.to_sql('players', conn, if_exists='replace', index=False) # - writes the all_players.df to SQLIte DB
NameError: name 'sqlite3' is not defined
import pandas as pd
import sqlite3
from sqlite3 import Error
def create_connection(db_file, delete_db=False):
import os
if delete_db and os.path.exists(db_file):
os.remove(db_file)
conn = None
try:
conn = sqlite3.connect(db_file)
conn.execute("PRAGMA foreign_keys = 1")
except Error as e:
print(e)
return conn
def create_table(conn, create_table_sql):
try:
c = conn.cursor()
c.execute(create_table_sql)
except Error as e:
print(e)
def execute_sql_statement(sql_statement, conn):
cur = conn.cursor()
cur.execute(sql_statement)
rows = cur.fetchall()
return rows
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[3], line 1
----> 1 import pandas as pd
2 import sqlite3
3 from sqlite3 import Error
ModuleNotFoundError: No module named 'pandas'
display(df)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[4], line 1
----> 1 display(df)
NameError: name 'df' is not defined
13.15. Tables#
Club
Position
Year
Season
Player
PlayerStats
conn = sqlite3.connect('all_players.db')
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[5], line 1
----> 1 conn = sqlite3.connect('all_players.db')
NameError: name 'sqlite3' is not defined
sql_statement = "SELECT DISTINCT CLUB FROM players"
rows = execute_sql_statement(sql_statement, conn)
rows = filter(lambda row: row[0], rows)
clubs = list(map(lambda row: row[0].strip(), rows))
clubs = list(set(clubs))
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[6], line 2
1 sql_statement = "SELECT DISTINCT CLUB FROM players"
----> 2 rows = execute_sql_statement(sql_statement, conn)
3 rows = filter(lambda row: row[0], rows)
4 clubs = list(map(lambda row: row[0].strip(), rows))
NameError: name 'execute_sql_statement' is not defined
from collections import Counter
counter = Counter(clubs)
print(counter)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[7], line 3
1 from collections import Counter
----> 3 counter = Counter(clubs)
4 print(counter)
NameError: name 'clubs' is not defined
# write create statement
# execute create statement
# fetch unique values
# write insert function
# feed unique values into insert function
conn_norm.close()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[9], line 1
----> 1 conn_norm.close()
NameError: name 'conn_norm' is not defined
conn_norm = create_connection('players_norm.db', True)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[10], line 1
----> 1 conn_norm = create_connection('players_norm.db', True)
NameError: name 'create_connection' is not defined
create_table_sql = """CREATE TABLE IF NOT EXISTS[Clubs] (
[Club] TEXT NOT NULL PRIMARY KEY
);
"""
create_table(conn_norm, create_table_sql) # doesn't require commit
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[11], line 6
1 create_table_sql = """CREATE TABLE IF NOT EXISTS[Clubs] (
2 [Club] TEXT NOT NULL PRIMARY KEY
3 );
4 """
----> 6 create_table(conn_norm, create_table_sql) # doesn't require commit
NameError: name 'create_table' is not defined
def insert_club(conn, values):
sql = """INSERT INTO Clubs(Club)
VALUES(?)"""
cur = conn.cursor()
cur.execute(sql, values)
return cur.lastrowid
with conn_norm:
for club in clubs:
insert_club(conn_norm, (club,))
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[12], line 9
5 cur.execute(sql, values)
6 return cur.lastrowid
----> 9 with conn_norm:
10 for club in clubs:
11 insert_club(conn_norm, (club,))
NameError: name 'conn_norm' is not defined
sql_statement = "SELECT DISTINCT CLUB FROM Clubs"
rows = execute_sql_statement(sql_statement, conn_norm)
print(rows)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[13], line 2
1 sql_statement = "SELECT DISTINCT CLUB FROM Clubs"
----> 2 rows = execute_sql_statement(sql_statement, conn_norm)
3 print(rows)
NameError: name 'execute_sql_statement' is not defined
sql_statement = "SELECT DISTINCT POS FROM players"
rows = execute_sql_statement(sql_statement, conn)
rows = filter(lambda row: row[0], rows)
positions = list(map(lambda row: row[0].strip(), rows))
positions = list(set(positions))
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[14], line 2
1 sql_statement = "SELECT DISTINCT POS FROM players"
----> 2 rows = execute_sql_statement(sql_statement, conn)
3 rows = filter(lambda row: row[0], rows)
4 positions = list(map(lambda row: row[0].strip(), rows))
NameError: name 'execute_sql_statement' is not defined
create_table_sql = """CREATE TABLE IF NOT EXISTS[Positions] (
[Position] TEXT NOT NULL PRIMARY KEY
);
"""
create_table(conn_norm, create_table_sql) # doesn't require commit
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[15], line 6
1 create_table_sql = """CREATE TABLE IF NOT EXISTS[Positions] (
2 [Position] TEXT NOT NULL PRIMARY KEY
3 );
4 """
----> 6 create_table(conn_norm, create_table_sql) # doesn't require commit
NameError: name 'create_table' is not defined
def insert_position(conn, values):
sql = """INSERT INTO Positions(Position)
VALUES(?)"""
cur = conn.cursor()
cur.execute(sql, values)
return cur.lastrowid
with conn_norm:
for position in positions:
insert_position(conn_norm, (position,))
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[16], line 9
5 cur.execute(sql, values)
6 return cur.lastrowid
----> 9 with conn_norm:
10 for position in positions:
11 insert_position(conn_norm, (position,))
NameError: name 'conn_norm' is not defined
sql_statement = "SELECT DISTINCT Year FROM players"
rows = execute_sql_statement(sql_statement, conn)
rows = filter(lambda row: row[0], rows)
years = list(map(lambda row: int(row[0]), rows))
years = list(set(years))
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[17], line 2
1 sql_statement = "SELECT DISTINCT Year FROM players"
----> 2 rows = execute_sql_statement(sql_statement, conn)
3 rows = filter(lambda row: row[0], rows)
4 years = list(map(lambda row: int(row[0]), rows))
NameError: name 'execute_sql_statement' is not defined
create_table_sql = """CREATE TABLE IF NOT EXISTS[Years] (
[Year] INTEGER NOT NULL PRIMARY KEY
);
"""
create_table(conn_norm, create_table_sql) # doesn't require commit
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[18], line 6
1 create_table_sql = """CREATE TABLE IF NOT EXISTS[Years] (
2 [Year] INTEGER NOT NULL PRIMARY KEY
3 );
4 """
----> 6 create_table(conn_norm, create_table_sql) # doesn't require commit
NameError: name 'create_table' is not defined
def insert_year(conn, values):
sql = """INSERT INTO Years(Year)
VALUES(?)"""
cur = conn.cursor()
cur.execute(sql, values)
return cur.lastrowid
with conn_norm:
for year in years:
insert_year(conn_norm, (year,))
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[19], line 9
5 cur.execute(sql, values)
6 return cur.lastrowid
----> 9 with conn_norm:
10 for year in years:
11 insert_year(conn_norm, (year,))
NameError: name 'conn_norm' is not defined
sql_statement = "SELECT DISTINCT season FROM players"
rows = execute_sql_statement(sql_statement, conn)
rows = filter(lambda row: row[0], rows)
seasons = list(map(lambda row: row[0].strip(), rows))
seasons = list(set(seasons))
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[20], line 2
1 sql_statement = "SELECT DISTINCT season FROM players"
----> 2 rows = execute_sql_statement(sql_statement, conn)
3 rows = filter(lambda row: row[0], rows)
4 seasons = list(map(lambda row: row[0].strip(), rows))
NameError: name 'execute_sql_statement' is not defined
create_table_sql = """CREATE TABLE IF NOT EXISTS[Seasons] (
[Season] TEXT NOT NULL PRIMARY KEY
);
"""
create_table(conn_norm, create_table_sql) # doesn't require commit
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[21], line 6
1 create_table_sql = """CREATE TABLE IF NOT EXISTS[Seasons] (
2 [Season] TEXT NOT NULL PRIMARY KEY
3 );
4 """
----> 6 create_table(conn_norm, create_table_sql) # doesn't require commit
NameError: name 'create_table' is not defined
def insert_season(conn, values):
sql = """INSERT INTO Seasons(season)
VALUES(?)"""
cur = conn.cursor()
cur.execute(sql, values)
return cur.lastrowid
with conn_norm:
for season in seasons:
insert_season(conn_norm, (season,))
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[22], line 9
5 cur.execute(sql, values)
6 return cur.lastrowid
----> 9 with conn_norm:
10 for season in seasons:
11 insert_season(conn_norm, (season,))
NameError: name 'conn_norm' is not defined
sql_statement = "SELECT DISTINCT player FROM players"
rows = execute_sql_statement(sql_statement, conn)
players = []
for row in rows:
row = row[0].strip()
name = row.split(' ', 1)
if len(name) == 2:
first, last = name
first = first.strip()
last = last.strip()
players.append((first, last))
elif len(name) == 1:
first = name[0]
players.append((first, None))
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[23], line 2
1 sql_statement = "SELECT DISTINCT player FROM players"
----> 2 rows = execute_sql_statement(sql_statement, conn)
3 players = []
4 for row in rows:
NameError: name 'execute_sql_statement' is not defined
create_table_sql = """CREATE TABLE IF NOT EXISTS[Players] (
[PlayerID] INTEGER NOT NULL PRIMARY KEY,
[FirstName] TEXT NOT NULL,
[LastName] TEXT
);
"""
create_table(conn_norm, create_table_sql) # doesn't require commit
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[24], line 8
1 create_table_sql = """CREATE TABLE IF NOT EXISTS[Players] (
2 [PlayerID] INTEGER NOT NULL PRIMARY KEY,
3 [FirstName] TEXT NOT NULL,
4 [LastName] TEXT
5 );
6 """
----> 8 create_table(conn_norm, create_table_sql) # doesn't require commit
NameError: name 'create_table' is not defined
def insert_player(conn, values):
sql = """INSERT INTO Players(FirstName, LastName)
VALUES(?, ?)"""
cur = conn.cursor()
cur.execute(sql, values)
return cur.lastrowid
with conn_norm:
for player in players:
insert_player(conn_norm, player)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[25], line 9
5 cur.execute(sql, values)
6 return cur.lastrowid
----> 9 with conn_norm:
10 for player in players:
11 insert_player(conn_norm, player)
NameError: name 'conn_norm' is not defined
cur = conn_norm.cursor()
cur.execute('DROP TABLE PlayerStats')
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[26], line 1
----> 1 cur = conn_norm.cursor()
2 cur.execute('DROP TABLE PlayerStats')
NameError: name 'conn_norm' is not defined
create_table_sql = """CREATE TABLE IF NOT EXISTS[PlayerStats] (
[StatID] INTEGER NOT NULL PRIMARY KEY,
[PlayerID] INTEGER NOT NULL,
[Club] TEXT,
[Position] TEXT NOT NULL,
[GP] INTEGER NOT NULL,
[GS] INTEGER NOT NULL,
[MIN] INTEGER NOT NULL,
[Year] INTEGER NOT NULL,
[Season] TEXT NOT NULL,
FOREIGN KEY(PlayerID) REFERENCES Players(PlayerID),
FOREIGN KEY(Club) REFERENCES Clubs(Club),
FOREIGN KEY(Year) REFERENCES Years(Year),
FOREIGN KEY(Season) REFERENCES Seasons(Season)
);
"""
create_table(conn_norm, create_table_sql) # doesn't require commit
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[27], line 18
1 create_table_sql = """CREATE TABLE IF NOT EXISTS[PlayerStats] (
2 [StatID] INTEGER NOT NULL PRIMARY KEY,
3 [PlayerID] INTEGER NOT NULL,
(...) 15 );
16 """
---> 18 create_table(conn_norm, create_table_sql) # doesn't require commit
NameError: name 'create_table' is not defined
sql_statement = "SELECT DISTINCT * FROM players"
rows = execute_sql_statement(sql_statement, conn)
rows[:10]
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[28], line 2
1 sql_statement = "SELECT DISTINCT * FROM players"
----> 2 rows = execute_sql_statement(sql_statement, conn)
3 rows[:10]
NameError: name 'execute_sql_statement' is not defined
sql_statement = "SELECT * FROM players"
rows = execute_sql_statement(sql_statement, conn_norm)
player_lookup_dict = {}
for row in rows:
pid, first, last = row
if last:
name = f'{first} {last}'
player_lookup_dict[name] = pid
else:
player_lookup_dict[first] = pid
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[29], line 2
1 sql_statement = "SELECT * FROM players"
----> 2 rows = execute_sql_statement(sql_statement, conn_norm)
3 player_lookup_dict = {}
4 for row in rows:
NameError: name 'execute_sql_statement' is not defined
sql_statement = "SELECT DISTINCT * FROM players"
rows = execute_sql_statement(sql_statement, conn)
def insert_playerstats(conn, values):
sql = """INSERT INTO PlayerStats(PlayerID, Club, position, gp, gs, min, year, season)
VALUES(?, ?, ?, ?, ?, ?, ?,?)"""
cur = conn.cursor()
cur.execute(sql, values)
return cur.lastrowid
with conn_norm:
for row in rows:
row = list(row)
pid = player_lookup_dict[row[0].strip()]
row[0] = pid
if not row[1]:
continue
row[1] = row[1].strip()
insert_playerstats(conn_norm, row)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[30], line 2
1 sql_statement = "SELECT DISTINCT * FROM players"
----> 2 rows = execute_sql_statement(sql_statement, conn)
5 def insert_playerstats(conn, values):
6 sql = """INSERT INTO PlayerStats(PlayerID, Club, position, gp, gs, min, year, season)
7 VALUES(?, ?, ?, ?, ?, ?, ?,?)"""
NameError: name 'execute_sql_statement' is not defined