Soccer Database

13.14. Soccer Database#

import pandas as pd
import sqlite3


df = pd.read_csv('all_players.csv')
df['Club'].isna().sum()
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 1
----> 1 import pandas as pd
      2 import sqlite3
      5 df = pd.read_csv('all_players.csv')

ModuleNotFoundError: No module named 'pandas'
conn = sqlite3.connect('all_players.db')
cur = conn.cursor()
df.to_sql('players', conn, if_exists='replace', index=False) # - writes the all_players.df to SQLIte DB
conn.commit()
conn.close()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[2], line 1
----> 1 conn = sqlite3.connect('all_players.db')
      2 cur = conn.cursor()
      3 df.to_sql('players', conn, if_exists='replace', index=False) # - writes the all_players.df to SQLIte DB

NameError: name 'sqlite3' is not defined
import pandas as pd
import sqlite3
from sqlite3 import Error


def create_connection(db_file, delete_db=False):
    import os
    if delete_db and os.path.exists(db_file):
        os.remove(db_file)

    conn = None
    try:
        conn = sqlite3.connect(db_file)
        conn.execute("PRAGMA foreign_keys = 1")
    except Error as e:
        print(e)

    return conn


def create_table(conn, create_table_sql):
    try:
        c = conn.cursor()
        c.execute(create_table_sql)
    except Error as e:
        print(e)
        
def execute_sql_statement(sql_statement, conn):
    cur = conn.cursor()
    cur.execute(sql_statement)
    rows = cur.fetchall()

    return rows
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[3], line 1
----> 1 import pandas as pd
      2 import sqlite3
      3 from sqlite3 import Error

ModuleNotFoundError: No module named 'pandas'
display(df)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[4], line 1
----> 1 display(df)

NameError: name 'df' is not defined

13.15. Tables#

  • Club

  • Position

  • Year

  • Season

  • Player

  • PlayerStats

conn = sqlite3.connect('all_players.db')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[5], line 1
----> 1 conn = sqlite3.connect('all_players.db')

NameError: name 'sqlite3' is not defined
sql_statement = "SELECT DISTINCT CLUB FROM players"
rows = execute_sql_statement(sql_statement, conn)
rows = filter(lambda row: row[0], rows)
clubs = list(map(lambda row: row[0].strip(), rows))
clubs = list(set(clubs))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[6], line 2
      1 sql_statement = "SELECT DISTINCT CLUB FROM players"
----> 2 rows = execute_sql_statement(sql_statement, conn)
      3 rows = filter(lambda row: row[0], rows)
      4 clubs = list(map(lambda row: row[0].strip(), rows))

NameError: name 'execute_sql_statement' is not defined
from collections import Counter

counter = Counter(clubs)
print(counter)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[7], line 3
      1 from collections import Counter
----> 3 counter = Counter(clubs)
      4 print(counter)

NameError: name 'clubs' is not defined
# write create statement
# execute create statement
# fetch unique values
# write insert function
# feed unique values into insert function
conn_norm.close()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[9], line 1
----> 1 conn_norm.close()

NameError: name 'conn_norm' is not defined
conn_norm = create_connection('players_norm.db', True)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[10], line 1
----> 1 conn_norm = create_connection('players_norm.db', True)

NameError: name 'create_connection' is not defined
create_table_sql = """CREATE TABLE IF NOT EXISTS[Clubs] (
    [Club] TEXT NOT NULL PRIMARY KEY
);
"""

create_table(conn_norm, create_table_sql) # doesn't require commit
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[11], line 6
      1 create_table_sql = """CREATE TABLE IF NOT EXISTS[Clubs] (
      2     [Club] TEXT NOT NULL PRIMARY KEY
      3 );
      4 """
----> 6 create_table(conn_norm, create_table_sql) # doesn't require commit

NameError: name 'create_table' is not defined
def insert_club(conn, values):
    sql = """INSERT INTO Clubs(Club)
                VALUES(?)"""
    cur = conn.cursor()
    cur.execute(sql, values)
    return cur.lastrowid


with conn_norm:
    for club in clubs:
        insert_club(conn_norm, (club,))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[12], line 9
      5     cur.execute(sql, values)
      6     return cur.lastrowid
----> 9 with conn_norm:
     10     for club in clubs:
     11         insert_club(conn_norm, (club,))

NameError: name 'conn_norm' is not defined
sql_statement = "SELECT DISTINCT CLUB FROM Clubs"
rows = execute_sql_statement(sql_statement, conn_norm)
print(rows)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[13], line 2
      1 sql_statement = "SELECT DISTINCT CLUB FROM Clubs"
----> 2 rows = execute_sql_statement(sql_statement, conn_norm)
      3 print(rows)

NameError: name 'execute_sql_statement' is not defined
sql_statement = "SELECT DISTINCT POS FROM players"
rows = execute_sql_statement(sql_statement, conn)
rows = filter(lambda row: row[0], rows)
positions = list(map(lambda row: row[0].strip(), rows))
positions = list(set(positions))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[14], line 2
      1 sql_statement = "SELECT DISTINCT POS FROM players"
----> 2 rows = execute_sql_statement(sql_statement, conn)
      3 rows = filter(lambda row: row[0], rows)
      4 positions = list(map(lambda row: row[0].strip(), rows))

NameError: name 'execute_sql_statement' is not defined
create_table_sql = """CREATE TABLE IF NOT EXISTS[Positions] (
    [Position] TEXT NOT NULL PRIMARY KEY
);
"""

create_table(conn_norm, create_table_sql) # doesn't require commit
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[15], line 6
      1 create_table_sql = """CREATE TABLE IF NOT EXISTS[Positions] (
      2     [Position] TEXT NOT NULL PRIMARY KEY
      3 );
      4 """
----> 6 create_table(conn_norm, create_table_sql) # doesn't require commit

NameError: name 'create_table' is not defined
def insert_position(conn, values):
    sql = """INSERT INTO Positions(Position)
                VALUES(?)"""
    cur = conn.cursor()
    cur.execute(sql, values)
    return cur.lastrowid


with conn_norm:
    for position in positions:
        insert_position(conn_norm, (position,))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[16], line 9
      5     cur.execute(sql, values)
      6     return cur.lastrowid
----> 9 with conn_norm:
     10     for position in positions:
     11         insert_position(conn_norm, (position,))

NameError: name 'conn_norm' is not defined
sql_statement = "SELECT DISTINCT Year FROM players"
rows = execute_sql_statement(sql_statement, conn)
rows = filter(lambda row: row[0], rows)
years = list(map(lambda row: int(row[0]), rows))
years = list(set(years))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[17], line 2
      1 sql_statement = "SELECT DISTINCT Year FROM players"
----> 2 rows = execute_sql_statement(sql_statement, conn)
      3 rows = filter(lambda row: row[0], rows)
      4 years = list(map(lambda row: int(row[0]), rows))

NameError: name 'execute_sql_statement' is not defined
create_table_sql = """CREATE TABLE IF NOT EXISTS[Years] (
    [Year] INTEGER NOT NULL PRIMARY KEY
);
"""

create_table(conn_norm, create_table_sql) # doesn't require commit
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[18], line 6
      1 create_table_sql = """CREATE TABLE IF NOT EXISTS[Years] (
      2     [Year] INTEGER NOT NULL PRIMARY KEY
      3 );
      4 """
----> 6 create_table(conn_norm, create_table_sql) # doesn't require commit

NameError: name 'create_table' is not defined
def insert_year(conn, values):
    sql = """INSERT INTO Years(Year)
                VALUES(?)"""
    cur = conn.cursor()
    cur.execute(sql, values)
    return cur.lastrowid


with conn_norm:
    for year in years:
        insert_year(conn_norm, (year,))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[19], line 9
      5     cur.execute(sql, values)
      6     return cur.lastrowid
----> 9 with conn_norm:
     10     for year in years:
     11         insert_year(conn_norm, (year,))

NameError: name 'conn_norm' is not defined
sql_statement = "SELECT DISTINCT season FROM players"
rows = execute_sql_statement(sql_statement, conn)
rows = filter(lambda row: row[0], rows)
seasons = list(map(lambda row: row[0].strip(), rows))
seasons = list(set(seasons))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[20], line 2
      1 sql_statement = "SELECT DISTINCT season FROM players"
----> 2 rows = execute_sql_statement(sql_statement, conn)
      3 rows = filter(lambda row: row[0], rows)
      4 seasons = list(map(lambda row: row[0].strip(), rows))

NameError: name 'execute_sql_statement' is not defined
create_table_sql = """CREATE TABLE IF NOT EXISTS[Seasons] (
    [Season] TEXT NOT NULL PRIMARY KEY
);
"""

create_table(conn_norm, create_table_sql) # doesn't require commit
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[21], line 6
      1 create_table_sql = """CREATE TABLE IF NOT EXISTS[Seasons] (
      2     [Season] TEXT NOT NULL PRIMARY KEY
      3 );
      4 """
----> 6 create_table(conn_norm, create_table_sql) # doesn't require commit

NameError: name 'create_table' is not defined
def insert_season(conn, values):
    sql = """INSERT INTO Seasons(season)
                VALUES(?)"""
    cur = conn.cursor()
    cur.execute(sql, values)
    return cur.lastrowid


with conn_norm:
    for season in seasons:
        insert_season(conn_norm, (season,))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[22], line 9
      5     cur.execute(sql, values)
      6     return cur.lastrowid
----> 9 with conn_norm:
     10     for season in seasons:
     11         insert_season(conn_norm, (season,))

NameError: name 'conn_norm' is not defined
sql_statement = "SELECT DISTINCT player FROM players"
rows = execute_sql_statement(sql_statement, conn)
players = []
for row in rows:
    row = row[0].strip()
    name = row.split(' ', 1)
    if len(name) == 2:
        first, last = name
        first = first.strip()
        last = last.strip()
        players.append((first, last))
    elif len(name) == 1:
        first = name[0]
        players.append((first, None))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[23], line 2
      1 sql_statement = "SELECT DISTINCT player FROM players"
----> 2 rows = execute_sql_statement(sql_statement, conn)
      3 players = []
      4 for row in rows:

NameError: name 'execute_sql_statement' is not defined
create_table_sql = """CREATE TABLE IF NOT EXISTS[Players] (
    [PlayerID] INTEGER NOT NULL PRIMARY KEY,
    [FirstName] TEXT NOT NULL,
    [LastName] TEXT
);
"""

create_table(conn_norm, create_table_sql) # doesn't require commit
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[24], line 8
      1 create_table_sql = """CREATE TABLE IF NOT EXISTS[Players] (
      2     [PlayerID] INTEGER NOT NULL PRIMARY KEY,
      3     [FirstName] TEXT NOT NULL,
      4     [LastName] TEXT
      5 );
      6 """
----> 8 create_table(conn_norm, create_table_sql) # doesn't require commit

NameError: name 'create_table' is not defined
def insert_player(conn, values):
    sql = """INSERT INTO Players(FirstName, LastName)
                VALUES(?, ?)"""
    cur = conn.cursor()
    cur.execute(sql, values)
    return cur.lastrowid


with conn_norm:
    for player in players:
        insert_player(conn_norm, player)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[25], line 9
      5     cur.execute(sql, values)
      6     return cur.lastrowid
----> 9 with conn_norm:
     10     for player in players:
     11         insert_player(conn_norm, player)

NameError: name 'conn_norm' is not defined
cur = conn_norm.cursor()
cur.execute('DROP TABLE PlayerStats')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[26], line 1
----> 1 cur = conn_norm.cursor()
      2 cur.execute('DROP TABLE PlayerStats')

NameError: name 'conn_norm' is not defined
create_table_sql = """CREATE TABLE IF NOT EXISTS[PlayerStats] (
    [StatID] INTEGER NOT NULL PRIMARY KEY,
    [PlayerID] INTEGER NOT NULL,
    [Club] TEXT,
    [Position] TEXT NOT NULL,
    [GP] INTEGER NOT NULL,
    [GS] INTEGER NOT NULL,
    [MIN] INTEGER NOT NULL,
    [Year] INTEGER NOT NULL,
    [Season] TEXT NOT NULL,
    FOREIGN KEY(PlayerID) REFERENCES Players(PlayerID),
    FOREIGN KEY(Club) REFERENCES Clubs(Club),
    FOREIGN KEY(Year) REFERENCES Years(Year),
    FOREIGN KEY(Season) REFERENCES Seasons(Season)     
);
"""

create_table(conn_norm, create_table_sql) # doesn't require commit
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[27], line 18
      1 create_table_sql = """CREATE TABLE IF NOT EXISTS[PlayerStats] (
      2     [StatID] INTEGER NOT NULL PRIMARY KEY,
      3     [PlayerID] INTEGER NOT NULL,
   (...)     15 );
     16 """
---> 18 create_table(conn_norm, create_table_sql) # doesn't require commit

NameError: name 'create_table' is not defined
sql_statement = "SELECT DISTINCT * FROM players"
rows = execute_sql_statement(sql_statement, conn)
rows[:10]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[28], line 2
      1 sql_statement = "SELECT DISTINCT * FROM players"
----> 2 rows = execute_sql_statement(sql_statement, conn)
      3 rows[:10]

NameError: name 'execute_sql_statement' is not defined
sql_statement = "SELECT * FROM players"
rows = execute_sql_statement(sql_statement, conn_norm)
player_lookup_dict = {}
for row in rows:
    pid, first, last = row
    if last:
        name = f'{first} {last}'
        player_lookup_dict[name] = pid
    else:
        player_lookup_dict[first] = pid
        
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[29], line 2
      1 sql_statement = "SELECT * FROM players"
----> 2 rows = execute_sql_statement(sql_statement, conn_norm)
      3 player_lookup_dict = {}
      4 for row in rows:

NameError: name 'execute_sql_statement' is not defined
sql_statement = "SELECT DISTINCT * FROM players"
rows = execute_sql_statement(sql_statement, conn)


def insert_playerstats(conn, values):
    sql = """INSERT INTO PlayerStats(PlayerID, Club, position, gp, gs, min, year, season)
                VALUES(?, ?, ?, ?, ?, ?, ?,?)"""
    cur = conn.cursor()
    cur.execute(sql, values)
    return cur.lastrowid

with conn_norm:
    for row in rows:
        row = list(row)
        pid = player_lookup_dict[row[0].strip()]            
        row[0] = pid
        if not row[1]:
            continue
        row[1] = row[1].strip()
        insert_playerstats(conn_norm, row)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[30], line 2
      1 sql_statement = "SELECT DISTINCT * FROM players"
----> 2 rows = execute_sql_statement(sql_statement, conn)
      5 def insert_playerstats(conn, values):
      6     sql = """INSERT INTO PlayerStats(PlayerID, Club, position, gp, gs, min, year, season)
      7                 VALUES(?, ?, ?, ?, ?, ?, ?,?)"""

NameError: name 'execute_sql_statement' is not defined