13.11. Python and SQLite3#
13.11.1. Basic example#
This is a basic example to show you how to use Pandas to open a database file and execute an SQL query.
import pandas as pd
import sqlite3
conn = sqlite3.connect("join_example_database.db")
sql_statement = "select Quantities.product, Quantities.quantity FROM Quantities"
df = pd.read_sql_query(sql_statement, conn)
display(df)
13.11.2. Homework Example#
Homework will ask you to write an SQL query. In that case you only have to update the
sql_statement
variable and leave rest of the code as is.
def dbex_1():
"""
Return all products with quantity greater than 60;
output columns: product_name, product_quantity
output (df, sql_statement)
"""
import pandas as pd
import sqlite3
conn = sqlite3.connect("join_example_database.db")
cur = conn.cursor()
sql_statement = "select Quantities.product, Quantities.quantity FROM Quantities Where quantity > 60;"
df = pd.read_sql_query(sql_statement, conn)
return (df, sql_statement)
The following code shows how the
sql_statement
will be verified.
df, sql_statement = dbex_1()
data = pd.read_csv("dbex_1.csv")
conn = sqlite3.connect("join_example_database.db")
cur = conn.cursor()
df = pd.read_sql_query(sql_statement, conn)
assert df.equals(data) == True
13.11.3. Utility functions#
Two utility functions are provided that you will use often
import sqlite3
from sqlite3 import Error
def create_connection(db_file):
conn = None
try:
conn = sqlite3.connect(db_file)
except Error as e:
print(e)
return conn
def create_table(conn, create_table_sql):
try:
c = conn.cursor()
c.execute(create_table_sql)
except Error as e:
print(e)
create_connection
opens an sqlite3 file for writing andcreate_table
function creates a table.
13.11.4. Example SQL query using Python#
db_file = 'student_test_in_class.db'
conn = create_connection(db_file)
cur = conn.cursor()
cur.execute("SELECT first_name, last_name FROM students")
rows = cur.fetchall()
for row in rows:
print(row)
13.11.5. Steps to creating a database using Python#
Write a create table sql statement(s)
Write insert function(s)
Read files or data and use the insert function(s) to insert data into table
13.11.5.1. Create a database and populate from a file#
import os
db_file = 'student_test_in_class.db'
if os.path.exists(db_file):
os.remove(db_file)
create_table_sql = """
CREATE TABLE students (
last_name TEXT,
first_name TEXT,
username TEXT,
exam1 REAL,
exam2 REAL,
exam3 REAL
);
"""
conn = create_connection(db_file)
def insert_student(conn, values):
sql = ''' INSERT INTO students(last_name,first_name,username,exam1,exam2,exam3)
VALUES(?,?,?,?,?,?) '''
cur = conn.cursor()
cur.execute(sql, values)
return cur.lastrowid
with conn:
# create
create_table(conn, create_table_sql)
# insert
for student in open('students.tsv', 'r'):
values = student.strip().split('\t')
print(values)
rid = insert_student(conn, values) ## What is rid?
13.11.5.2. Fetch data#
sql_statement = "SELECT * FROM Students"
df = pd.read_sql_query(sql_statement, conn)
display(df)
13.11.6. Another example#
You must understand that the insert statements take in a tuple even if you are inserting one value!!!!
# 1 -- create table sql statements
# 2 -- create insert functions
# 3 -- read files or data and use the insert function
db = 'depts_students.db'
if os.path.exists(db):
os.remove(db)
create_table_departments_sql = """ CREATE TABLE [Departments] (
[DepartmentId] INTEGER NOT NULL PRIMARY KEY,
[DepartmentName] TEXT
); """
create_table_students_sql = """CREATE TABLE [Students] (
[StudentId] INTEGER PRIMARY KEY NOT NULL,
[StudentName] TEXT NOT NULL,
[DepartmentId] INTEGER,
[DateOfBirth] DATE,
FOREIGN KEY(DepartmentId) REFERENCES Departments(DepartmentId)
);"""
depts = ('IT', 'Physics', 'Arts', 'Math')
students = (
('Michael', 1, '1998-10-12'),
('John', 1, '1998-10-12'),
('Jack', 1, '1998-10-12'),
('Sara', 2, '1998-10-12'),
('Sally', 2, '1998-10-12'),
('Jena', None, '1998-10-12'),
('Nancy', 2, '1998-10-12'),
('Adam', 3, '1998-10-12'),
('Stevens', 3, '1998-10-12'),
('George', None, '1998-10-12')
)
def insert_depts(conn, values):
sql = ''' INSERT INTO Departments(DepartmentName)
VALUES(?) '''
cur = conn.cursor()
cur.execute(sql, values)
return cur.lastrowid
def insert_student(conn, values):
sql = ''' INSERT INTO Students(StudentName, DepartmentId, DateOfBirth)
VALUES(?,?,?) '''
cur = conn.cursor()
cur.execute(sql, values)
return cur.lastrowid
conn = create_connection(db)
with conn:
create_table(conn, create_table_departments_sql)
create_table(conn, create_table_students_sql)
for values in depts:
insert_depts(conn, (values, ))
for values in students:
insert_student(conn, values)
13.11.6.1. Turning a single a value into tuple#
depts = ('IT', 'Physics', 'Arts', 'Math')
for val in depts:
print(val)
print((val,))
13.11.6.2. Table#
sql_statement = "SELECT * FROM Students"
df = pd.read_sql_query(sql_statement, conn)
display(df)
13.11.6.3. Fetch all#
cur = conn.cursor()
cur.execute('SELECT * FROM Departments')
for row in cur.fetchall():
print(row)
13.11.6.4. Fetch all again#
Why no data?
for row in cur.fetchall():
print(row)
13.11.6.5. Fetch one#
cur.execute('SELECT * FROM Departments')
print(cur.fetchone())
print(cur.fetchone())
print(cur.fetchone())
print(cur.fetchone())
print(cur.fetchone())
13.11.7. Creating a foreign key dictionary#
Creating a foreign key dictionary lookup speeds things up because you are avoiding multiple call to the database
conn = create_connection(db)
cur = conn.cursor()
cur.execute('SELECT * FROM Departments')
dept_fk_lookup = {}
for row in cur.fetchall():
key, text = row
dept_fk_lookup[text] = key
print(dept_fk_lookup)
13.11.8. Insert data using FK lookup dictionary#
db = 'depts_students.db'
if os.path.exists(db):
os.remove(db)
create_table_departments_sql = """ CREATE TABLE [Departments] (
[DepartmentId] INTEGER NOT NULL PRIMARY KEY,
[DepartmentName] TEXT
); """
create_table_students_sql = """CREATE TABLE [Students] (
[StudentId] INTEGER PRIMARY KEY NOT NULL,
[StudentName] TEXT NOT NULL,
[DepartmentId] INTEGER,
[DateOfBirth] DATE,
FOREIGN KEY(DepartmentId) REFERENCES Departments(DepartmentId)
);"""
depts = ('IT', 'Physics', 'Arts', 'Math')
students = (
('Michael', 'IT', '1998-10-12'),
('John', 'IT', '1998-10-12'),
('Jack', 'IT', '1998-10-12'),
('Sara', 'Physics', '1998-10-12'),
('Sally', 'Physics', '1998-10-12'),
('Jena', None, '1998-10-12'),
('Nancy', 'Physics', '1998-10-12'),
('Adam', 'Arts', '1998-10-12'),
('Stevens', 'Arts', '1998-10-12'),
('George', None, '1998-10-12')
)
with conn:
create_table(conn, create_table_departments_sql)
create_table(conn, create_table_students_sql)
for values in depts:
insert_depts(conn, (values, ))
for values in students:
values = list(values)
print('BEFORE', values)
key = values[1]
print('key', key)
if key:
values[1] = dept_fk_lookup[values[1]]
print('AFTER',values)
insert_student(con