13.3. Filtering#
13.3.1. Logical Operators#
Operator |
What it does? |
---|---|
|
True if both conditions are true |
|
True if one of two conditions is true |
|
Negate a specified condition |
|
Allows for multiple OR conditions |
|
Negate multiple AND conditions |
|
True if a record exists |
|
True if there is a string match using % |
13.3.2. Relational Operators#
Assume a=1
and b=1
Relational Operators |
What it does? |
---|---|
= |
True if a has the same value as b |
<>, != |
True if a does not have the same value as b |
> |
True if a is greater than b |
< |
True if a is less than b |
>= |
True if a is greater than or equal to b |
<= |
True if a is less than or equal to b |
13.3.3. Conditional Evaluation#
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientGender = 'Male' AND PatientDateOfBirth < '1950-01-01'
ORDER BY PatientDateOfBirth
Show code cell source
import sqlite3
import pandas as pd
conn = sqlite3.connect('100_patients.db')
sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientGender = 'Male' AND PatientDateOfBirth < '1950-01-01'
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[1], line 2
1 import sqlite3
----> 2 import pandas as pd
4 conn = sqlite3.connect('100_patients.db')
6 sql_statement = """
7 SELECT *
8 FROM PatientCorePopulatedTable
9 WHERE PatientGender = 'Male' AND PatientDateOfBirth < '1950-01-01'
10 ORDER BY PatientDateOfBirth
11 """
ModuleNotFoundError: No module named 'pandas'
13.3.4. Using Parenthesis#
SELECT *
FROM PatientCorePopulatedTable
WHERE (PatientGender = 'Male' OR PatientRace = 'White') AND PatientDateOfBirth < '1950-01-01'
ORDER BY PatientDateOfBirth
Show code cell source
import sqlite3
import pandas as pd
conn = sqlite3.connect('100_patients.db')
sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE (PatientGender = 'Male' OR PatientRace = 'White') AND PatientDateOfBirth < '1950-01-01'
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[2], line 2
1 import sqlite3
----> 2 import pandas as pd
4 conn = sqlite3.connect('100_patients.db')
6 sql_statement = """
7 SELECT *
8 FROM PatientCorePopulatedTable
9 WHERE (PatientGender = 'Male' OR PatientRace = 'White') AND PatientDateOfBirth < '1950-01-01'
10 ORDER BY PatientDateOfBirth
11 """
ModuleNotFoundError: No module named 'pandas'
13.3.5. Using the NOT
Operator#
SELECT *
FROM PatientCorePopulatedTable
WHERE NOT (PatientGender = 'Male' OR PatientRace = 'White') AND PatientDateOfBirth < '1950-01-01'
ORDER BY PatientDateOfBirth
Show code cell source
import sqlite3
import pandas as pd
conn = sqlite3.connect('100_patients.db')
sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE NOT (PatientGender = 'Male' OR PatientRace = 'White') AND PatientDateOfBirth < '1950-01-01'
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[3], line 2
1 import sqlite3
----> 2 import pandas as pd
4 conn = sqlite3.connect('100_patients.db')
6 sql_statement = """
7 SELECT *
8 FROM PatientCorePopulatedTable
9 WHERE NOT (PatientGender = 'Male' OR PatientRace = 'White') AND PatientDateOfBirth < '1950-01-01'
10 ORDER BY PatientDateOfBirth
11 """
ModuleNotFoundError: No module named 'pandas'
13.3.6. Inequality condition#
SELECT *
FROM PatientCorePopulatedTable
WHERE (PatientGender = 'Male' AND PatientRace <> 'White') AND PatientDateOfBirth < '1950-01-01'
ORDER BY PatientDateOfBirth
Show code cell source
import sqlite3
import pandas as pd
conn = sqlite3.connect('100_patients.db')
sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE (PatientGender = 'Male' AND PatientRace <> 'White') AND PatientDateOfBirth < '1950-01-01'
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[4], line 2
1 import sqlite3
----> 2 import pandas as pd
4 conn = sqlite3.connect('100_patients.db')
6 sql_statement = """
7 SELECT *
8 FROM PatientCorePopulatedTable
9 WHERE (PatientGender = 'Male' AND PatientRace <> 'White') AND PatientDateOfBirth < '1950-01-01'
10 ORDER BY PatientDateOfBirth
11 """
ModuleNotFoundError: No module named 'pandas'
13.3.7. Range using BETWEEN
condition#
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientDateOfBirth BETWEEN '1920-01-01' AND '1965-01-01'
ORDER BY PatientDateOfBirth
Show code cell source
import sqlite3
import pandas as pd
conn = sqlite3.connect('100_patients.db')
sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientDateOfBirth BETWEEN '1920-01-01' AND '1965-01-01'
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[5], line 2
1 import sqlite3
----> 2 import pandas as pd
4 conn = sqlite3.connect('100_patients.db')
6 sql_statement = """
7 SELECT *
8 FROM PatientCorePopulatedTable
9 WHERE PatientDateOfBirth BETWEEN '1920-01-01' AND '1965-01-01'
10 ORDER BY PatientDateOfBirth
11 """
ModuleNotFoundError: No module named 'pandas'
13.3.8. String Condition#
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientID BETWEEN '2A' AND '53'
ORDER BY PatientID
Show code cell source
import sqlite3
import pandas as pd
conn = sqlite3.connect('100_patients.db')
sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientID BETWEEN '2A' AND '53'
ORDER BY PatientID
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[6], line 2
1 import sqlite3
----> 2 import pandas as pd
4 conn = sqlite3.connect('100_patients.db')
6 sql_statement = """
7 SELECT *
8 FROM PatientCorePopulatedTable
9 WHERE PatientID BETWEEN '2A' AND '53'
10 ORDER BY PatientID
11 """
ModuleNotFoundError: No module named 'pandas'
13.3.9. Membership Condition#
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientRace = 'White' OR PatientRace = 'Asian'
ORDER BY PatientDateOfBirth
Show code cell source
import sqlite3
import pandas as pd
conn = sqlite3.connect('100_patients.db')
sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientRace = 'White' OR PatientRace = 'Asian'
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[7], line 2
1 import sqlite3
----> 2 import pandas as pd
4 conn = sqlite3.connect('100_patients.db')
6 sql_statement = """
7 SELECT *
8 FROM PatientCorePopulatedTable
9 WHERE PatientRace = 'White' OR PatientRace = 'Asian'
10 ORDER BY PatientDateOfBirth
11 """
ModuleNotFoundError: No module named 'pandas'
13.3.9.1. IN
condition#
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientRace IN ('White', 'Asian')
ORDER BY PatientDateOfBirth
Show code cell source
import sqlite3
import pandas as pd
conn = sqlite3.connect('100_patients.db')
sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientRace IN ('White', 'Asian')
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[8], line 2
1 import sqlite3
----> 2 import pandas as pd
4 conn = sqlite3.connect('100_patients.db')
6 sql_statement = """
7 SELECT *
8 FROM PatientCorePopulatedTable
9 WHERE PatientRace IN ('White', 'Asian')
10 ORDER BY PatientDateOfBirth
11 """
ModuleNotFoundError: No module named 'pandas'
13.3.9.2. NOT IN
condition#
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientRace NOT IN ('White', 'Asian')
ORDER BY PatientDateOfBirth
Show code cell source
import sqlite3
import pandas as pd
conn = sqlite3.connect('100_patients.db')
sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientRace NOT IN ('White', 'Asian')
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[9], line 2
1 import sqlite3
----> 2 import pandas as pd
4 conn = sqlite3.connect('100_patients.db')
6 sql_statement = """
7 SELECT *
8 FROM PatientCorePopulatedTable
9 WHERE PatientRace NOT IN ('White', 'Asian')
10 ORDER BY PatientDateOfBirth
11 """
ModuleNotFoundError: No module named 'pandas'
13.3.9.3. Using subqueries#
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientID IN (SELECT PatientID FROM PatientCorePopulatedTable WHERE PatientDateOfBirth > '1980-01-01')
ORDER BY PatientDateOfBirth
Show code cell source
import sqlite3
import pandas as pd
conn = sqlite3.connect('100_patients.db')
sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientID IN (SELECT PatientID FROM PatientCorePopulatedTable WHERE PatientDateOfBirth > '1980-01-01')
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[10], line 2
1 import sqlite3
----> 2 import pandas as pd
4 conn = sqlite3.connect('100_patients.db')
6 sql_statement = """
7 SELECT *
8 FROM PatientCorePopulatedTable
9 WHERE PatientID IN (SELECT PatientID FROM PatientCorePopulatedTable WHERE PatientDateOfBirth > '1980-01-01')
10 ORDER BY PatientDateOfBirth
11 """
ModuleNotFoundError: No module named 'pandas'
13.3.10. Using Wildcards#
SELECT *
FROM AdmissionsDiagnosesCorePopulatedTable
WHERE PrimaryDiagnosisCode LIKE 'M%'
ORDER BY PrimaryDiagnosisCode
Show code cell source
import sqlite3
import pandas as pd
conn = sqlite3.connect('100_patients.db')
sql_statement = """
SELECT *
FROM AdmissionsDiagnosesCorePopulatedTable
WHERE PrimaryDiagnosisCode LIKE 'M%'
ORDER BY PrimaryDiagnosisCode
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[11], line 2
1 import sqlite3
----> 2 import pandas as pd
4 conn = sqlite3.connect('100_patients.db')
6 sql_statement = """
7 SELECT *
8 FROM AdmissionsDiagnosesCorePopulatedTable
9 WHERE PrimaryDiagnosisCode LIKE 'M%'
10 ORDER BY PrimaryDiagnosisCode
11 """
ModuleNotFoundError: No module named 'pandas'
SELECT *
FROM AdmissionsDiagnosesCorePopulatedTable
WHERE PrimaryDiagnosisCode LIKE '%4'
ORDER BY PrimaryDiagnosisCode
Show code cell source
import sqlite3
import pandas as pd
conn = sqlite3.connect('100_patients.db')
sql_statement = """
SELECT *
FROM AdmissionsDiagnosesCorePopulatedTable
WHERE PrimaryDiagnosisCode LIKE '%4'
ORDER BY PrimaryDiagnosisCode
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[12], line 2
1 import sqlite3
----> 2 import pandas as pd
4 conn = sqlite3.connect('100_patients.db')
6 sql_statement = """
7 SELECT *
8 FROM AdmissionsDiagnosesCorePopulatedTable
9 WHERE PrimaryDiagnosisCode LIKE '%4'
10 ORDER BY PrimaryDiagnosisCode
11 """
ModuleNotFoundError: No module named 'pandas'
SELECT *
FROM AdmissionsDiagnosesCorePopulatedTable
WHERE PrimaryDiagnosisCode LIKE '%5.%'
ORDER BY PrimaryDiagnosisCode
Show code cell source
import sqlite3
import pandas as pd
conn = sqlite3.connect('100_patients.db')
sql_statement = """
SELECT *
FROM AdmissionsDiagnosesCorePopulatedTable
WHERE PrimaryDiagnosisCode LIKE '%5.%'
ORDER BY PrimaryDiagnosisCode
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[13], line 2
1 import sqlite3
----> 2 import pandas as pd
4 conn = sqlite3.connect('100_patients.db')
6 sql_statement = """
7 SELECT *
8 FROM AdmissionsDiagnosesCorePopulatedTable
9 WHERE PrimaryDiagnosisCode LIKE '%5.%'
10 ORDER BY PrimaryDiagnosisCode
11 """
ModuleNotFoundError: No module named 'pandas'
13.3.11. Checking for NULL#
SELECT *
FROM Cars
WHERE color_id IS NULL
Show code cell source
import sqlite3
import pandas as pd
conn = sqlite3.connect('cars.db')
sql_statement = """
SELECT *
FROM Cars
WHERE color_id IS NULL
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[14], line 2
1 import sqlite3
----> 2 import pandas as pd
4 conn = sqlite3.connect('cars.db')
6 sql_statement = """
7 SELECT *
8 FROM Cars
9 WHERE color_id IS NULL
10 """
ModuleNotFoundError: No module named 'pandas'