13.3. Filtering#

13.3.1. Logical Operators#

Operator

What it does?

AND

True if both conditions are true

OR

True if one of two conditions is true

NOT

Negate a specified condition

IN

Allows for multiple OR conditions

NOT IN

Negate multiple AND conditions

EXISTS

True if a record exists

LIKE

True if there is a string match using %

13.3.2. Relational Operators#

Assume a=1 and b=1

Relational Operators

What it does?

=

True if a has the same value as b

<>, !=

True if a does not have the same value as b

>

True if a is greater than b

<

True if a is less than b

>=

True if a is greater than or equal to b

<=

True if a is less than or equal to b

13.3.3. Conditional Evaluation#

SELECT *
FROM PatientCorePopulatedTable
WHERE PatientGender = 'Male' AND PatientDateOfBirth < '1950-01-01'
ORDER BY PatientDateOfBirth
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('100_patients.db')

sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientGender = 'Male' AND PatientDateOfBirth < '1950-01-01'
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('100_patients.db')
      6 sql_statement = """
      7 SELECT *
      8 FROM PatientCorePopulatedTable
      9 WHERE PatientGender = 'Male' AND PatientDateOfBirth < '1950-01-01'
     10 ORDER BY PatientDateOfBirth
     11 """

ModuleNotFoundError: No module named 'pandas'

13.3.4. Using Parenthesis#

SELECT *
FROM PatientCorePopulatedTable
WHERE (PatientGender = 'Male' OR PatientRace = 'White') AND PatientDateOfBirth < '1950-01-01'
ORDER BY PatientDateOfBirth
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('100_patients.db')

sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE (PatientGender = 'Male' OR PatientRace = 'White') AND PatientDateOfBirth < '1950-01-01'
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[2], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('100_patients.db')
      6 sql_statement = """
      7 SELECT *
      8 FROM PatientCorePopulatedTable
      9 WHERE (PatientGender = 'Male' OR PatientRace = 'White') AND PatientDateOfBirth < '1950-01-01'
     10 ORDER BY PatientDateOfBirth
     11 """

ModuleNotFoundError: No module named 'pandas'

13.3.5. Using the NOT Operator#

SELECT *
FROM PatientCorePopulatedTable
WHERE NOT (PatientGender = 'Male' OR PatientRace = 'White') AND PatientDateOfBirth < '1950-01-01'
ORDER BY PatientDateOfBirth
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('100_patients.db')

sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE NOT (PatientGender = 'Male' OR PatientRace = 'White') AND PatientDateOfBirth < '1950-01-01'
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[3], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('100_patients.db')
      6 sql_statement = """
      7 SELECT *
      8 FROM PatientCorePopulatedTable
      9 WHERE NOT (PatientGender = 'Male' OR PatientRace = 'White') AND PatientDateOfBirth < '1950-01-01'
     10 ORDER BY PatientDateOfBirth
     11 """

ModuleNotFoundError: No module named 'pandas'

13.3.6. Inequality condition#

SELECT *
FROM PatientCorePopulatedTable
WHERE (PatientGender = 'Male' AND PatientRace <> 'White') AND PatientDateOfBirth < '1950-01-01'
ORDER BY PatientDateOfBirth
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('100_patients.db')

sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE (PatientGender = 'Male' AND PatientRace <> 'White') AND PatientDateOfBirth < '1950-01-01'
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[4], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('100_patients.db')
      6 sql_statement = """
      7 SELECT *
      8 FROM PatientCorePopulatedTable
      9 WHERE (PatientGender = 'Male' AND PatientRace <> 'White') AND PatientDateOfBirth < '1950-01-01'
     10 ORDER BY PatientDateOfBirth
     11 """

ModuleNotFoundError: No module named 'pandas'

13.3.7. Range using BETWEEN condition#

SELECT *
FROM PatientCorePopulatedTable
WHERE PatientDateOfBirth BETWEEN '1920-01-01' AND '1965-01-01'
ORDER BY PatientDateOfBirth
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('100_patients.db')

sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientDateOfBirth BETWEEN '1920-01-01' AND '1965-01-01'
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[5], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('100_patients.db')
      6 sql_statement = """
      7 SELECT *
      8 FROM PatientCorePopulatedTable
      9 WHERE PatientDateOfBirth BETWEEN '1920-01-01' AND '1965-01-01'
     10 ORDER BY PatientDateOfBirth
     11 """

ModuleNotFoundError: No module named 'pandas'

13.3.8. String Condition#

SELECT *
FROM PatientCorePopulatedTable
WHERE PatientID BETWEEN '2A' AND '53'
ORDER BY PatientID
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('100_patients.db')

sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientID BETWEEN '2A' AND '53'
ORDER BY PatientID
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[6], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('100_patients.db')
      6 sql_statement = """
      7 SELECT *
      8 FROM PatientCorePopulatedTable
      9 WHERE PatientID BETWEEN '2A' AND '53'
     10 ORDER BY PatientID
     11 """

ModuleNotFoundError: No module named 'pandas'

13.3.9. Membership Condition#

SELECT *
FROM PatientCorePopulatedTable
WHERE PatientRace = 'White' OR PatientRace = 'Asian'
ORDER BY PatientDateOfBirth
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('100_patients.db')

sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientRace = 'White' OR PatientRace = 'Asian'
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[7], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('100_patients.db')
      6 sql_statement = """
      7 SELECT *
      8 FROM PatientCorePopulatedTable
      9 WHERE PatientRace = 'White' OR PatientRace = 'Asian'
     10 ORDER BY PatientDateOfBirth
     11 """

ModuleNotFoundError: No module named 'pandas'

13.3.9.1. IN condition#

SELECT *
FROM PatientCorePopulatedTable
WHERE PatientRace IN ('White', 'Asian')
ORDER BY PatientDateOfBirth
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('100_patients.db')

sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientRace IN ('White', 'Asian')
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[8], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('100_patients.db')
      6 sql_statement = """
      7 SELECT *
      8 FROM PatientCorePopulatedTable
      9 WHERE PatientRace IN ('White', 'Asian')
     10 ORDER BY PatientDateOfBirth
     11 """

ModuleNotFoundError: No module named 'pandas'

13.3.9.2. NOT IN condition#

SELECT *
FROM PatientCorePopulatedTable
WHERE PatientRace NOT IN ('White', 'Asian')
ORDER BY PatientDateOfBirth
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('100_patients.db')

sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientRace NOT IN ('White', 'Asian')
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[9], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('100_patients.db')
      6 sql_statement = """
      7 SELECT *
      8 FROM PatientCorePopulatedTable
      9 WHERE PatientRace NOT IN ('White', 'Asian')
     10 ORDER BY PatientDateOfBirth
     11 """

ModuleNotFoundError: No module named 'pandas'

13.3.9.3. Using subqueries#

SELECT *
FROM PatientCorePopulatedTable
WHERE PatientID IN (SELECT PatientID FROM PatientCorePopulatedTable WHERE PatientDateOfBirth > '1980-01-01')
ORDER BY PatientDateOfBirth
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('100_patients.db')

sql_statement = """
SELECT *
FROM PatientCorePopulatedTable
WHERE PatientID IN (SELECT PatientID FROM PatientCorePopulatedTable WHERE PatientDateOfBirth > '1980-01-01')
ORDER BY PatientDateOfBirth
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[10], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('100_patients.db')
      6 sql_statement = """
      7 SELECT *
      8 FROM PatientCorePopulatedTable
      9 WHERE PatientID IN (SELECT PatientID FROM PatientCorePopulatedTable WHERE PatientDateOfBirth > '1980-01-01')
     10 ORDER BY PatientDateOfBirth
     11 """

ModuleNotFoundError: No module named 'pandas'

13.3.10. Using Wildcards#

SELECT *
FROM AdmissionsDiagnosesCorePopulatedTable
WHERE PrimaryDiagnosisCode LIKE 'M%'
ORDER BY PrimaryDiagnosisCode
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('100_patients.db')

sql_statement = """
SELECT *
FROM AdmissionsDiagnosesCorePopulatedTable
WHERE PrimaryDiagnosisCode LIKE 'M%'
ORDER BY PrimaryDiagnosisCode
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[11], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('100_patients.db')
      6 sql_statement = """
      7 SELECT *
      8 FROM AdmissionsDiagnosesCorePopulatedTable
      9 WHERE PrimaryDiagnosisCode LIKE 'M%'
     10 ORDER BY PrimaryDiagnosisCode
     11 """

ModuleNotFoundError: No module named 'pandas'
SELECT *
FROM AdmissionsDiagnosesCorePopulatedTable
WHERE PrimaryDiagnosisCode LIKE '%4'
ORDER BY PrimaryDiagnosisCode
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('100_patients.db')

sql_statement = """
SELECT *
FROM AdmissionsDiagnosesCorePopulatedTable
WHERE PrimaryDiagnosisCode LIKE '%4'
ORDER BY PrimaryDiagnosisCode
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[12], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('100_patients.db')
      6 sql_statement = """
      7 SELECT *
      8 FROM AdmissionsDiagnosesCorePopulatedTable
      9 WHERE PrimaryDiagnosisCode LIKE '%4'
     10 ORDER BY PrimaryDiagnosisCode
     11 """

ModuleNotFoundError: No module named 'pandas'
SELECT *
FROM AdmissionsDiagnosesCorePopulatedTable
WHERE PrimaryDiagnosisCode LIKE '%5.%'
ORDER BY PrimaryDiagnosisCode
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('100_patients.db')

sql_statement = """
SELECT *
FROM AdmissionsDiagnosesCorePopulatedTable
WHERE PrimaryDiagnosisCode LIKE '%5.%'
ORDER BY PrimaryDiagnosisCode
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[13], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('100_patients.db')
      6 sql_statement = """
      7 SELECT *
      8 FROM AdmissionsDiagnosesCorePopulatedTable
      9 WHERE PrimaryDiagnosisCode LIKE '%5.%'
     10 ORDER BY PrimaryDiagnosisCode
     11 """

ModuleNotFoundError: No module named 'pandas'

13.3.11. Checking for NULL#

SELECT *
FROM Cars
WHERE color_id IS NULL
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('cars.db')

sql_statement = """
SELECT *
FROM Cars
WHERE color_id IS NULL
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[14], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('cars.db')
      6 sql_statement = """
      7 SELECT *
      8 FROM Cars
      9 WHERE color_id IS NULL
     10 """

ModuleNotFoundError: No module named 'pandas'