13.8. Analytic Functions#

Analytic functions allow you to group rows into windows, partitioning the data. Windows are defined using the OVER clause and optionally combined with the PARTITION subclause.

Date Reference: https://www.techonthenet.com/sqlite/functions/julianday.php

13.8.1. Data Windows#

SELECT
    CASE
        WHEN 0 + strftime('%m', sale_date) BETWEEN 1
        AND 3 THEN 'Q1'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 4
        AND 6 THEN 'Q2'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 7
        AND 9 THEN 'Q3'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 10
        AND 12 THEN 'Q4'
    END Quarter,
    CASE
        WHEN strftime('%m', sale_date) = '01' THEN 'January'
        WHEN strftime('%m', sale_date) = '02' THEN 'February'
        WHEN strftime('%m', sale_date) = '03' THEN 'March'
        WHEN strftime('%m', sale_date) = '04' THEN 'April'
        WHEN strftime('%m', sale_date) = '05' THEN 'May'
        WHEN strftime('%m', sale_date) = '06' THEN 'June'
        WHEN strftime('%m', sale_date) = '07' THEN 'July'
        WHEN strftime('%m', sale_date) = '08' THEN 'August'
        WHEN strftime('%m', sale_date) = '09' THEN 'September'
        WHEN strftime('%m', sale_date) = '10' THEN 'October'
        WHEN strftime('%m', sale_date) = '11' THEN 'November'
        WHEN strftime('%m', sale_date) = '12' THEN 'December'
    END Month,
    sum(total) Monthly_Sales
FROM
    bakery_sales
GROUP BY
    Quarter,
    Month
ORDER BY 
	strftime('%m', sale_date)
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('bakery_sales.db')

sql_statement = """
SELECT
    CASE
        WHEN 0 + strftime('%m', sale_date) BETWEEN 1
        AND 3 THEN 'Q1'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 4
        AND 6 THEN 'Q2'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 7
        AND 9 THEN 'Q3'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 10
        AND 12 THEN 'Q4'
    END Quarter,
    CASE
        WHEN strftime('%m', sale_date) = '01' THEN 'January'
        WHEN strftime('%m', sale_date) = '02' THEN 'February'
        WHEN strftime('%m', sale_date) = '03' THEN 'March'
        WHEN strftime('%m', sale_date) = '04' THEN 'April'
        WHEN strftime('%m', sale_date) = '05' THEN 'May'
        WHEN strftime('%m', sale_date) = '06' THEN 'June'
        WHEN strftime('%m', sale_date) = '07' THEN 'July'
        WHEN strftime('%m', sale_date) = '08' THEN 'August'
        WHEN strftime('%m', sale_date) = '09' THEN 'September'
        WHEN strftime('%m', sale_date) = '10' THEN 'October'
        WHEN strftime('%m', sale_date) = '11' THEN 'November'
        WHEN strftime('%m', sale_date) = '12' THEN 'December'
    END Month,
    sum(total) Monthly_Sales
FROM
    bakery_sales
GROUP BY
    Quarter,
    Month
ORDER BY 
	strftime('%m', sale_date)
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('bakery_sales.db')
      6 sql_statement = """
      7 SELECT
      8     CASE
   (...)     39 	strftime('%m', sale_date)
     40 """

ModuleNotFoundError: No module named 'pandas'
WITH SalesTable AS (
SELECT
    CASE
        WHEN 0 + strftime('%m', sale_date) BETWEEN 1
        AND 3 THEN 'Q1'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 4
        AND 6 THEN 'Q2'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 7
        AND 9 THEN 'Q3'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 10
        AND 12 THEN 'Q4'
    END Quarter,
    CASE
        WHEN strftime('%m', sale_date) = '01' THEN 'January'
        WHEN strftime('%m', sale_date) = '02' THEN 'February'
        WHEN strftime('%m', sale_date) = '03' THEN 'March'
        WHEN strftime('%m', sale_date) = '04' THEN 'April'
        WHEN strftime('%m', sale_date) = '05' THEN 'May'
        WHEN strftime('%m', sale_date) = '06' THEN 'June'
        WHEN strftime('%m', sale_date) = '07' THEN 'July'
        WHEN strftime('%m', sale_date) = '08' THEN 'August'
        WHEN strftime('%m', sale_date) = '09' THEN 'September'
        WHEN strftime('%m', sale_date) = '10' THEN 'October'
        WHEN strftime('%m', sale_date) = '11' THEN 'November'
        WHEN strftime('%m', sale_date) = '12' THEN 'December'
    END Month, 
	Total
FROM
    bakery_sales
)
SELECT 
	Quarter,
	Month, 
	Sum(Total) MonthlySales,
	Max(sum(total)) over() max_overall_sales,
	Max(sum(total)) over(partition by quarter) max_quarter_sales
FROM SalesTable
GROUP BY Quarter, Month
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('bakery_sales.db')

sql_statement = """
WITH SalesTable AS (
SELECT
    CASE
        WHEN 0 + strftime('%m', sale_date) BETWEEN 1
        AND 3 THEN 'Q1'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 4
        AND 6 THEN 'Q2'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 7
        AND 9 THEN 'Q3'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 10
        AND 12 THEN 'Q4'
    END Quarter,
    CASE
        WHEN strftime('%m', sale_date) = '01' THEN 'January'
        WHEN strftime('%m', sale_date) = '02' THEN 'February'
        WHEN strftime('%m', sale_date) = '03' THEN 'March'
        WHEN strftime('%m', sale_date) = '04' THEN 'April'
        WHEN strftime('%m', sale_date) = '05' THEN 'May'
        WHEN strftime('%m', sale_date) = '06' THEN 'June'
        WHEN strftime('%m', sale_date) = '07' THEN 'July'
        WHEN strftime('%m', sale_date) = '08' THEN 'August'
        WHEN strftime('%m', sale_date) = '09' THEN 'September'
        WHEN strftime('%m', sale_date) = '10' THEN 'October'
        WHEN strftime('%m', sale_date) = '11' THEN 'November'
        WHEN strftime('%m', sale_date) = '12' THEN 'December'
    END Month, 
	Total
FROM
    bakery_sales
)
SELECT 
	Quarter,
	Month, 
	Sum(Total) MonthlySales,
	Max(sum(total)) over() max_overall_sales,
	Max(sum(total)) over(partition by quarter) max_quarter_sales
FROM SalesTable
GROUP BY Quarter, Month
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[2], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('bakery_sales.db')
      6 sql_statement = """
      7 WITH SalesTable AS (
      8 SELECT
   (...)     44 GROUP BY Quarter, Month
     45 """

ModuleNotFoundError: No module named 'pandas'

13.8.2. Localized Sorting#

WITH SalesTable AS (
SELECT
    CASE
        WHEN 0 + strftime('%m', sale_date) BETWEEN 1
        AND 3 THEN 'Q1'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 4
        AND 6 THEN 'Q2'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 7
        AND 9 THEN 'Q3'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 10
        AND 12 THEN 'Q4'
    END Quarter,
    CASE
        WHEN strftime('%m', sale_date) = '01' THEN 'January'
        WHEN strftime('%m', sale_date) = '02' THEN 'February'
        WHEN strftime('%m', sale_date) = '03' THEN 'March'
        WHEN strftime('%m', sale_date) = '04' THEN 'April'
        WHEN strftime('%m', sale_date) = '05' THEN 'May'
        WHEN strftime('%m', sale_date) = '06' THEN 'June'
        WHEN strftime('%m', sale_date) = '07' THEN 'July'
        WHEN strftime('%m', sale_date) = '08' THEN 'August'
        WHEN strftime('%m', sale_date) = '09' THEN 'September'
        WHEN strftime('%m', sale_date) = '10' THEN 'October'
        WHEN strftime('%m', sale_date) = '11' THEN 'November'
        WHEN strftime('%m', sale_date) = '12' THEN 'December'
    END Month, 
	Total
FROM
    bakery_sales
)
SELECT 
	Quarter,
	Month, 
	Sum(Total) MonthlySales,
	rank() OVER (ORDER BY -sum(total)) SalesRank
FROM SalesTable
GROUP BY Quarter, Month
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('bakery_sales.db')

sql_statement = """
WITH SalesTable AS (
SELECT
    CASE
        WHEN 0 + strftime('%m', sale_date) BETWEEN 1
        AND 3 THEN 'Q1'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 4
        AND 6 THEN 'Q2'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 7
        AND 9 THEN 'Q3'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 10
        AND 12 THEN 'Q4'
    END Quarter,
    CASE
        WHEN strftime('%m', sale_date) = '01' THEN 'January'
        WHEN strftime('%m', sale_date) = '02' THEN 'February'
        WHEN strftime('%m', sale_date) = '03' THEN 'March'
        WHEN strftime('%m', sale_date) = '04' THEN 'April'
        WHEN strftime('%m', sale_date) = '05' THEN 'May'
        WHEN strftime('%m', sale_date) = '06' THEN 'June'
        WHEN strftime('%m', sale_date) = '07' THEN 'July'
        WHEN strftime('%m', sale_date) = '08' THEN 'August'
        WHEN strftime('%m', sale_date) = '09' THEN 'September'
        WHEN strftime('%m', sale_date) = '10' THEN 'October'
        WHEN strftime('%m', sale_date) = '11' THEN 'November'
        WHEN strftime('%m', sale_date) = '12' THEN 'December'
    END Month, 
	Total
FROM
    bakery_sales
)
SELECT 
	Quarter,
	Month, 
	Sum(Total) MonthlySales,
	rank() OVER (ORDER BY -sum(total)) SalesRank
FROM SalesTable
GROUP BY Quarter, Month
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[3], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('bakery_sales.db')
      6 sql_statement = """
      7 WITH SalesTable AS (
      8 SELECT
   (...)     43 GROUP BY Quarter, Month
     44 """

ModuleNotFoundError: No module named 'pandas'
WITH SalesTable AS (
SELECT
    CASE
        WHEN 0 + strftime('%m', sale_date) BETWEEN 1
        AND 3 THEN 'Q1'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 4
        AND 6 THEN 'Q2'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 7
        AND 9 THEN 'Q3'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 10
        AND 12 THEN 'Q4'
    END Quarter,
    CASE
        WHEN strftime('%m', sale_date) = '01' THEN 'January'
        WHEN strftime('%m', sale_date) = '02' THEN 'February'
        WHEN strftime('%m', sale_date) = '03' THEN 'March'
        WHEN strftime('%m', sale_date) = '04' THEN 'April'
        WHEN strftime('%m', sale_date) = '05' THEN 'May'
        WHEN strftime('%m', sale_date) = '06' THEN 'June'
        WHEN strftime('%m', sale_date) = '07' THEN 'July'
        WHEN strftime('%m', sale_date) = '08' THEN 'August'
        WHEN strftime('%m', sale_date) = '09' THEN 'September'
        WHEN strftime('%m', sale_date) = '10' THEN 'October'
        WHEN strftime('%m', sale_date) = '11' THEN 'November'
        WHEN strftime('%m', sale_date) = '12' THEN 'December'
    END Month, 
	Total
FROM
    bakery_sales
)
SELECT 
	Quarter,
	Month, 
	Sum(Total) MonthlySales,
	rank() OVER (PARTITION BY Quarter ORDER BY -sum(total)) SalesRank
FROM SalesTable
GROUP BY Quarter, Month
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('bakery_sales.db')

sql_statement = """
WITH SalesTable AS (
SELECT
    CASE
        WHEN 0 + strftime('%m', sale_date) BETWEEN 1
        AND 3 THEN 'Q1'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 4
        AND 6 THEN 'Q2'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 7
        AND 9 THEN 'Q3'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 10
        AND 12 THEN 'Q4'
    END Quarter,
    CASE
        WHEN strftime('%m', sale_date) = '01' THEN 'January'
        WHEN strftime('%m', sale_date) = '02' THEN 'February'
        WHEN strftime('%m', sale_date) = '03' THEN 'March'
        WHEN strftime('%m', sale_date) = '04' THEN 'April'
        WHEN strftime('%m', sale_date) = '05' THEN 'May'
        WHEN strftime('%m', sale_date) = '06' THEN 'June'
        WHEN strftime('%m', sale_date) = '07' THEN 'July'
        WHEN strftime('%m', sale_date) = '08' THEN 'August'
        WHEN strftime('%m', sale_date) = '09' THEN 'September'
        WHEN strftime('%m', sale_date) = '10' THEN 'October'
        WHEN strftime('%m', sale_date) = '11' THEN 'November'
        WHEN strftime('%m', sale_date) = '12' THEN 'December'
    END Month, 
	Total
FROM
    bakery_sales
)
SELECT 
	Quarter,
	Month, 
	Sum(Total) MonthlySales,
	rank() OVER (PARTITION BY Quarter ORDER BY -sum(total)) SalesRank
FROM SalesTable
GROUP BY Quarter, Month
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[4], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('bakery_sales.db')
      6 sql_statement = """
      7 WITH SalesTable AS (
      8 SELECT
   (...)     43 GROUP BY Quarter, Month
     44 """

ModuleNotFoundError: No module named 'pandas'

13.8.3. Window Frames#

ref: https://www.sqlitetutorial.net/sqlite-window-functions/sqlite-window-frame/

WITH SalesTable AS (
SELECT
	cast(strftime('%Y', sale_date) AS INT) sale_year,
	cast(strftime('%W', sale_date) AS INT) sale_week,
	Total
FROM
    bakery_sales
ORDER BY sale_date
)
SELECT 
	sale_year,
	sale_week,
	sum(total) week_total,
	sum(sum(total)) OVER (ORDER BY sale_year, sale_week ROWS UNBOUNDED PRECEDING) rolling_sum
FROM SalesTable
GROUP BY sale_year, sale_week
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('bakery_sales.db')

sql_statement = """
WITH SalesTable AS (
SELECT
	cast(strftime('%Y', sale_date) AS INT) sale_year,
	cast(strftime('%W', sale_date) AS INT) sale_week,
	Total
FROM
    bakery_sales
ORDER BY sale_date
)
SELECT 
	sale_year,
	sale_week,
	sum(total) week_total,
	sum(sum(total)) OVER (ORDER BY sale_year, sale_week ROWS UNBOUNDED PRECEDING) rolling_sum
FROM SalesTable
GROUP BY sale_year, sale_week
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[5], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('bakery_sales.db')
      6 sql_statement = """
      7 WITH SalesTable AS (
      8 SELECT
   (...)     22 GROUP BY sale_year, sale_week
     23 """

ModuleNotFoundError: No module named 'pandas'
SELECT 
	sale_date,
	sum(total) total,
	max(sum(total)) over (order by cast(strftime('%j', sale_date) AS INT) range between 3 preceding and 3 following) seven_day_max
FROM bakery_sales
GROUP BY sale_date
order by sale_date
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('bakery_sales.db')

sql_statement = """
SELECT 
	sale_date,
	sum(total) total,
	max(sum(total)) over (order by cast(strftime('%j', sale_date) AS INT) range between 3 preceding and 3 following) seven_day_max
FROM bakery_sales
GROUP BY sale_date
order by sale_date
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[6], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('bakery_sales.db')
      6 sql_statement = """
      7 SELECT 
      8 	sale_date,
   (...)     13 order by sale_date
     14 """

ModuleNotFoundError: No module named 'pandas'

13.8.4. Lag and Lead#

WITH SalesTable AS (
SELECT
	cast(strftime('%Y', sale_date) AS INT) sale_year,
	cast(strftime('%W', sale_date) AS INT) sale_week,
	Total
FROM
    bakery_sales
ORDER BY sale_date
)
SELECT 
	sale_year,
	sale_week,
	sum(total) week_total,
	lag(sum(total), 1) over (order by sale_week) prev_week_total,
	lead(sum(total), 1) over (order by sale_week) next_week_total
FROM SalesTable
GROUP BY sale_year, sale_week
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('bakery_sales.db')

sql_statement = """
WITH SalesTable AS (
SELECT
	cast(strftime('%Y', sale_date) AS INT) sale_year,
	cast(strftime('%W', sale_date) AS INT) sale_week,
	Total
FROM
    bakery_sales
ORDER BY sale_date
)
SELECT 
	sale_year,
	sale_week,
	sum(total) week_total,
	lag(sum(total), 1) over (order by sale_week) prev_week_total,
	lead(sum(total), 1) over (order by sale_week) next_week_total
FROM SalesTable
GROUP BY sale_year, sale_week
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[7], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('bakery_sales.db')
      6 sql_statement = """
      7 WITH SalesTable AS (
      8 SELECT
   (...)     23 GROUP BY sale_year, sale_week
     24 """

ModuleNotFoundError: No module named 'pandas'
WITH SalesTable AS (
    SELECT
        cast(strftime('%Y', sale_date) AS INT) sale_year,
        cast(strftime('%W', sale_date) AS INT) sale_week,
        Total
    FROM
        bakery_sales
    ORDER BY
        sale_date
)
SELECT
    sale_year,
    sale_week,
    sum(total) week_total,
    round(
        CAST(
            (
                sum(total) - lag(sum(total), 1) over (
                    order by
                        sale_week
                )
            ) AS REAL
        ) / CAST(
            lag(sum(total), 1) over (
                order by
                    sale_week
            ) AS REAL
        ) * 100,
        1
    ) CHANGE
FROM
    SalesTable
GROUP BY
    sale_year,
    sale_week
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('bakery_sales.db')

sql_statement = """
WITH SalesTable AS (
    SELECT
        cast(strftime('%Y', sale_date) AS INT) sale_year,
        cast(strftime('%W', sale_date) AS INT) sale_week,
        Total
    FROM
        bakery_sales
    ORDER BY
        sale_date
)
SELECT
    sale_year,
    sale_week,
    sum(total) week_total,
    round(
        CAST(
            (
                sum(total) - lag(sum(total), 1) over (
                    order by
                        sale_week
                )
            ) AS REAL
        ) / CAST(
            lag(sum(total), 1) over (
                order by
                    sale_week
            ) AS REAL
        ) * 100,
        1
    ) CHANGE
FROM
    SalesTable
GROUP BY
    sale_year,
    sale_week
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[8], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('bakery_sales.db')
      6 sql_statement = """
      7 WITH SalesTable AS (
      8     SELECT
   (...)     41     sale_week
     42 """

ModuleNotFoundError: No module named 'pandas'

13.8.5. Ranking#

  • rank: returns the same ranking in case of a tie, with gaps in the rankings

  • row_number: returns a unique number for each row, with rankings arbitrarily assigned in case of a tie

  • dense_rank: returns the same ranking in the case of a tie, with no gaps in the rankings

Ref: https://blog.jooq.org/the-difference-between-row_number-rank-and-dense_rank/

SELECT 
	PatientGender,
	PatientMaritalStatus, 
	count(*) StatusCount, 
	rank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRank,
	row_number() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRowNumber,
	dense_rank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusDenseRank
FROM 
PatientCorePopulatedTable
GROUP BY PatientGender, PatientMaritalStatus
Hide code cell source
import sqlite3
import pandas as pd

conn = sqlite3.connect('100_patients.db')

sql_statement = """
SELECT 
	PatientGender,
	PatientMaritalStatus, 
	count(*) StatusCount, 
	rank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRank,
	row_number() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRowNumber,
	dense_rank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusDenseRank
FROM 
PatientCorePopulatedTable
GROUP BY PatientGender, PatientMaritalStatus
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[9], line 2
      1 import sqlite3
----> 2 import pandas as pd
      4 conn = sqlite3.connect('100_patients.db')
      6 sql_statement = """
      7 SELECT 
      8 	PatientGender,
   (...)     16 GROUP BY PatientGender, PatientMaritalStatus
     17 """

ModuleNotFoundError: No module named 'pandas'