Analytic Functions#
Analytic (window) functions compute a value over a group of related rows, called a window, without collapsing those rows the way GROUP BY does. Windows are defined using the OVER clause and can optionally be divided into groups of rows with the PARTITION BY subclause.
Date Reference: https://www.techonthenet.com/sqlite/functions/julianday.php
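For example, a window function can attach a group-level aggregate to every row without collapsing the rows the way GROUP BY does. A minimal sketch against the bakery_sales table used throughout this page (output not shown):
SELECT
    sale_date,
    total,
    -- the month's total is repeated on every row falling in that month
    sum(total) OVER (PARTITION BY strftime('%m', sale_date)) month_total
FROM
    bakery_sales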
Data Windows#
SELECT
CASE
WHEN 0 + strftime('%m', sale_date) BETWEEN 1
AND 3 THEN 'Q1'
WHEN 0 + strftime('%m', sale_date) BETWEEN 4
AND 6 THEN 'Q2'
WHEN 0 + strftime('%m', sale_date) BETWEEN 7
AND 9 THEN 'Q3'
WHEN 0 + strftime('%m', sale_date) BETWEEN 10
AND 12 THEN 'Q4'
END Quarter,
CASE
WHEN strftime('%m', sale_date) = '01' THEN 'January'
WHEN strftime('%m', sale_date) = '02' THEN 'February'
WHEN strftime('%m', sale_date) = '03' THEN 'March'
WHEN strftime('%m', sale_date) = '04' THEN 'April'
WHEN strftime('%m', sale_date) = '05' THEN 'May'
WHEN strftime('%m', sale_date) = '06' THEN 'June'
WHEN strftime('%m', sale_date) = '07' THEN 'July'
WHEN strftime('%m', sale_date) = '08' THEN 'August'
WHEN strftime('%m', sale_date) = '09' THEN 'September'
WHEN strftime('%m', sale_date) = '10' THEN 'October'
WHEN strftime('%m', sale_date) = '11' THEN 'November'
WHEN strftime('%m', sale_date) = '12' THEN 'December'
END Month,
sum(total) Monthly_Sales
FROM
bakery_sales
GROUP BY
Quarter,
Month
ORDER BY
strftime('%m', sale_date)
import sqlite3
import pandas as pd
conn = sqlite3.connect('bakery_sales.db')
sql_statement = """
SELECT
CASE
WHEN 0 + strftime('%m', sale_date) BETWEEN 1
AND 3 THEN 'Q1'
WHEN 0 + strftime('%m', sale_date) BETWEEN 4
AND 6 THEN 'Q2'
WHEN 0 + strftime('%m', sale_date) BETWEEN 7
AND 9 THEN 'Q3'
WHEN 0 + strftime('%m', sale_date) BETWEEN 10
AND 12 THEN 'Q4'
END Quarter,
CASE
WHEN strftime('%m', sale_date) = '01' THEN 'January'
WHEN strftime('%m', sale_date) = '02' THEN 'February'
WHEN strftime('%m', sale_date) = '03' THEN 'March'
WHEN strftime('%m', sale_date) = '04' THEN 'April'
WHEN strftime('%m', sale_date) = '05' THEN 'May'
WHEN strftime('%m', sale_date) = '06' THEN 'June'
WHEN strftime('%m', sale_date) = '07' THEN 'July'
WHEN strftime('%m', sale_date) = '08' THEN 'August'
WHEN strftime('%m', sale_date) = '09' THEN 'September'
WHEN strftime('%m', sale_date) = '10' THEN 'October'
WHEN strftime('%m', sale_date) = '11' THEN 'November'
WHEN strftime('%m', sale_date) = '12' THEN 'December'
END Month,
sum(total) Monthly_Sales
FROM
bakery_sales
GROUP BY
Quarter,
Month
ORDER BY
strftime('%m', sale_date)
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
| | Quarter | Month | Monthly_Sales |
|---|---|---|---|
| 0 | Q1 | January | 4582500 |
| 1 | Q1 | February | 6423700 |
| 2 | Q1 | March | 6445100 |
| 3 | Q2 | April | 4893700 |
| 4 | Q2 | May | 308400 |
| 5 | Q3 | July | 4076500 |
| 6 | Q3 | August | 6100500 |
| 7 | Q3 | September | 4895500 |
| 8 | Q4 | October | 3959100 |
| 9 | Q4 | November | 4543000 |
| 10 | Q4 | December | 5009500 |
WITH SalesTable AS (
SELECT
CASE
WHEN 0 + strftime('%m', sale_date) BETWEEN 1
AND 3 THEN 'Q1'
WHEN 0 + strftime('%m', sale_date) BETWEEN 4
AND 6 THEN 'Q2'
WHEN 0 + strftime('%m', sale_date) BETWEEN 7
AND 9 THEN 'Q3'
WHEN 0 + strftime('%m', sale_date) BETWEEN 10
AND 12 THEN 'Q4'
END Quarter,
CASE
WHEN strftime('%m', sale_date) = '01' THEN 'January'
WHEN strftime('%m', sale_date) = '02' THEN 'February'
WHEN strftime('%m', sale_date) = '03' THEN 'March'
WHEN strftime('%m', sale_date) = '04' THEN 'April'
WHEN strftime('%m', sale_date) = '05' THEN 'May'
WHEN strftime('%m', sale_date) = '06' THEN 'June'
WHEN strftime('%m', sale_date) = '07' THEN 'July'
WHEN strftime('%m', sale_date) = '08' THEN 'August'
WHEN strftime('%m', sale_date) = '09' THEN 'September'
WHEN strftime('%m', sale_date) = '10' THEN 'October'
WHEN strftime('%m', sale_date) = '11' THEN 'November'
WHEN strftime('%m', sale_date) = '12' THEN 'December'
END Month,
Total
FROM
bakery_sales
)
SELECT
Quarter,
Month,
Sum(Total) MonthlySales,
Max(sum(total)) over() max_overall_sales,
Max(sum(total)) over(partition by quarter) max_quarter_sales
FROM SalesTable
GROUP BY Quarter, Month
import sqlite3
import pandas as pd
conn = sqlite3.connect('bakery_sales.db')
sql_statement = """
WITH SalesTable AS (
SELECT
CASE
WHEN 0 + strftime('%m', sale_date) BETWEEN 1
AND 3 THEN 'Q1'
WHEN 0 + strftime('%m', sale_date) BETWEEN 4
AND 6 THEN 'Q2'
WHEN 0 + strftime('%m', sale_date) BETWEEN 7
AND 9 THEN 'Q3'
WHEN 0 + strftime('%m', sale_date) BETWEEN 10
AND 12 THEN 'Q4'
END Quarter,
CASE
WHEN strftime('%m', sale_date) = '01' THEN 'January'
WHEN strftime('%m', sale_date) = '02' THEN 'February'
WHEN strftime('%m', sale_date) = '03' THEN 'March'
WHEN strftime('%m', sale_date) = '04' THEN 'April'
WHEN strftime('%m', sale_date) = '05' THEN 'May'
WHEN strftime('%m', sale_date) = '06' THEN 'June'
WHEN strftime('%m', sale_date) = '07' THEN 'July'
WHEN strftime('%m', sale_date) = '08' THEN 'August'
WHEN strftime('%m', sale_date) = '09' THEN 'September'
WHEN strftime('%m', sale_date) = '10' THEN 'October'
WHEN strftime('%m', sale_date) = '11' THEN 'November'
WHEN strftime('%m', sale_date) = '12' THEN 'December'
END Month,
Total
FROM
bakery_sales
)
SELECT
Quarter,
Month,
Sum(Total) MonthlySales,
Max(sum(total)) over() max_overall_sales,
Max(sum(total)) over(partition by quarter) max_quarter_sales
FROM SalesTable
GROUP BY Quarter, Month
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
| | Quarter | Month | MonthlySales | max_overall_sales | max_quarter_sales |
|---|---|---|---|---|---|
| 0 | Q1 | February | 6423700 | 6445100 | 6445100 |
| 1 | Q1 | January | 4582500 | 6445100 | 6445100 |
| 2 | Q1 | March | 6445100 | 6445100 | 6445100 |
| 3 | Q2 | April | 4893700 | 6445100 | 4893700 |
| 4 | Q2 | May | 308400 | 6445100 | 4893700 |
| 5 | Q3 | August | 6100500 | 6445100 | 6100500 |
| 6 | Q3 | July | 4076500 | 6445100 | 6100500 |
| 7 | Q3 | September | 4895500 | 6445100 | 6100500 |
| 8 | Q4 | December | 5009500 | 6445100 | 5009500 |
| 9 | Q4 | November | 4543000 | 6445100 | 5009500 |
| 10 | Q4 | October | 3959100 | 6445100 | 5009500 |
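Window functions are evaluated after grouping, so max(sum(total)) above takes the per-month totals produced by the GROUP BY and returns the largest of them, either across the whole result set (OVER ()) or within each quarter (PARTITION BY quarter).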
Localized Sorting#
WITH SalesTable AS (
SELECT
CASE
WHEN 0 + strftime('%m', sale_date) BETWEEN 1
AND 3 THEN 'Q1'
WHEN 0 + strftime('%m', sale_date) BETWEEN 4
AND 6 THEN 'Q2'
WHEN 0 + strftime('%m', sale_date) BETWEEN 7
AND 9 THEN 'Q3'
WHEN 0 + strftime('%m', sale_date) BETWEEN 10
AND 12 THEN 'Q4'
END Quarter,
CASE
WHEN strftime('%m', sale_date) = '01' THEN 'January'
WHEN strftime('%m', sale_date) = '02' THEN 'February'
WHEN strftime('%m', sale_date) = '03' THEN 'March'
WHEN strftime('%m', sale_date) = '04' THEN 'April'
WHEN strftime('%m', sale_date) = '05' THEN 'May'
WHEN strftime('%m', sale_date) = '06' THEN 'June'
WHEN strftime('%m', sale_date) = '07' THEN 'July'
WHEN strftime('%m', sale_date) = '08' THEN 'August'
WHEN strftime('%m', sale_date) = '09' THEN 'September'
WHEN strftime('%m', sale_date) = '10' THEN 'October'
WHEN strftime('%m', sale_date) = '11' THEN 'November'
WHEN strftime('%m', sale_date) = '12' THEN 'December'
END Month,
Total
FROM
bakery_sales
)
SELECT
Quarter,
Month,
Sum(Total) MonthlySales,
rank() OVER (ORDER BY -sum(total)) SalesRank
FROM SalesTable
GROUP BY Quarter, Month
import sqlite3
import pandas as pd
conn = sqlite3.connect('bakery_sales.db')
sql_statement = """
WITH SalesTable AS (
SELECT
CASE
WHEN 0 + strftime('%m', sale_date) BETWEEN 1
AND 3 THEN 'Q1'
WHEN 0 + strftime('%m', sale_date) BETWEEN 4
AND 6 THEN 'Q2'
WHEN 0 + strftime('%m', sale_date) BETWEEN 7
AND 9 THEN 'Q3'
WHEN 0 + strftime('%m', sale_date) BETWEEN 10
AND 12 THEN 'Q4'
END Quarter,
CASE
WHEN strftime('%m', sale_date) = '01' THEN 'January'
WHEN strftime('%m', sale_date) = '02' THEN 'February'
WHEN strftime('%m', sale_date) = '03' THEN 'March'
WHEN strftime('%m', sale_date) = '04' THEN 'April'
WHEN strftime('%m', sale_date) = '05' THEN 'May'
WHEN strftime('%m', sale_date) = '06' THEN 'June'
WHEN strftime('%m', sale_date) = '07' THEN 'July'
WHEN strftime('%m', sale_date) = '08' THEN 'August'
WHEN strftime('%m', sale_date) = '09' THEN 'September'
WHEN strftime('%m', sale_date) = '10' THEN 'October'
WHEN strftime('%m', sale_date) = '11' THEN 'November'
WHEN strftime('%m', sale_date) = '12' THEN 'December'
END Month,
Total
FROM
bakery_sales
)
SELECT
Quarter,
Month,
Sum(Total) MonthlySales,
rank() OVER (ORDER BY -sum(total)) SalesRank
FROM SalesTable
GROUP BY Quarter, Month
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
| | Quarter | Month | MonthlySales | SalesRank |
|---|---|---|---|---|
| 0 | Q1 | March | 6445100 | 1 |
| 1 | Q1 | February | 6423700 | 2 |
| 2 | Q3 | August | 6100500 | 3 |
| 3 | Q4 | December | 5009500 | 4 |
| 4 | Q3 | September | 4895500 | 5 |
| 5 | Q2 | April | 4893700 | 6 |
| 6 | Q1 | January | 4582500 | 7 |
| 7 | Q4 | November | 4543000 | 8 |
| 8 | Q3 | July | 4076500 | 9 |
| 9 | Q4 | October | 3959100 | 10 |
| 10 | Q2 | May | 308400 | 11 |
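Ordering the window by -sum(total) is simply a shorthand for descending order; rank() OVER (ORDER BY sum(total) DESC) yields the same ranking.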
WITH SalesTable AS (
SELECT
CASE
WHEN 0 + strftime('%m', sale_date) BETWEEN 1
AND 3 THEN 'Q1'
WHEN 0 + strftime('%m', sale_date) BETWEEN 4
AND 6 THEN 'Q2'
WHEN 0 + strftime('%m', sale_date) BETWEEN 7
AND 9 THEN 'Q3'
WHEN 0 + strftime('%m', sale_date) BETWEEN 10
AND 12 THEN 'Q4'
END Quarter,
CASE
WHEN strftime('%m', sale_date) = '01' THEN 'January'
WHEN strftime('%m', sale_date) = '02' THEN 'February'
WHEN strftime('%m', sale_date) = '03' THEN 'March'
WHEN strftime('%m', sale_date) = '04' THEN 'April'
WHEN strftime('%m', sale_date) = '05' THEN 'May'
WHEN strftime('%m', sale_date) = '06' THEN 'June'
WHEN strftime('%m', sale_date) = '07' THEN 'July'
WHEN strftime('%m', sale_date) = '08' THEN 'August'
WHEN strftime('%m', sale_date) = '09' THEN 'September'
WHEN strftime('%m', sale_date) = '10' THEN 'October'
WHEN strftime('%m', sale_date) = '11' THEN 'November'
WHEN strftime('%m', sale_date) = '12' THEN 'December'
END Month,
Total
FROM
bakery_sales
)
SELECT
Quarter,
Month,
Sum(Total) MonthlySales,
rank() OVER (PARTITION BY Quarter ORDER BY -sum(total)) SalesRank
FROM SalesTable
GROUP BY Quarter, Month
import sqlite3
import pandas as pd
conn = sqlite3.connect('bakery_sales.db')
sql_statement = """
WITH SalesTable AS (
SELECT
CASE
WHEN 0 + strftime('%m', sale_date) BETWEEN 1
AND 3 THEN 'Q1'
WHEN 0 + strftime('%m', sale_date) BETWEEN 4
AND 6 THEN 'Q2'
WHEN 0 + strftime('%m', sale_date) BETWEEN 7
AND 9 THEN 'Q3'
WHEN 0 + strftime('%m', sale_date) BETWEEN 10
AND 12 THEN 'Q4'
END Quarter,
CASE
WHEN strftime('%m', sale_date) = '01' THEN 'January'
WHEN strftime('%m', sale_date) = '02' THEN 'February'
WHEN strftime('%m', sale_date) = '03' THEN 'March'
WHEN strftime('%m', sale_date) = '04' THEN 'April'
WHEN strftime('%m', sale_date) = '05' THEN 'May'
WHEN strftime('%m', sale_date) = '06' THEN 'June'
WHEN strftime('%m', sale_date) = '07' THEN 'July'
WHEN strftime('%m', sale_date) = '08' THEN 'August'
WHEN strftime('%m', sale_date) = '09' THEN 'September'
WHEN strftime('%m', sale_date) = '10' THEN 'October'
WHEN strftime('%m', sale_date) = '11' THEN 'November'
WHEN strftime('%m', sale_date) = '12' THEN 'December'
END Month,
Total
FROM
bakery_sales
)
SELECT
Quarter,
Month,
Sum(Total) MonthlySales,
rank() OVER (PARTITION BY Quarter ORDER BY -sum(total)) SalesRank
FROM SalesTable
GROUP BY Quarter, Month
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
| | Quarter | Month | MonthlySales | SalesRank |
|---|---|---|---|---|
| 0 | Q1 | March | 6445100 | 1 |
| 1 | Q1 | February | 6423700 | 2 |
| 2 | Q1 | January | 4582500 | 3 |
| 3 | Q2 | April | 4893700 | 1 |
| 4 | Q2 | May | 308400 | 2 |
| 5 | Q3 | August | 6100500 | 1 |
| 6 | Q3 | September | 4895500 | 2 |
| 7 | Q3 | July | 4076500 | 3 |
| 8 | Q4 | December | 5009500 | 1 |
| 9 | Q4 | November | 4543000 | 2 |
| 10 | Q4 | October | 3959100 | 3 |
Window Frames#
Ref: https://www.sqlitetutorial.net/sqlite-window-functions/sqlite-window-frame/
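The frame clause controls which rows of the ordered window feed the aggregate: ROWS counts physical rows relative to the current one, while RANGE includes all rows whose ORDER BY value falls within the given distance. A minimal, self-contained sketch of a running total using ROWS UNBOUNDED PRECEDING (the values are made up for illustration):
WITH t(day, amount) AS (
    VALUES (1, 100), (2, 50), (3, 75)
)
SELECT
    day,
    amount,
    -- frame runs from the first row of the window up to the current row
    sum(amount) OVER (ORDER BY day ROWS UNBOUNDED PRECEDING) running_total
FROM t
-- running_total: 100, 150, 225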
WITH SalesTable AS (
SELECT
cast(strftime('%Y', sale_date) AS INT) sale_year,
cast(strftime('%W', sale_date) AS INT) sale_week,
Total
FROM
bakery_sales
ORDER BY sale_date
)
SELECT
sale_year,
sale_week,
sum(total) week_total,
sum(sum(total)) OVER (ORDER BY sale_year, sale_week ROWS UNBOUNDED PRECEDING) rolling_sum
FROM SalesTable
GROUP BY sale_year, sale_week
import sqlite3
import pandas as pd
conn = sqlite3.connect('bakery_sales.db')
sql_statement = """
WITH SalesTable AS (
SELECT
cast(strftime('%Y', sale_date) AS INT) sale_year,
cast(strftime('%W', sale_date) AS INT) sale_week,
Total
FROM
bakery_sales
ORDER BY sale_date
)
SELECT
sale_year,
sale_week,
sum(total) week_total,
sum(sum(total)) OVER (ORDER BY sale_year, sale_week ROWS UNBOUNDED PRECEDING) rolling_sum
FROM SalesTable
GROUP BY sale_year, sale_week
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
| | sale_year | sale_week | week_total | rolling_sum |
|---|---|---|---|---|
| 0 | 2019 | 27 | 427000 | 427000 |
| 1 | 2019 | 28 | 701100 | 1128100 |
| 2 | 2019 | 29 | 2529900 | 3658000 |
| 3 | 2019 | 30 | 1331900 | 4989900 |
| 4 | 2019 | 31 | 1469900 | 6459800 |
| 5 | 2019 | 32 | 1357100 | 7816900 |
| 6 | 2019 | 33 | 1377200 | 9194100 |
| 7 | 2019 | 34 | 1225100 | 10419200 |
| 8 | 2019 | 35 | 1146900 | 11566100 |
| 9 | 2019 | 36 | 1062000 | 12628100 |
| 10 | 2019 | 37 | 1251100 | 13879200 |
| 11 | 2019 | 38 | 1079700 | 14958900 |
| 12 | 2019 | 39 | 819500 | 15778400 |
| 13 | 2019 | 40 | 858300 | 16636700 |
| 14 | 2019 | 41 | 1012200 | 17648900 |
| 15 | 2019 | 42 | 1072000 | 18720900 |
| 16 | 2019 | 43 | 865100 | 19586000 |
| 17 | 2019 | 44 | 1249000 | 20835000 |
| 18 | 2019 | 45 | 988400 | 21823400 |
| 19 | 2019 | 46 | 830500 | 22653900 |
| 20 | 2019 | 47 | 1216600 | 23870500 |
| 21 | 2019 | 48 | 1313200 | 25183700 |
| 22 | 2019 | 49 | 981800 | 26165500 |
| 23 | 2019 | 50 | 1089100 | 27254600 |
| 24 | 2019 | 51 | 1091200 | 28345800 |
| 25 | 2019 | 52 | 238300 | 28584100 |
| 26 | 2020 | 0 | 875600 | 29459700 |
| 27 | 2020 | 1 | 1223700 | 30683400 |
| 28 | 2020 | 2 | 1060400 | 31743800 |
| 29 | 2020 | 3 | 713300 | 32457100 |
| 30 | 2020 | 4 | 1225800 | 33682900 |
| 31 | 2020 | 5 | 1464900 | 35147800 |
| 32 | 2020 | 6 | 1298900 | 36446700 |
| 33 | 2020 | 7 | 1800900 | 38247600 |
| 34 | 2020 | 8 | 1746100 | 39993700 |
| 35 | 2020 | 9 | 1651000 | 41644700 |
| 36 | 2020 | 10 | 1296700 | 42941400 |
| 37 | 2020 | 11 | 1522700 | 44464100 |
| 38 | 2020 | 12 | 1352700 | 45816800 |
| 39 | 2020 | 13 | 1265700 | 47082500 |
| 40 | 2020 | 14 | 1084300 | 48166800 |
| 41 | 2020 | 15 | 915400 | 49082200 |
| 42 | 2020 | 16 | 1330100 | 50412300 |
| 43 | 2020 | 17 | 825200 | 51237500 |
SELECT
sale_date,
sum(total) total,
max(sum(total)) over (order by cast(strftime('%j', sale_date) AS INT) range between 3 preceding and 3 following) seven_day_max
FROM bakery_sales
GROUP BY sale_date
order by sale_date
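Here the frame is expressed with RANGE over the day-of-year number (%j), so each row's frame covers the sale dates whose day number lies within three of its own, i.e. a seven-day window centred on the current date.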
import sqlite3
import pandas as pd
conn = sqlite3.connect('bakery_sales.db')
sql_statement = """
SELECT
sale_date,
sum(total) total,
max(sum(total)) over (order by cast(strftime('%j', sale_date) AS INT) range between 3 preceding and 3 following) seven_day_max
FROM bakery_sales
GROUP BY sale_date
order by sale_date
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
| | sale_date | total | seven_day_max |
|---|---|---|---|
| 0 | 2019-07-11 | 39600 | 212000 |
| 1 | 2019-07-12 | 58000 | 212000 |
| 2 | 2019-07-13 | 117400 | 212000 |
| 3 | 2019-07-14 | 212000 | 212000 |
| 4 | 2019-07-15 | 30900 | 212000 |
| 5 | 2019-07-17 | 74100 | 212000 |
| 6 | 2019-07-19 | 196800 | 203400 |
| 7 | 2019-07-20 | 203400 | 203400 |
| 8 | 2019-07-21 | 195900 | 203400 |
| 9 | 2019-07-22 | 167100 | 259200 |
| 10 | 2019-07-24 | 177600 | 1593100 |
| 11 | 2019-07-25 | 259200 | 1593100 |
| 12 | 2019-07-26 | 1593100 | 1593100 |
| 13 | 2019-07-27 | 196000 | 1593100 |
| 14 | 2019-07-28 | 136900 | 1593100 |
| 15 | 2019-07-29 | 214600 | 1593100 |
| 16 | 2019-07-31 | 203900 | 309000 |
| 17 | 2019-08-01 | 215200 | 309000 |
| 18 | 2019-08-02 | 168000 | 309000 |
| 19 | 2019-08-03 | 309000 | 309000 |
| 20 | 2019-08-04 | 221200 | 309000 |
| 21 | 2019-08-05 | 277300 | 309000 |
| 22 | 2019-08-07 | 201000 | 300600 |
| 23 | 2019-08-08 | 290300 | 300600 |
| 24 | 2019-08-09 | 112500 | 300600 |
| 25 | 2019-08-10 | 300600 | 300600 |
| 26 | 2019-08-11 | 288200 | 300600 |
| 27 | 2019-08-12 | 232200 | 300600 |
| 28 | 2019-08-14 | 269500 | 288200 |
| 29 | 2019-08-15 | 203000 | 314800 |
| 30 | 2019-08-16 | 95900 | 314800 |
| 31 | 2019-08-17 | 241700 | 314800 |
| 32 | 2019-08-18 | 314800 | 314800 |
| 33 | 2019-08-19 | 272800 | 314800 |
| 34 | 2019-08-21 | 296700 | 314800 |
| 35 | 2019-08-22 | 182700 | 296700 |
| 36 | 2019-08-23 | 155800 | 296700 |
| 37 | 2019-08-24 | 264200 | 296700 |
| 38 | 2019-08-25 | 205000 | 264200 |
| 39 | 2019-08-26 | 159700 | 264200 |
| 40 | 2019-08-28 | 184900 | 257100 |
| 41 | 2019-08-29 | 254200 | 257100 |
| 42 | 2019-08-30 | 257100 | 257100 |
| 43 | 2019-08-31 | 127000 | 257100 |
| 44 | 2019-09-01 | 242200 | 277200 |
| 45 | 2019-09-02 | 132800 | 277200 |
| 46 | 2019-09-04 | 277200 | 285200 |
| 47 | 2019-09-05 | 247700 | 285200 |
| 48 | 2019-09-06 | 121700 | 285200 |
| 49 | 2019-09-07 | 285200 | 285200 |
| 50 | 2019-09-08 | 82300 | 285200 |
| 51 | 2019-09-09 | 215100 | 285200 |
| 52 | 2019-09-11 | 217700 | 217700 |
| 53 | 2019-09-12 | 185200 | 293300 |
| 54 | 2019-09-14 | 150700 | 293300 |
| 55 | 2019-09-15 | 293300 | 293300 |
| 56 | 2019-09-18 | 150800 | 301700 |
| 57 | 2019-09-19 | 301400 | 301700 |
| 58 | 2019-09-20 | 203000 | 301700 |
| 59 | 2019-09-21 | 301700 | 301700 |
| 60 | 2019-09-22 | 294200 | 301700 |
| 61 | 2019-09-23 | 174100 | 301700 |
| 62 | 2019-09-25 | 147400 | 294200 |
| 63 | 2019-09-26 | 54100 | 287000 |
| 64 | 2019-09-27 | 140400 | 287000 |
| 65 | 2019-09-28 | 287000 | 287000 |
| 66 | 2019-09-29 | 276700 | 287000 |
| 67 | 2019-09-30 | 113600 | 319700 |
| 68 | 2019-10-02 | 107300 | 319700 |
| 69 | 2019-10-03 | 319700 | 319700 |
| 70 | 2019-10-04 | 48000 | 319700 |
| 71 | 2019-10-05 | 99300 | 319700 |
| 72 | 2019-10-06 | 131600 | 319700 |
| 73 | 2019-10-07 | 117300 | 136500 |
| 74 | 2019-10-09 | 45200 | 153100 |
| 75 | 2019-10-10 | 136500 | 293100 |
| 76 | 2019-10-11 | 113100 | 293100 |
| 77 | 2019-10-12 | 153100 | 293100 |
| 78 | 2019-10-13 | 293100 | 293100 |
| 79 | 2019-10-14 | 177500 | 293100 |
| 80 | 2019-10-16 | 148400 | 293100 |
| 81 | 2019-10-17 | 186800 | 213300 |
| 82 | 2019-10-18 | 84200 | 307100 |
| 83 | 2019-10-19 | 202000 | 307100 |
| 84 | 2019-10-20 | 213300 | 307100 |
| 85 | 2019-10-21 | 307100 | 307100 |
| 86 | 2019-10-23 | 71200 | 307100 |
| 87 | 2019-10-24 | 219900 | 307100 |
| 88 | 2019-10-25 | 133800 | 219900 |
| 89 | 2019-10-26 | 128500 | 219900 |
| 90 | 2019-10-27 | 211500 | 219900 |
| 91 | 2019-10-28 | 215100 | 215100 |
| 92 | 2019-10-31 | 95600 | 236000 |
| 93 | 2019-11-01 | 114800 | 236000 |
| 94 | 2019-11-02 | 236000 | 236000 |
| 95 | 2019-11-03 | 203600 | 236000 |
| 96 | 2019-11-04 | 92100 | 236000 |
| 97 | 2019-11-06 | 129600 | 279200 |
| 98 | 2019-11-07 | 167800 | 357300 |
| 99 | 2019-11-08 | 223000 | 357300 |
| 100 | 2019-11-09 | 279200 | 357300 |
| 101 | 2019-11-10 | 357300 | 357300 |
| 102 | 2019-11-11 | 128400 | 357300 |
| 103 | 2019-11-13 | 145000 | 357300 |
| 104 | 2019-11-14 | 72000 | 319000 |
| 105 | 2019-11-15 | 170700 | 319000 |
| 106 | 2019-11-16 | 153300 | 319000 |
| 107 | 2019-11-17 | 319000 | 319000 |
| 108 | 2019-11-18 | 98700 | 319000 |
| 109 | 2019-11-20 | 136300 | 319000 |
| 110 | 2019-11-21 | 61600 | 289000 |
| 111 | 2019-11-22 | 66700 | 289000 |
| 112 | 2019-11-23 | 178200 | 289000 |
| 113 | 2019-11-24 | 289000 | 289000 |
| 114 | 2019-11-25 | 167600 | 289000 |
| 115 | 2019-11-27 | 127200 | 289000 |
| 116 | 2019-11-28 | 189400 | 295900 |
| 117 | 2019-11-29 | 197000 | 295900 |
| 118 | 2019-11-30 | 239500 | 295900 |
| 119 | 2019-12-01 | 295900 | 295900 |
| 120 | 2019-12-02 | 106100 | 295900 |
| 121 | 2019-12-04 | 215000 | 295900 |
| 122 | 2019-12-05 | 202400 | 409000 |
| 123 | 2019-12-06 | 195000 | 409000 |
| 124 | 2019-12-07 | 185700 | 409000 |
| 125 | 2019-12-08 | 409000 | 409000 |
| 126 | 2019-12-09 | 142500 | 409000 |
| 127 | 2019-12-11 | 206200 | 409000 |
| 128 | 2019-12-12 | 201500 | 206200 |
| 129 | 2019-12-13 | 105900 | 206200 |
| 130 | 2019-12-14 | 151800 | 206200 |
| 131 | 2019-12-15 | 173900 | 205900 |
| 132 | 2019-12-16 | 157800 | 205900 |
| 133 | 2019-12-18 | 205900 | 205900 |
| 134 | 2019-12-19 | 185600 | 223900 |
| 135 | 2019-12-20 | 121900 | 223900 |
| 136 | 2019-12-21 | 194000 | 223900 |
| 137 | 2019-12-22 | 223900 | 223900 |
| 138 | 2019-12-23 | 166600 | 223900 |
| 139 | 2019-12-24 | 56000 | 223900 |
| 140 | 2019-12-25 | 109500 | 369900 |
| 141 | 2019-12-26 | 92500 | 369900 |
| 142 | 2019-12-27 | 47100 | 369900 |
| 143 | 2019-12-28 | 369900 | 369900 |
| 144 | 2019-12-29 | 249600 | 369900 |
| 145 | 2019-12-30 | 238300 | 369900 |
| 146 | 2020-01-02 | 234100 | 275600 |
| 147 | 2020-01-03 | 238200 | 275600 |
| 148 | 2020-01-04 | 127700 | 275600 |
| 149 | 2020-01-05 | 275600 | 275600 |
| 150 | 2020-01-06 | 109500 | 275600 |
| 151 | 2020-01-08 | 251800 | 275600 |
| 152 | 2020-01-09 | 259900 | 307000 |
| 153 | 2020-01-10 | 146500 | 307000 |
| 154 | 2020-01-11 | 149000 | 307000 |
| 155 | 2020-01-12 | 307000 | 307000 |
| 156 | 2020-01-13 | 138800 | 307000 |
| 157 | 2020-01-15 | 264000 | 307000 |
| 158 | 2020-01-16 | 33600 | 354200 |
| 159 | 2020-01-17 | 107100 | 354200 |
| 160 | 2020-01-18 | 162700 | 354200 |
| 161 | 2020-01-19 | 354200 | 354200 |
| 162 | 2020-01-20 | 92400 | 354200 |
| 163 | 2020-01-22 | 136200 | 354200 |
| 164 | 2020-01-23 | 288400 | 288400 |
| 165 | 2020-01-24 | 53900 | 288400 |
| 166 | 2020-01-26 | 142400 | 288400 |
| 167 | 2020-01-27 | 158000 | 245900 |
| 168 | 2020-01-29 | 104400 | 245900 |
| 169 | 2020-01-30 | 245900 | 342600 |
| 170 | 2020-01-31 | 201200 | 342600 |
| 171 | 2020-02-01 | 173700 | 342600 |
| 172 | 2020-02-02 | 342600 | 342600 |
| 173 | 2020-02-03 | 93700 | 342600 |
| 174 | 2020-02-05 | 281900 | 342600 |
| 175 | 2020-02-06 | 206000 | 341700 |
| 176 | 2020-02-07 | 288800 | 341700 |
| 177 | 2020-02-08 | 252800 | 341700 |
| 178 | 2020-02-09 | 341700 | 354500 |
| 179 | 2020-02-10 | 65600 | 354500 |
| 180 | 2020-02-12 | 354500 | 354500 |
| 181 | 2020-02-13 | 110300 | 354500 |
| 182 | 2020-02-14 | 242900 | 354500 |
| 183 | 2020-02-15 | 235400 | 354500 |
| 184 | 2020-02-16 | 290200 | 290200 |
| 185 | 2020-02-17 | 183200 | 290200 |
| 186 | 2020-02-19 | 212000 | 353000 |
| 187 | 2020-02-20 | 249900 | 647100 |
| 188 | 2020-02-21 | 155700 | 647100 |
| 189 | 2020-02-22 | 353000 | 647100 |
| 190 | 2020-02-23 | 647100 | 647100 |
| 191 | 2020-02-24 | 189400 | 647100 |
| 192 | 2020-02-26 | 345100 | 647100 |
| 193 | 2020-02-27 | 197700 | 403400 |
| 194 | 2020-02-28 | 337000 | 403400 |
| 195 | 2020-02-29 | 273500 | 403400 |
| 196 | 2020-03-01 | 403400 | 403400 |
| 197 | 2020-03-02 | 273000 | 421500 |
| 198 | 2020-03-05 | 421500 | 421500 |
| 199 | 2020-03-06 | 305800 | 421500 |
| 200 | 2020-03-07 | 294700 | 421500 |
| 201 | 2020-03-08 | 356000 | 421500 |
| 202 | 2020-03-09 | 144000 | 356000 |
| 203 | 2020-03-11 | 204600 | 356000 |
| 204 | 2020-03-12 | 251800 | 259700 |
| 205 | 2020-03-13 | 224100 | 263500 |
| 206 | 2020-03-14 | 212500 | 263500 |
| 207 | 2020-03-15 | 259700 | 323600 |
| 208 | 2020-03-16 | 263500 | 323600 |
| 209 | 2020-03-18 | 323600 | 368600 |
| 210 | 2020-03-19 | 173500 | 368600 |
| 211 | 2020-03-20 | 135800 | 368600 |
| 212 | 2020-03-21 | 368600 | 368600 |
| 213 | 2020-03-22 | 257700 | 368600 |
| 214 | 2020-03-23 | 213200 | 368600 |
| 215 | 2020-03-25 | 216300 | 257700 |
| 216 | 2020-03-26 | 206700 | 320300 |
| 217 | 2020-03-27 | 198700 | 320300 |
| 218 | 2020-03-28 | 197500 | 320300 |
| 219 | 2020-03-29 | 320300 | 320300 |
| 220 | 2020-03-30 | 218600 | 320300 |
| 221 | 2020-04-01 | 230100 | 320300 |
| 222 | 2020-04-02 | 297700 | 297700 |
| 223 | 2020-04-03 | 140500 | 297700 |
| 224 | 2020-04-04 | 153000 | 297700 |
| 225 | 2020-04-05 | 225800 | 297700 |
| 226 | 2020-04-06 | 132000 | 272600 |
| 227 | 2020-04-08 | 272600 | 272600 |
| 228 | 2020-04-09 | 198600 | 272600 |
| 229 | 2020-04-10 | 149400 | 272600 |
| 230 | 2020-04-11 | 119700 | 272600 |
| 231 | 2020-04-12 | 212000 | 212000 |
| 232 | 2020-04-13 | 53300 | 212000 |
| 233 | 2020-04-15 | 202400 | 269200 |
| 234 | 2020-04-16 | 102800 | 269200 |
| 235 | 2020-04-17 | 147000 | 269200 |
| 236 | 2020-04-18 | 269200 | 269200 |
| 237 | 2020-04-19 | 140700 | 269200 |
| 238 | 2020-04-20 | 188900 | 269200 |
| 239 | 2020-04-22 | 225400 | 307900 |
| 240 | 2020-04-23 | 164300 | 307900 |
| 241 | 2020-04-24 | 165900 | 307900 |
| 242 | 2020-04-25 | 307900 | 307900 |
| 243 | 2020-04-26 | 277700 | 307900 |
| 244 | 2020-04-27 | 264600 | 307900 |
| 245 | 2020-04-29 | 118100 | 277700 |
| 246 | 2020-04-30 | 134100 | 264600 |
| 247 | 2020-05-01 | 215700 | 215700 |
| 248 | 2020-05-02 | 92700 | 215700 |
Lag and Lead#
WITH SalesTable AS (
SELECT
cast(strftime('%Y', sale_date) AS INT) sale_year,
cast(strftime('%W', sale_date) AS INT) sale_week,
Total
FROM
bakery_sales
ORDER BY sale_date
)
SELECT
sale_year,
sale_week,
sum(total) week_total,
lag(sum(total), 1) over (order by sale_week) prev_week_total,
lead(sum(total), 1) over (order by sale_week) next_week_total
FROM SalesTable
GROUP BY sale_year, sale_week
import sqlite3
import pandas as pd
conn = sqlite3.connect('bakery_sales.db')
sql_statement = """
WITH SalesTable AS (
SELECT
cast(strftime('%Y', sale_date) AS INT) sale_year,
cast(strftime('%W', sale_date) AS INT) sale_week,
Total
FROM
bakery_sales
ORDER BY sale_date
)
SELECT
sale_year,
sale_week,
sum(total) week_total,
lag(sum(total), 1) over (order by sale_week) prev_week_total,
lead(sum(total), 1) over (order by sale_week) next_week_total
FROM SalesTable
GROUP BY sale_year, sale_week
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
| | sale_year | sale_week | week_total | prev_week_total | next_week_total |
|---|---|---|---|---|---|
| 0 | 2020 | 0 | 875600 | nan | 1223700.000000 |
| 1 | 2020 | 1 | 1223700 | 875600.000000 | 1060400.000000 |
| 2 | 2020 | 2 | 1060400 | 1223700.000000 | 713300.000000 |
| 3 | 2020 | 3 | 713300 | 1060400.000000 | 1225800.000000 |
| 4 | 2020 | 4 | 1225800 | 713300.000000 | 1464900.000000 |
| 5 | 2020 | 5 | 1464900 | 1225800.000000 | 1298900.000000 |
| 6 | 2020 | 6 | 1298900 | 1464900.000000 | 1800900.000000 |
| 7 | 2020 | 7 | 1800900 | 1298900.000000 | 1746100.000000 |
| 8 | 2020 | 8 | 1746100 | 1800900.000000 | 1651000.000000 |
| 9 | 2020 | 9 | 1651000 | 1746100.000000 | 1296700.000000 |
| 10 | 2020 | 10 | 1296700 | 1651000.000000 | 1522700.000000 |
| 11 | 2020 | 11 | 1522700 | 1296700.000000 | 1352700.000000 |
| 12 | 2020 | 12 | 1352700 | 1522700.000000 | 1265700.000000 |
| 13 | 2020 | 13 | 1265700 | 1352700.000000 | 1084300.000000 |
| 14 | 2020 | 14 | 1084300 | 1265700.000000 | 915400.000000 |
| 15 | 2020 | 15 | 915400 | 1084300.000000 | 1330100.000000 |
| 16 | 2020 | 16 | 1330100 | 915400.000000 | 825200.000000 |
| 17 | 2020 | 17 | 825200 | 1330100.000000 | 427000.000000 |
| 18 | 2019 | 27 | 427000 | 825200.000000 | 701100.000000 |
| 19 | 2019 | 28 | 701100 | 427000.000000 | 2529900.000000 |
| 20 | 2019 | 29 | 2529900 | 701100.000000 | 1331900.000000 |
| 21 | 2019 | 30 | 1331900 | 2529900.000000 | 1469900.000000 |
| 22 | 2019 | 31 | 1469900 | 1331900.000000 | 1357100.000000 |
| 23 | 2019 | 32 | 1357100 | 1469900.000000 | 1377200.000000 |
| 24 | 2019 | 33 | 1377200 | 1357100.000000 | 1225100.000000 |
| 25 | 2019 | 34 | 1225100 | 1377200.000000 | 1146900.000000 |
| 26 | 2019 | 35 | 1146900 | 1225100.000000 | 1062000.000000 |
| 27 | 2019 | 36 | 1062000 | 1146900.000000 | 1251100.000000 |
| 28 | 2019 | 37 | 1251100 | 1062000.000000 | 1079700.000000 |
| 29 | 2019 | 38 | 1079700 | 1251100.000000 | 819500.000000 |
| 30 | 2019 | 39 | 819500 | 1079700.000000 | 858300.000000 |
| 31 | 2019 | 40 | 858300 | 819500.000000 | 1012200.000000 |
| 32 | 2019 | 41 | 1012200 | 858300.000000 | 1072000.000000 |
| 33 | 2019 | 42 | 1072000 | 1012200.000000 | 865100.000000 |
| 34 | 2019 | 43 | 865100 | 1072000.000000 | 1249000.000000 |
| 35 | 2019 | 44 | 1249000 | 865100.000000 | 988400.000000 |
| 36 | 2019 | 45 | 988400 | 1249000.000000 | 830500.000000 |
| 37 | 2019 | 46 | 830500 | 988400.000000 | 1216600.000000 |
| 38 | 2019 | 47 | 1216600 | 830500.000000 | 1313200.000000 |
| 39 | 2019 | 48 | 1313200 | 1216600.000000 | 981800.000000 |
| 40 | 2019 | 49 | 981800 | 1313200.000000 | 1089100.000000 |
| 41 | 2019 | 50 | 1089100 | 981800.000000 | 1091200.000000 |
| 42 | 2019 | 51 | 1091200 | 1089100.000000 | 238300.000000 |
| 43 | 2019 | 52 | 238300 | 1091200.000000 | nan |
WITH SalesTable AS (
SELECT
cast(strftime('%Y', sale_date) AS INT) sale_year,
cast(strftime('%W', sale_date) AS INT) sale_week,
Total
FROM
bakery_sales
ORDER BY
sale_date
)
SELECT
sale_year,
sale_week,
sum(total) week_total,
round(
CAST(
(
sum(total) - lag(sum(total), 1) over (
order by
sale_week
)
) AS REAL
) / CAST(
lag(sum(total), 1) over (
order by
sale_week
) AS REAL
) * 100,
1
) CHANGE
FROM
SalesTable
GROUP BY
sale_year,
sale_week
import sqlite3
import pandas as pd
conn = sqlite3.connect('bakery_sales.db')
sql_statement = """
WITH SalesTable AS (
SELECT
cast(strftime('%Y', sale_date) AS INT) sale_year,
cast(strftime('%W', sale_date) AS INT) sale_week,
Total
FROM
bakery_sales
ORDER BY
sale_date
)
SELECT
sale_year,
sale_week,
sum(total) week_total,
round(
CAST(
(
sum(total) - lag(sum(total), 1) over (
order by
sale_week
)
) AS REAL
) / CAST(
lag(sum(total), 1) over (
order by
sale_week
) AS REAL
) * 100,
1
) CHANGE
FROM
SalesTable
GROUP BY
sale_year,
sale_week
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
| | sale_year | sale_week | week_total | CHANGE |
|---|---|---|---|---|
| 0 | 2020 | 0 | 875600 | nan |
| 1 | 2020 | 1 | 1223700 | 39.800000 |
| 2 | 2020 | 2 | 1060400 | -13.300000 |
| 3 | 2020 | 3 | 713300 | -32.700000 |
| 4 | 2020 | 4 | 1225800 | 71.800000 |
| 5 | 2020 | 5 | 1464900 | 19.500000 |
| 6 | 2020 | 6 | 1298900 | -11.300000 |
| 7 | 2020 | 7 | 1800900 | 38.600000 |
| 8 | 2020 | 8 | 1746100 | -3.000000 |
| 9 | 2020 | 9 | 1651000 | -5.400000 |
| 10 | 2020 | 10 | 1296700 | -21.500000 |
| 11 | 2020 | 11 | 1522700 | 17.400000 |
| 12 | 2020 | 12 | 1352700 | -11.200000 |
| 13 | 2020 | 13 | 1265700 | -6.400000 |
| 14 | 2020 | 14 | 1084300 | -14.300000 |
| 15 | 2020 | 15 | 915400 | -15.600000 |
| 16 | 2020 | 16 | 1330100 | 45.300000 |
| 17 | 2020 | 17 | 825200 | -38.000000 |
| 18 | 2019 | 27 | 427000 | -48.300000 |
| 19 | 2019 | 28 | 701100 | 64.200000 |
| 20 | 2019 | 29 | 2529900 | 260.800000 |
| 21 | 2019 | 30 | 1331900 | -47.400000 |
| 22 | 2019 | 31 | 1469900 | 10.400000 |
| 23 | 2019 | 32 | 1357100 | -7.700000 |
| 24 | 2019 | 33 | 1377200 | 1.500000 |
| 25 | 2019 | 34 | 1225100 | -11.000000 |
| 26 | 2019 | 35 | 1146900 | -6.400000 |
| 27 | 2019 | 36 | 1062000 | -7.400000 |
| 28 | 2019 | 37 | 1251100 | 17.800000 |
| 29 | 2019 | 38 | 1079700 | -13.700000 |
| 30 | 2019 | 39 | 819500 | -24.100000 |
| 31 | 2019 | 40 | 858300 | 4.700000 |
| 32 | 2019 | 41 | 1012200 | 17.900000 |
| 33 | 2019 | 42 | 1072000 | 5.900000 |
| 34 | 2019 | 43 | 865100 | -19.300000 |
| 35 | 2019 | 44 | 1249000 | 44.400000 |
| 36 | 2019 | 45 | 988400 | -20.900000 |
| 37 | 2019 | 46 | 830500 | -16.000000 |
| 38 | 2019 | 47 | 1216600 | 46.500000 |
| 39 | 2019 | 48 | 1313200 | 7.900000 |
| 40 | 2019 | 49 | 981800 | -25.200000 |
| 41 | 2019 | 50 | 1089100 | 10.900000 |
| 42 | 2019 | 51 | 1091200 | 0.200000 |
| 43 | 2019 | 52 | 238300 | -78.200000 |
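Both queries above order the window by sale_week alone, so the 2020 weeks (0-17) sort ahead of the 2019 weeks (27-52) and the lag comparison jumps across the year boundary. A sketch of the same comparison with the window ordered by year and week, keeping the series chronological (output not shown):
WITH SalesTable AS (
    SELECT
        cast(strftime('%Y', sale_date) AS INT) sale_year,
        cast(strftime('%W', sale_date) AS INT) sale_week,
        Total
    FROM
        bakery_sales
)
SELECT
    sale_year,
    sale_week,
    sum(total) week_total,
    -- ordering by both columns keeps the 2019 weeks ahead of the 2020 weeks
    lag(sum(total), 1) over (order by sale_year, sale_week) prev_week_total
FROM SalesTable
GROUP BY sale_year, sale_week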
Ranking#
- rank: returns the same ranking in case of a tie, with gaps in the rankings
- row_number: returns a unique number for each row, with rankings arbitrarily assigned in case of a tie
- dense_rank: returns the same ranking in the case of a tie, with no gaps in the rankings
Ref: https://blog.jooq.org/the-difference-between-row_number-rank-and-dense_rank/
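A minimal, self-contained illustration of how the three functions handle a tie (the scores are made up for illustration); the full query against the patient database follows:
WITH scores(name, score) AS (
    VALUES ('a', 10), ('b', 10), ('c', 8)
)
SELECT
    name,
    score,
    rank() OVER (ORDER BY score DESC) score_rank,         -- 1, 1, 3
    row_number() OVER (ORDER BY score DESC) score_row,    -- 1, 2, 3
    dense_rank() OVER (ORDER BY score DESC) score_dense   -- 1, 1, 2
FROM scores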
SELECT
PatientGender,
PatientMaritalStatus,
count(*) StatusCount,
rank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRank,
row_number() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRowNumber,
dense_rank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusDenseRank
FROM
PatientCorePopulatedTable
GROUP BY PatientGender, PatientMaritalStatus
import sqlite3
import pandas as pd
conn = sqlite3.connect('100_patients.db')
sql_statement = """
SELECT
PatientGender,
PatientMaritalStatus,
count(*) StatusCount,
rank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRank,
row_number() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRowNumber,
dense_rank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusDenseRank
FROM
PatientCorePopulatedTable
GROUP BY PatientGender, PatientMaritalStatus
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
| | PatientGender | PatientMaritalStatus | StatusCount | StatusRank | StatusRowNumber | StatusDenseRank |
|---|---|---|---|---|---|---|
| 0 | Female | Married | 23 | 1 | 1 | 1 |
| 1 | Female | Single | 17 | 2 | 2 | 2 |
| 2 | Female | Divorced | 6 | 3 | 3 | 3 |
| 3 | Female | Unknown | 3 | 4 | 4 | 4 |
| 4 | Female | Separated | 3 | 4 | 5 | 4 |
| 5 | Male | Married | 22 | 1 | 1 | 1 |
| 6 | Male | Single | 15 | 2 | 2 | 2 |
| 7 | Male | Divorced | 5 | 3 | 3 | 3 |
| 8 | Male | Unknown | 3 | 4 | 4 | 4 |
| 9 | Male | Separated | 2 | 5 | 5 | 5 |
| 10 | Male | Widowed | 1 | 6 | 6 | 6 |
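In the Female partition above, Unknown and Separated tie with a count of 3: rank and dense_rank both assign them 4, while row_number breaks the tie arbitrarily with 4 and 5. Had a lower-ranked status followed, rank would have skipped to 6, whereas dense_rank would have continued with 5.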