Analytic Functions#

Analytic functions allow you to group rows into windows, partitioning the data. Windows are defined using the OVER clause and optionally combined with the PARTITION subclause.

Date Reference: https://www.techonthenet.com/sqlite/functions/julianday.php

Data Windows#

SELECT
    CASE
        WHEN 0 + strftime('%m', sale_date) BETWEEN 1
        AND 3 THEN 'Q1'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 4
        AND 6 THEN 'Q2'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 7
        AND 9 THEN 'Q3'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 10
        AND 12 THEN 'Q4'
    END Quarter,
    CASE
        WHEN strftime('%m', sale_date) = '01' THEN 'January'
        WHEN strftime('%m', sale_date) = '02' THEN 'February'
        WHEN strftime('%m', sale_date) = '03' THEN 'March'
        WHEN strftime('%m', sale_date) = '04' THEN 'April'
        WHEN strftime('%m', sale_date) = '05' THEN 'May'
        WHEN strftime('%m', sale_date) = '06' THEN 'June'
        WHEN strftime('%m', sale_date) = '07' THEN 'July'
        WHEN strftime('%m', sale_date) = '08' THEN 'August'
        WHEN strftime('%m', sale_date) = '09' THEN 'September'
        WHEN strftime('%m', sale_date) = '10' THEN 'October'
        WHEN strftime('%m', sale_date) = '11' THEN 'November'
        WHEN strftime('%m', sale_date) = '12' THEN 'December'
    END Month,
    sum(total) Monthly_Sales
FROM
    bakery_sales
GROUP BY
    Quarter,
    Month
ORDER BY 
	strftime('%m', sale_date)
import sqlite3
import pandas as pd

conn = sqlite3.connect('bakery_sales.db')

sql_statement = """
SELECT
    CASE
        WHEN 0 + strftime('%m', sale_date) BETWEEN 1
        AND 3 THEN 'Q1'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 4
        AND 6 THEN 'Q2'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 7
        AND 9 THEN 'Q3'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 10
        AND 12 THEN 'Q4'
    END Quarter,
    CASE
        WHEN strftime('%m', sale_date) = '01' THEN 'January'
        WHEN strftime('%m', sale_date) = '02' THEN 'February'
        WHEN strftime('%m', sale_date) = '03' THEN 'March'
        WHEN strftime('%m', sale_date) = '04' THEN 'April'
        WHEN strftime('%m', sale_date) = '05' THEN 'May'
        WHEN strftime('%m', sale_date) = '06' THEN 'June'
        WHEN strftime('%m', sale_date) = '07' THEN 'July'
        WHEN strftime('%m', sale_date) = '08' THEN 'August'
        WHEN strftime('%m', sale_date) = '09' THEN 'September'
        WHEN strftime('%m', sale_date) = '10' THEN 'October'
        WHEN strftime('%m', sale_date) = '11' THEN 'November'
        WHEN strftime('%m', sale_date) = '12' THEN 'December'
    END Month,
    sum(total) Monthly_Sales
FROM
    bakery_sales
GROUP BY
    Quarter,
    Month
ORDER BY 
	strftime('%m', sale_date)
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
  Quarter Month Monthly_Sales
0 Q1 January 4582500
1 Q1 February 6423700
2 Q1 March 6445100
3 Q2 April 4893700
4 Q2 May 308400
5 Q3 July 4076500
6 Q3 August 6100500
7 Q3 September 4895500
8 Q4 October 3959100
9 Q4 November 4543000
10 Q4 December 5009500
WITH SalesTable AS (
SELECT
    CASE
        WHEN 0 + strftime('%m', sale_date) BETWEEN 1
        AND 3 THEN 'Q1'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 4
        AND 6 THEN 'Q2'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 7
        AND 9 THEN 'Q3'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 10
        AND 12 THEN 'Q4'
    END Quarter,
    CASE
        WHEN strftime('%m', sale_date) = '01' THEN 'January'
        WHEN strftime('%m', sale_date) = '02' THEN 'February'
        WHEN strftime('%m', sale_date) = '03' THEN 'March'
        WHEN strftime('%m', sale_date) = '04' THEN 'April'
        WHEN strftime('%m', sale_date) = '05' THEN 'May'
        WHEN strftime('%m', sale_date) = '06' THEN 'June'
        WHEN strftime('%m', sale_date) = '07' THEN 'July'
        WHEN strftime('%m', sale_date) = '08' THEN 'August'
        WHEN strftime('%m', sale_date) = '09' THEN 'September'
        WHEN strftime('%m', sale_date) = '10' THEN 'October'
        WHEN strftime('%m', sale_date) = '11' THEN 'November'
        WHEN strftime('%m', sale_date) = '12' THEN 'December'
    END Month, 
	Total
FROM
    bakery_sales
)
SELECT 
	Quarter,
	Month, 
	Sum(Total) MonthlySales,
	Max(sum(total)) over() max_overall_sales,
	Max(sum(total)) over(partition by quarter) max_quarter_sales
FROM SalesTable
GROUP BY Quarter, Month
import sqlite3
import pandas as pd

conn = sqlite3.connect('bakery_sales.db')

sql_statement = """
WITH SalesTable AS (
SELECT
    CASE
        WHEN 0 + strftime('%m', sale_date) BETWEEN 1
        AND 3 THEN 'Q1'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 4
        AND 6 THEN 'Q2'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 7
        AND 9 THEN 'Q3'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 10
        AND 12 THEN 'Q4'
    END Quarter,
    CASE
        WHEN strftime('%m', sale_date) = '01' THEN 'January'
        WHEN strftime('%m', sale_date) = '02' THEN 'February'
        WHEN strftime('%m', sale_date) = '03' THEN 'March'
        WHEN strftime('%m', sale_date) = '04' THEN 'April'
        WHEN strftime('%m', sale_date) = '05' THEN 'May'
        WHEN strftime('%m', sale_date) = '06' THEN 'June'
        WHEN strftime('%m', sale_date) = '07' THEN 'July'
        WHEN strftime('%m', sale_date) = '08' THEN 'August'
        WHEN strftime('%m', sale_date) = '09' THEN 'September'
        WHEN strftime('%m', sale_date) = '10' THEN 'October'
        WHEN strftime('%m', sale_date) = '11' THEN 'November'
        WHEN strftime('%m', sale_date) = '12' THEN 'December'
    END Month, 
	Total
FROM
    bakery_sales
)
SELECT 
	Quarter,
	Month, 
	Sum(Total) MonthlySales,
	Max(sum(total)) over() max_overall_sales,
	Max(sum(total)) over(partition by quarter) max_quarter_sales
FROM SalesTable
GROUP BY Quarter, Month
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
  Quarter Month MonthlySales max_overall_sales max_quarter_sales
0 Q1 February 6423700 6445100 6445100
1 Q1 January 4582500 6445100 6445100
2 Q1 March 6445100 6445100 6445100
3 Q2 April 4893700 6445100 4893700
4 Q2 May 308400 6445100 4893700
5 Q3 August 6100500 6445100 6100500
6 Q3 July 4076500 6445100 6100500
7 Q3 September 4895500 6445100 6100500
8 Q4 December 5009500 6445100 5009500
9 Q4 November 4543000 6445100 5009500
10 Q4 October 3959100 6445100 5009500

Localized Sorting#

WITH SalesTable AS (
SELECT
    CASE
        WHEN 0 + strftime('%m', sale_date) BETWEEN 1
        AND 3 THEN 'Q1'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 4
        AND 6 THEN 'Q2'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 7
        AND 9 THEN 'Q3'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 10
        AND 12 THEN 'Q4'
    END Quarter,
    CASE
        WHEN strftime('%m', sale_date) = '01' THEN 'January'
        WHEN strftime('%m', sale_date) = '02' THEN 'February'
        WHEN strftime('%m', sale_date) = '03' THEN 'March'
        WHEN strftime('%m', sale_date) = '04' THEN 'April'
        WHEN strftime('%m', sale_date) = '05' THEN 'May'
        WHEN strftime('%m', sale_date) = '06' THEN 'June'
        WHEN strftime('%m', sale_date) = '07' THEN 'July'
        WHEN strftime('%m', sale_date) = '08' THEN 'August'
        WHEN strftime('%m', sale_date) = '09' THEN 'September'
        WHEN strftime('%m', sale_date) = '10' THEN 'October'
        WHEN strftime('%m', sale_date) = '11' THEN 'November'
        WHEN strftime('%m', sale_date) = '12' THEN 'December'
    END Month, 
	Total
FROM
    bakery_sales
)
SELECT 
	Quarter,
	Month, 
	Sum(Total) MonthlySales,
	rank() OVER (ORDER BY -sum(total)) SalesRank
FROM SalesTable
GROUP BY Quarter, Month
import sqlite3
import pandas as pd

conn = sqlite3.connect('bakery_sales.db')

sql_statement = """
WITH SalesTable AS (
SELECT
    CASE
        WHEN 0 + strftime('%m', sale_date) BETWEEN 1
        AND 3 THEN 'Q1'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 4
        AND 6 THEN 'Q2'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 7
        AND 9 THEN 'Q3'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 10
        AND 12 THEN 'Q4'
    END Quarter,
    CASE
        WHEN strftime('%m', sale_date) = '01' THEN 'January'
        WHEN strftime('%m', sale_date) = '02' THEN 'February'
        WHEN strftime('%m', sale_date) = '03' THEN 'March'
        WHEN strftime('%m', sale_date) = '04' THEN 'April'
        WHEN strftime('%m', sale_date) = '05' THEN 'May'
        WHEN strftime('%m', sale_date) = '06' THEN 'June'
        WHEN strftime('%m', sale_date) = '07' THEN 'July'
        WHEN strftime('%m', sale_date) = '08' THEN 'August'
        WHEN strftime('%m', sale_date) = '09' THEN 'September'
        WHEN strftime('%m', sale_date) = '10' THEN 'October'
        WHEN strftime('%m', sale_date) = '11' THEN 'November'
        WHEN strftime('%m', sale_date) = '12' THEN 'December'
    END Month, 
	Total
FROM
    bakery_sales
)
SELECT 
	Quarter,
	Month, 
	Sum(Total) MonthlySales,
	rank() OVER (ORDER BY -sum(total)) SalesRank
FROM SalesTable
GROUP BY Quarter, Month
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
  Quarter Month MonthlySales SalesRank
0 Q1 March 6445100 1
1 Q1 February 6423700 2
2 Q3 August 6100500 3
3 Q4 December 5009500 4
4 Q3 September 4895500 5
5 Q2 April 4893700 6
6 Q1 January 4582500 7
7 Q4 November 4543000 8
8 Q3 July 4076500 9
9 Q4 October 3959100 10
10 Q2 May 308400 11
WITH SalesTable AS (
SELECT
    CASE
        WHEN 0 + strftime('%m', sale_date) BETWEEN 1
        AND 3 THEN 'Q1'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 4
        AND 6 THEN 'Q2'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 7
        AND 9 THEN 'Q3'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 10
        AND 12 THEN 'Q4'
    END Quarter,
    CASE
        WHEN strftime('%m', sale_date) = '01' THEN 'January'
        WHEN strftime('%m', sale_date) = '02' THEN 'February'
        WHEN strftime('%m', sale_date) = '03' THEN 'March'
        WHEN strftime('%m', sale_date) = '04' THEN 'April'
        WHEN strftime('%m', sale_date) = '05' THEN 'May'
        WHEN strftime('%m', sale_date) = '06' THEN 'June'
        WHEN strftime('%m', sale_date) = '07' THEN 'July'
        WHEN strftime('%m', sale_date) = '08' THEN 'August'
        WHEN strftime('%m', sale_date) = '09' THEN 'September'
        WHEN strftime('%m', sale_date) = '10' THEN 'October'
        WHEN strftime('%m', sale_date) = '11' THEN 'November'
        WHEN strftime('%m', sale_date) = '12' THEN 'December'
    END Month, 
	Total
FROM
    bakery_sales
)
SELECT 
	Quarter,
	Month, 
	Sum(Total) MonthlySales,
	rank() OVER (PARTITION BY Quarter ORDER BY -sum(total)) SalesRank
FROM SalesTable
GROUP BY Quarter, Month
import sqlite3
import pandas as pd

conn = sqlite3.connect('bakery_sales.db')

sql_statement = """
WITH SalesTable AS (
SELECT
    CASE
        WHEN 0 + strftime('%m', sale_date) BETWEEN 1
        AND 3 THEN 'Q1'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 4
        AND 6 THEN 'Q2'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 7
        AND 9 THEN 'Q3'
        WHEN 0 + strftime('%m', sale_date) BETWEEN 10
        AND 12 THEN 'Q4'
    END Quarter,
    CASE
        WHEN strftime('%m', sale_date) = '01' THEN 'January'
        WHEN strftime('%m', sale_date) = '02' THEN 'February'
        WHEN strftime('%m', sale_date) = '03' THEN 'March'
        WHEN strftime('%m', sale_date) = '04' THEN 'April'
        WHEN strftime('%m', sale_date) = '05' THEN 'May'
        WHEN strftime('%m', sale_date) = '06' THEN 'June'
        WHEN strftime('%m', sale_date) = '07' THEN 'July'
        WHEN strftime('%m', sale_date) = '08' THEN 'August'
        WHEN strftime('%m', sale_date) = '09' THEN 'September'
        WHEN strftime('%m', sale_date) = '10' THEN 'October'
        WHEN strftime('%m', sale_date) = '11' THEN 'November'
        WHEN strftime('%m', sale_date) = '12' THEN 'December'
    END Month, 
	Total
FROM
    bakery_sales
)
SELECT 
	Quarter,
	Month, 
	Sum(Total) MonthlySales,
	rank() OVER (PARTITION BY Quarter ORDER BY -sum(total)) SalesRank
FROM SalesTable
GROUP BY Quarter, Month
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
  Quarter Month MonthlySales SalesRank
0 Q1 March 6445100 1
1 Q1 February 6423700 2
2 Q1 January 4582500 3
3 Q2 April 4893700 1
4 Q2 May 308400 2
5 Q3 August 6100500 1
6 Q3 September 4895500 2
7 Q3 July 4076500 3
8 Q4 December 5009500 1
9 Q4 November 4543000 2
10 Q4 October 3959100 3

Window Frames#

ref: https://www.sqlitetutorial.net/sqlite-window-functions/sqlite-window-frame/

WITH SalesTable AS (
SELECT
	cast(strftime('%Y', sale_date) AS INT) sale_year,
	cast(strftime('%W', sale_date) AS INT) sale_week,
	Total
FROM
    bakery_sales
ORDER BY sale_date
)
SELECT 
	sale_year,
	sale_week,
	sum(total) week_total,
	sum(sum(total)) OVER (ORDER BY sale_year, sale_week ROWS UNBOUNDED PRECEDING) rolling_sum
FROM SalesTable
GROUP BY sale_year, sale_week
import sqlite3
import pandas as pd

conn = sqlite3.connect('bakery_sales.db')

sql_statement = """
WITH SalesTable AS (
SELECT
	cast(strftime('%Y', sale_date) AS INT) sale_year,
	cast(strftime('%W', sale_date) AS INT) sale_week,
	Total
FROM
    bakery_sales
ORDER BY sale_date
)
SELECT 
	sale_year,
	sale_week,
	sum(total) week_total,
	sum(sum(total)) OVER (ORDER BY sale_year, sale_week ROWS UNBOUNDED PRECEDING) rolling_sum
FROM SalesTable
GROUP BY sale_year, sale_week
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
  sale_year sale_week week_total rolling_sum
0 2019 27 427000 427000
1 2019 28 701100 1128100
2 2019 29 2529900 3658000
3 2019 30 1331900 4989900
4 2019 31 1469900 6459800
5 2019 32 1357100 7816900
6 2019 33 1377200 9194100
7 2019 34 1225100 10419200
8 2019 35 1146900 11566100
9 2019 36 1062000 12628100
10 2019 37 1251100 13879200
11 2019 38 1079700 14958900
12 2019 39 819500 15778400
13 2019 40 858300 16636700
14 2019 41 1012200 17648900
15 2019 42 1072000 18720900
16 2019 43 865100 19586000
17 2019 44 1249000 20835000
18 2019 45 988400 21823400
19 2019 46 830500 22653900
20 2019 47 1216600 23870500
21 2019 48 1313200 25183700
22 2019 49 981800 26165500
23 2019 50 1089100 27254600
24 2019 51 1091200 28345800
25 2019 52 238300 28584100
26 2020 0 875600 29459700
27 2020 1 1223700 30683400
28 2020 2 1060400 31743800
29 2020 3 713300 32457100
30 2020 4 1225800 33682900
31 2020 5 1464900 35147800
32 2020 6 1298900 36446700
33 2020 7 1800900 38247600
34 2020 8 1746100 39993700
35 2020 9 1651000 41644700
36 2020 10 1296700 42941400
37 2020 11 1522700 44464100
38 2020 12 1352700 45816800
39 2020 13 1265700 47082500
40 2020 14 1084300 48166800
41 2020 15 915400 49082200
42 2020 16 1330100 50412300
43 2020 17 825200 51237500
SELECT 
	sale_date,
	sum(total) total,
	max(sum(total)) over (order by cast(strftime('%j', sale_date) AS INT) range between 3 preceding and 3 following) seven_day_max
FROM bakery_sales
GROUP BY sale_date
order by sale_date
import sqlite3
import pandas as pd

conn = sqlite3.connect('bakery_sales.db')

sql_statement = """
SELECT 
	sale_date,
	sum(total) total,
	max(sum(total)) over (order by cast(strftime('%j', sale_date) AS INT) range between 3 preceding and 3 following) seven_day_max
FROM bakery_sales
GROUP BY sale_date
order by sale_date
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
  sale_date total seven_day_max
0 2019-07-11 39600 212000
1 2019-07-12 58000 212000
2 2019-07-13 117400 212000
3 2019-07-14 212000 212000
4 2019-07-15 30900 212000
5 2019-07-17 74100 212000
6 2019-07-19 196800 203400
7 2019-07-20 203400 203400
8 2019-07-21 195900 203400
9 2019-07-22 167100 259200
10 2019-07-24 177600 1593100
11 2019-07-25 259200 1593100
12 2019-07-26 1593100 1593100
13 2019-07-27 196000 1593100
14 2019-07-28 136900 1593100
15 2019-07-29 214600 1593100
16 2019-07-31 203900 309000
17 2019-08-01 215200 309000
18 2019-08-02 168000 309000
19 2019-08-03 309000 309000
20 2019-08-04 221200 309000
21 2019-08-05 277300 309000
22 2019-08-07 201000 300600
23 2019-08-08 290300 300600
24 2019-08-09 112500 300600
25 2019-08-10 300600 300600
26 2019-08-11 288200 300600
27 2019-08-12 232200 300600
28 2019-08-14 269500 288200
29 2019-08-15 203000 314800
30 2019-08-16 95900 314800
31 2019-08-17 241700 314800
32 2019-08-18 314800 314800
33 2019-08-19 272800 314800
34 2019-08-21 296700 314800
35 2019-08-22 182700 296700
36 2019-08-23 155800 296700
37 2019-08-24 264200 296700
38 2019-08-25 205000 264200
39 2019-08-26 159700 264200
40 2019-08-28 184900 257100
41 2019-08-29 254200 257100
42 2019-08-30 257100 257100
43 2019-08-31 127000 257100
44 2019-09-01 242200 277200
45 2019-09-02 132800 277200
46 2019-09-04 277200 285200
47 2019-09-05 247700 285200
48 2019-09-06 121700 285200
49 2019-09-07 285200 285200
50 2019-09-08 82300 285200
51 2019-09-09 215100 285200
52 2019-09-11 217700 217700
53 2019-09-12 185200 293300
54 2019-09-14 150700 293300
55 2019-09-15 293300 293300
56 2019-09-18 150800 301700
57 2019-09-19 301400 301700
58 2019-09-20 203000 301700
59 2019-09-21 301700 301700
60 2019-09-22 294200 301700
61 2019-09-23 174100 301700
62 2019-09-25 147400 294200
63 2019-09-26 54100 287000
64 2019-09-27 140400 287000
65 2019-09-28 287000 287000
66 2019-09-29 276700 287000
67 2019-09-30 113600 319700
68 2019-10-02 107300 319700
69 2019-10-03 319700 319700
70 2019-10-04 48000 319700
71 2019-10-05 99300 319700
72 2019-10-06 131600 319700
73 2019-10-07 117300 136500
74 2019-10-09 45200 153100
75 2019-10-10 136500 293100
76 2019-10-11 113100 293100
77 2019-10-12 153100 293100
78 2019-10-13 293100 293100
79 2019-10-14 177500 293100
80 2019-10-16 148400 293100
81 2019-10-17 186800 213300
82 2019-10-18 84200 307100
83 2019-10-19 202000 307100
84 2019-10-20 213300 307100
85 2019-10-21 307100 307100
86 2019-10-23 71200 307100
87 2019-10-24 219900 307100
88 2019-10-25 133800 219900
89 2019-10-26 128500 219900
90 2019-10-27 211500 219900
91 2019-10-28 215100 215100
92 2019-10-31 95600 236000
93 2019-11-01 114800 236000
94 2019-11-02 236000 236000
95 2019-11-03 203600 236000
96 2019-11-04 92100 236000
97 2019-11-06 129600 279200
98 2019-11-07 167800 357300
99 2019-11-08 223000 357300
100 2019-11-09 279200 357300
101 2019-11-10 357300 357300
102 2019-11-11 128400 357300
103 2019-11-13 145000 357300
104 2019-11-14 72000 319000
105 2019-11-15 170700 319000
106 2019-11-16 153300 319000
107 2019-11-17 319000 319000
108 2019-11-18 98700 319000
109 2019-11-20 136300 319000
110 2019-11-21 61600 289000
111 2019-11-22 66700 289000
112 2019-11-23 178200 289000
113 2019-11-24 289000 289000
114 2019-11-25 167600 289000
115 2019-11-27 127200 289000
116 2019-11-28 189400 295900
117 2019-11-29 197000 295900
118 2019-11-30 239500 295900
119 2019-12-01 295900 295900
120 2019-12-02 106100 295900
121 2019-12-04 215000 295900
122 2019-12-05 202400 409000
123 2019-12-06 195000 409000
124 2019-12-07 185700 409000
125 2019-12-08 409000 409000
126 2019-12-09 142500 409000
127 2019-12-11 206200 409000
128 2019-12-12 201500 206200
129 2019-12-13 105900 206200
130 2019-12-14 151800 206200
131 2019-12-15 173900 205900
132 2019-12-16 157800 205900
133 2019-12-18 205900 205900
134 2019-12-19 185600 223900
135 2019-12-20 121900 223900
136 2019-12-21 194000 223900
137 2019-12-22 223900 223900
138 2019-12-23 166600 223900
139 2019-12-24 56000 223900
140 2019-12-25 109500 369900
141 2019-12-26 92500 369900
142 2019-12-27 47100 369900
143 2019-12-28 369900 369900
144 2019-12-29 249600 369900
145 2019-12-30 238300 369900
146 2020-01-02 234100 275600
147 2020-01-03 238200 275600
148 2020-01-04 127700 275600
149 2020-01-05 275600 275600
150 2020-01-06 109500 275600
151 2020-01-08 251800 275600
152 2020-01-09 259900 307000
153 2020-01-10 146500 307000
154 2020-01-11 149000 307000
155 2020-01-12 307000 307000
156 2020-01-13 138800 307000
157 2020-01-15 264000 307000
158 2020-01-16 33600 354200
159 2020-01-17 107100 354200
160 2020-01-18 162700 354200
161 2020-01-19 354200 354200
162 2020-01-20 92400 354200
163 2020-01-22 136200 354200
164 2020-01-23 288400 288400
165 2020-01-24 53900 288400
166 2020-01-26 142400 288400
167 2020-01-27 158000 245900
168 2020-01-29 104400 245900
169 2020-01-30 245900 342600
170 2020-01-31 201200 342600
171 2020-02-01 173700 342600
172 2020-02-02 342600 342600
173 2020-02-03 93700 342600
174 2020-02-05 281900 342600
175 2020-02-06 206000 341700
176 2020-02-07 288800 341700
177 2020-02-08 252800 341700
178 2020-02-09 341700 354500
179 2020-02-10 65600 354500
180 2020-02-12 354500 354500
181 2020-02-13 110300 354500
182 2020-02-14 242900 354500
183 2020-02-15 235400 354500
184 2020-02-16 290200 290200
185 2020-02-17 183200 290200
186 2020-02-19 212000 353000
187 2020-02-20 249900 647100
188 2020-02-21 155700 647100
189 2020-02-22 353000 647100
190 2020-02-23 647100 647100
191 2020-02-24 189400 647100
192 2020-02-26 345100 647100
193 2020-02-27 197700 403400
194 2020-02-28 337000 403400
195 2020-02-29 273500 403400
196 2020-03-01 403400 403400
197 2020-03-02 273000 421500
198 2020-03-05 421500 421500
199 2020-03-06 305800 421500
200 2020-03-07 294700 421500
201 2020-03-08 356000 421500
202 2020-03-09 144000 356000
203 2020-03-11 204600 356000
204 2020-03-12 251800 259700
205 2020-03-13 224100 263500
206 2020-03-14 212500 263500
207 2020-03-15 259700 323600
208 2020-03-16 263500 323600
209 2020-03-18 323600 368600
210 2020-03-19 173500 368600
211 2020-03-20 135800 368600
212 2020-03-21 368600 368600
213 2020-03-22 257700 368600
214 2020-03-23 213200 368600
215 2020-03-25 216300 257700
216 2020-03-26 206700 320300
217 2020-03-27 198700 320300
218 2020-03-28 197500 320300
219 2020-03-29 320300 320300
220 2020-03-30 218600 320300
221 2020-04-01 230100 320300
222 2020-04-02 297700 297700
223 2020-04-03 140500 297700
224 2020-04-04 153000 297700
225 2020-04-05 225800 297700
226 2020-04-06 132000 272600
227 2020-04-08 272600 272600
228 2020-04-09 198600 272600
229 2020-04-10 149400 272600
230 2020-04-11 119700 272600
231 2020-04-12 212000 212000
232 2020-04-13 53300 212000
233 2020-04-15 202400 269200
234 2020-04-16 102800 269200
235 2020-04-17 147000 269200
236 2020-04-18 269200 269200
237 2020-04-19 140700 269200
238 2020-04-20 188900 269200
239 2020-04-22 225400 307900
240 2020-04-23 164300 307900
241 2020-04-24 165900 307900
242 2020-04-25 307900 307900
243 2020-04-26 277700 307900
244 2020-04-27 264600 307900
245 2020-04-29 118100 277700
246 2020-04-30 134100 264600
247 2020-05-01 215700 215700
248 2020-05-02 92700 215700

Lag and Lead#

WITH SalesTable AS (
SELECT
	cast(strftime('%Y', sale_date) AS INT) sale_year,
	cast(strftime('%W', sale_date) AS INT) sale_week,
	Total
FROM
    bakery_sales
ORDER BY sale_date
)
SELECT 
	sale_year,
	sale_week,
	sum(total) week_total,
	lag(sum(total), 1) over (order by sale_week) prev_week_total,
	lead(sum(total), 1) over (order by sale_week) next_week_total
FROM SalesTable
GROUP BY sale_year, sale_week
import sqlite3
import pandas as pd

conn = sqlite3.connect('bakery_sales.db')

sql_statement = """
WITH SalesTable AS (
SELECT
	cast(strftime('%Y', sale_date) AS INT) sale_year,
	cast(strftime('%W', sale_date) AS INT) sale_week,
	Total
FROM
    bakery_sales
ORDER BY sale_date
)
SELECT 
	sale_year,
	sale_week,
	sum(total) week_total,
	lag(sum(total), 1) over (order by sale_week) prev_week_total,
	lead(sum(total), 1) over (order by sale_week) next_week_total
FROM SalesTable
GROUP BY sale_year, sale_week
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
  sale_year sale_week week_total prev_week_total next_week_total
0 2020 0 875600 nan 1223700.000000
1 2020 1 1223700 875600.000000 1060400.000000
2 2020 2 1060400 1223700.000000 713300.000000
3 2020 3 713300 1060400.000000 1225800.000000
4 2020 4 1225800 713300.000000 1464900.000000
5 2020 5 1464900 1225800.000000 1298900.000000
6 2020 6 1298900 1464900.000000 1800900.000000
7 2020 7 1800900 1298900.000000 1746100.000000
8 2020 8 1746100 1800900.000000 1651000.000000
9 2020 9 1651000 1746100.000000 1296700.000000
10 2020 10 1296700 1651000.000000 1522700.000000
11 2020 11 1522700 1296700.000000 1352700.000000
12 2020 12 1352700 1522700.000000 1265700.000000
13 2020 13 1265700 1352700.000000 1084300.000000
14 2020 14 1084300 1265700.000000 915400.000000
15 2020 15 915400 1084300.000000 1330100.000000
16 2020 16 1330100 915400.000000 825200.000000
17 2020 17 825200 1330100.000000 427000.000000
18 2019 27 427000 825200.000000 701100.000000
19 2019 28 701100 427000.000000 2529900.000000
20 2019 29 2529900 701100.000000 1331900.000000
21 2019 30 1331900 2529900.000000 1469900.000000
22 2019 31 1469900 1331900.000000 1357100.000000
23 2019 32 1357100 1469900.000000 1377200.000000
24 2019 33 1377200 1357100.000000 1225100.000000
25 2019 34 1225100 1377200.000000 1146900.000000
26 2019 35 1146900 1225100.000000 1062000.000000
27 2019 36 1062000 1146900.000000 1251100.000000
28 2019 37 1251100 1062000.000000 1079700.000000
29 2019 38 1079700 1251100.000000 819500.000000
30 2019 39 819500 1079700.000000 858300.000000
31 2019 40 858300 819500.000000 1012200.000000
32 2019 41 1012200 858300.000000 1072000.000000
33 2019 42 1072000 1012200.000000 865100.000000
34 2019 43 865100 1072000.000000 1249000.000000
35 2019 44 1249000 865100.000000 988400.000000
36 2019 45 988400 1249000.000000 830500.000000
37 2019 46 830500 988400.000000 1216600.000000
38 2019 47 1216600 830500.000000 1313200.000000
39 2019 48 1313200 1216600.000000 981800.000000
40 2019 49 981800 1313200.000000 1089100.000000
41 2019 50 1089100 981800.000000 1091200.000000
42 2019 51 1091200 1089100.000000 238300.000000
43 2019 52 238300 1091200.000000 nan
WITH SalesTable AS (
    SELECT
        cast(strftime('%Y', sale_date) AS INT) sale_year,
        cast(strftime('%W', sale_date) AS INT) sale_week,
        Total
    FROM
        bakery_sales
    ORDER BY
        sale_date
)
SELECT
    sale_year,
    sale_week,
    sum(total) week_total,
    round(
        CAST(
            (
                sum(total) - lag(sum(total), 1) over (
                    order by
                        sale_week
                )
            ) AS REAL
        ) / CAST(
            lag(sum(total), 1) over (
                order by
                    sale_week
            ) AS REAL
        ) * 100,
        1
    ) CHANGE
FROM
    SalesTable
GROUP BY
    sale_year,
    sale_week
import sqlite3
import pandas as pd

conn = sqlite3.connect('bakery_sales.db')

sql_statement = """
WITH SalesTable AS (
    SELECT
        cast(strftime('%Y', sale_date) AS INT) sale_year,
        cast(strftime('%W', sale_date) AS INT) sale_week,
        Total
    FROM
        bakery_sales
    ORDER BY
        sale_date
)
SELECT
    sale_year,
    sale_week,
    sum(total) week_total,
    round(
        CAST(
            (
                sum(total) - lag(sum(total), 1) over (
                    order by
                        sale_week
                )
            ) AS REAL
        ) / CAST(
            lag(sum(total), 1) over (
                order by
                    sale_week
            ) AS REAL
        ) * 100,
        1
    ) CHANGE
FROM
    SalesTable
GROUP BY
    sale_year,
    sale_week
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
  sale_year sale_week week_total CHANGE
0 2020 0 875600 nan
1 2020 1 1223700 39.800000
2 2020 2 1060400 -13.300000
3 2020 3 713300 -32.700000
4 2020 4 1225800 71.800000
5 2020 5 1464900 19.500000
6 2020 6 1298900 -11.300000
7 2020 7 1800900 38.600000
8 2020 8 1746100 -3.000000
9 2020 9 1651000 -5.400000
10 2020 10 1296700 -21.500000
11 2020 11 1522700 17.400000
12 2020 12 1352700 -11.200000
13 2020 13 1265700 -6.400000
14 2020 14 1084300 -14.300000
15 2020 15 915400 -15.600000
16 2020 16 1330100 45.300000
17 2020 17 825200 -38.000000
18 2019 27 427000 -48.300000
19 2019 28 701100 64.200000
20 2019 29 2529900 260.800000
21 2019 30 1331900 -47.400000
22 2019 31 1469900 10.400000
23 2019 32 1357100 -7.700000
24 2019 33 1377200 1.500000
25 2019 34 1225100 -11.000000
26 2019 35 1146900 -6.400000
27 2019 36 1062000 -7.400000
28 2019 37 1251100 17.800000
29 2019 38 1079700 -13.700000
30 2019 39 819500 -24.100000
31 2019 40 858300 4.700000
32 2019 41 1012200 17.900000
33 2019 42 1072000 5.900000
34 2019 43 865100 -19.300000
35 2019 44 1249000 44.400000
36 2019 45 988400 -20.900000
37 2019 46 830500 -16.000000
38 2019 47 1216600 46.500000
39 2019 48 1313200 7.900000
40 2019 49 981800 -25.200000
41 2019 50 1089100 10.900000
42 2019 51 1091200 0.200000
43 2019 52 238300 -78.200000

Ranking#

  • rank: returns the same ranking in case of a tie, with gaps in the rankings

  • row_number: returns a unique number for each row, with rankings arbitrarily assigned in case of a tie

  • dense_rank: returns the same ranking in the case of a tie, with no gaps in the rankings

Ref: https://blog.jooq.org/the-difference-between-row_number-rank-and-dense_rank/

SELECT 
	PatientGender,
	PatientMaritalStatus, 
	count(*) StatusCount, 
	rank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRank,
	row_number() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRowNumber,
	dense_rank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusDenseRank
FROM 
PatientCorePopulatedTable
GROUP BY PatientGender, PatientMaritalStatus
import sqlite3
import pandas as pd

conn = sqlite3.connect('100_patients.db')

sql_statement = """
SELECT 
	PatientGender,
	PatientMaritalStatus, 
	count(*) StatusCount, 
	rank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRank,
	row_number() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRowNumber,
	dense_rank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusDenseRank
FROM 
PatientCorePopulatedTable
GROUP BY PatientGender, PatientMaritalStatus
"""
df = pd.read_sql_query(sql_statement, conn)
df.style.set_table_attributes('style="font-size: 12px"')
  PatientGender PatientMaritalStatus StatusCount StatusRank StatusRowNumber StatusDenseRank
0 Female Married 23 1 1 1
1 Female Single 17 2 2 2
2 Female Divorced 6 3 3 3
3 Female Unknown 3 4 4 4
4 Female Separated 3 4 5 4
5 Male Married 22 1 1 1
6 Male Single 15 2 2 2
7 Male Divorced 5 3 3 3
8 Male Unknown 3 4 4 4
9 Male Separated 2 5 5 5
10 Male Widowed 1 6 6 6