{
"cells": [
{
"cell_type": "markdown",
"id": "2540c433",
"metadata": {},
"source": [
"# Analytic Functions\n",
"\n",
"Analytic functions allow you to group rows into windows, partitioning\n",
"the data. Windows are defined using the `OVER` clause and optionally\n",
"combined with the `PARTITION` subclause. \n",
"\n",
"Date Reference: https://www.techonthenet.com/sqlite/functions/julianday.php\n",
"\n",
"## Data Windows\n",
"\n",
"```SQL\n",
"SELECT\n",
" CASE\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 1\n",
" AND 3 THEN 'Q1'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 4\n",
" AND 6 THEN 'Q2'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 7\n",
" AND 9 THEN 'Q3'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 10\n",
" AND 12 THEN 'Q4'\n",
" END Quarter,\n",
" CASE\n",
" WHEN strftime('%m', sale_date) = '01' THEN 'January'\n",
" WHEN strftime('%m', sale_date) = '02' THEN 'February'\n",
" WHEN strftime('%m', sale_date) = '03' THEN 'March'\n",
" WHEN strftime('%m', sale_date) = '04' THEN 'April'\n",
" WHEN strftime('%m', sale_date) = '05' THEN 'May'\n",
" WHEN strftime('%m', sale_date) = '06' THEN 'June'\n",
" WHEN strftime('%m', sale_date) = '07' THEN 'July'\n",
" WHEN strftime('%m', sale_date) = '08' THEN 'August'\n",
" WHEN strftime('%m', sale_date) = '09' THEN 'September'\n",
" WHEN strftime('%m', sale_date) = '10' THEN 'October'\n",
" WHEN strftime('%m', sale_date) = '11' THEN 'November'\n",
" WHEN strftime('%m', sale_date) = '12' THEN 'December'\n",
" END Month,\n",
" sum(total) Monthly_Sales\n",
"FROM\n",
" bakery_sales\n",
"GROUP BY\n",
" Quarter,\n",
" Month\n",
"ORDER BY \n",
"\tstrftime('%m', sale_date)\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "d01adbef",
"metadata": {
"tags": [
"hide-input",
"output_scroll"
]
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Quarter | \n",
" Month | \n",
" Monthly_Sales | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Q1 | \n",
" January | \n",
" 4582500 | \n",
"
\n",
" \n",
" 1 | \n",
" Q1 | \n",
" February | \n",
" 6423700 | \n",
"
\n",
" \n",
" 2 | \n",
" Q1 | \n",
" March | \n",
" 6445100 | \n",
"
\n",
" \n",
" 3 | \n",
" Q2 | \n",
" April | \n",
" 4893700 | \n",
"
\n",
" \n",
" 4 | \n",
" Q2 | \n",
" May | \n",
" 308400 | \n",
"
\n",
" \n",
" 5 | \n",
" Q3 | \n",
" July | \n",
" 4076500 | \n",
"
\n",
" \n",
" 6 | \n",
" Q3 | \n",
" August | \n",
" 6100500 | \n",
"
\n",
" \n",
" 7 | \n",
" Q3 | \n",
" September | \n",
" 4895500 | \n",
"
\n",
" \n",
" 8 | \n",
" Q4 | \n",
" October | \n",
" 3959100 | \n",
"
\n",
" \n",
" 9 | \n",
" Q4 | \n",
" November | \n",
" 4543000 | \n",
"
\n",
" \n",
" 10 | \n",
" Q4 | \n",
" December | \n",
" 5009500 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import sqlite3\n",
"import pandas as pd\n",
"\n",
"conn = sqlite3.connect('bakery_sales.db')\n",
"\n",
"sql_statement = \"\"\"\n",
"SELECT\n",
" CASE\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 1\n",
" AND 3 THEN 'Q1'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 4\n",
" AND 6 THEN 'Q2'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 7\n",
" AND 9 THEN 'Q3'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 10\n",
" AND 12 THEN 'Q4'\n",
" END Quarter,\n",
" CASE\n",
" WHEN strftime('%m', sale_date) = '01' THEN 'January'\n",
" WHEN strftime('%m', sale_date) = '02' THEN 'February'\n",
" WHEN strftime('%m', sale_date) = '03' THEN 'March'\n",
" WHEN strftime('%m', sale_date) = '04' THEN 'April'\n",
" WHEN strftime('%m', sale_date) = '05' THEN 'May'\n",
" WHEN strftime('%m', sale_date) = '06' THEN 'June'\n",
" WHEN strftime('%m', sale_date) = '07' THEN 'July'\n",
" WHEN strftime('%m', sale_date) = '08' THEN 'August'\n",
" WHEN strftime('%m', sale_date) = '09' THEN 'September'\n",
" WHEN strftime('%m', sale_date) = '10' THEN 'October'\n",
" WHEN strftime('%m', sale_date) = '11' THEN 'November'\n",
" WHEN strftime('%m', sale_date) = '12' THEN 'December'\n",
" END Month,\n",
" sum(total) Monthly_Sales\n",
"FROM\n",
" bakery_sales\n",
"GROUP BY\n",
" Quarter,\n",
" Month\n",
"ORDER BY \n",
"\tstrftime('%m', sale_date)\n",
"\"\"\"\n",
"df = pd.read_sql_query(sql_statement, conn)\n",
"df.style.set_table_attributes('style=\"font-size: 12px\"')"
]
},
{
"cell_type": "markdown",
"id": "0dcb8f2c",
"metadata": {},
"source": [
"```SQL\n",
"WITH SalesTable AS (\n",
"SELECT\n",
" CASE\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 1\n",
" AND 3 THEN 'Q1'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 4\n",
" AND 6 THEN 'Q2'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 7\n",
" AND 9 THEN 'Q3'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 10\n",
" AND 12 THEN 'Q4'\n",
" END Quarter,\n",
" CASE\n",
" WHEN strftime('%m', sale_date) = '01' THEN 'January'\n",
" WHEN strftime('%m', sale_date) = '02' THEN 'February'\n",
" WHEN strftime('%m', sale_date) = '03' THEN 'March'\n",
" WHEN strftime('%m', sale_date) = '04' THEN 'April'\n",
" WHEN strftime('%m', sale_date) = '05' THEN 'May'\n",
" WHEN strftime('%m', sale_date) = '06' THEN 'June'\n",
" WHEN strftime('%m', sale_date) = '07' THEN 'July'\n",
" WHEN strftime('%m', sale_date) = '08' THEN 'August'\n",
" WHEN strftime('%m', sale_date) = '09' THEN 'September'\n",
" WHEN strftime('%m', sale_date) = '10' THEN 'October'\n",
" WHEN strftime('%m', sale_date) = '11' THEN 'November'\n",
" WHEN strftime('%m', sale_date) = '12' THEN 'December'\n",
" END Month, \n",
"\tTotal\n",
"FROM\n",
" bakery_sales\n",
")\n",
"SELECT \n",
"\tQuarter,\n",
"\tMonth, \n",
"\tSum(Total) MonthlySales,\n",
"\tMax(sum(total)) over() max_overall_sales,\n",
"\tMax(sum(total)) over(partition by quarter) max_quarter_sales\n",
"FROM SalesTable\n",
"GROUP BY Quarter, Month\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "0e5011b2",
"metadata": {
"tags": [
"hide-input",
"output_scroll"
]
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" Quarter | \n",
" Month | \n",
" MonthlySales | \n",
" max_overall_sales | \n",
" max_quarter_sales | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Q1 | \n",
" February | \n",
" 6423700 | \n",
" 6445100 | \n",
" 6445100 | \n",
"
\n",
" \n",
" 1 | \n",
" Q1 | \n",
" January | \n",
" 4582500 | \n",
" 6445100 | \n",
" 6445100 | \n",
"
\n",
" \n",
" 2 | \n",
" Q1 | \n",
" March | \n",
" 6445100 | \n",
" 6445100 | \n",
" 6445100 | \n",
"
\n",
" \n",
" 3 | \n",
" Q2 | \n",
" April | \n",
" 4893700 | \n",
" 6445100 | \n",
" 4893700 | \n",
"
\n",
" \n",
" 4 | \n",
" Q2 | \n",
" May | \n",
" 308400 | \n",
" 6445100 | \n",
" 4893700 | \n",
"
\n",
" \n",
" 5 | \n",
" Q3 | \n",
" August | \n",
" 6100500 | \n",
" 6445100 | \n",
" 6100500 | \n",
"
\n",
" \n",
" 6 | \n",
" Q3 | \n",
" July | \n",
" 4076500 | \n",
" 6445100 | \n",
" 6100500 | \n",
"
\n",
" \n",
" 7 | \n",
" Q3 | \n",
" September | \n",
" 4895500 | \n",
" 6445100 | \n",
" 6100500 | \n",
"
\n",
" \n",
" 8 | \n",
" Q4 | \n",
" December | \n",
" 5009500 | \n",
" 6445100 | \n",
" 5009500 | \n",
"
\n",
" \n",
" 9 | \n",
" Q4 | \n",
" November | \n",
" 4543000 | \n",
" 6445100 | \n",
" 5009500 | \n",
"
\n",
" \n",
" 10 | \n",
" Q4 | \n",
" October | \n",
" 3959100 | \n",
" 6445100 | \n",
" 5009500 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import sqlite3\n",
"import pandas as pd\n",
"\n",
"conn = sqlite3.connect('bakery_sales.db')\n",
"\n",
"sql_statement = \"\"\"\n",
"WITH SalesTable AS (\n",
"SELECT\n",
" CASE\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 1\n",
" AND 3 THEN 'Q1'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 4\n",
" AND 6 THEN 'Q2'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 7\n",
" AND 9 THEN 'Q3'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 10\n",
" AND 12 THEN 'Q4'\n",
" END Quarter,\n",
" CASE\n",
" WHEN strftime('%m', sale_date) = '01' THEN 'January'\n",
" WHEN strftime('%m', sale_date) = '02' THEN 'February'\n",
" WHEN strftime('%m', sale_date) = '03' THEN 'March'\n",
" WHEN strftime('%m', sale_date) = '04' THEN 'April'\n",
" WHEN strftime('%m', sale_date) = '05' THEN 'May'\n",
" WHEN strftime('%m', sale_date) = '06' THEN 'June'\n",
" WHEN strftime('%m', sale_date) = '07' THEN 'July'\n",
" WHEN strftime('%m', sale_date) = '08' THEN 'August'\n",
" WHEN strftime('%m', sale_date) = '09' THEN 'September'\n",
" WHEN strftime('%m', sale_date) = '10' THEN 'October'\n",
" WHEN strftime('%m', sale_date) = '11' THEN 'November'\n",
" WHEN strftime('%m', sale_date) = '12' THEN 'December'\n",
" END Month, \n",
"\tTotal\n",
"FROM\n",
" bakery_sales\n",
")\n",
"SELECT \n",
"\tQuarter,\n",
"\tMonth, \n",
"\tSum(Total) MonthlySales,\n",
"\tMax(sum(total)) over() max_overall_sales,\n",
"\tMax(sum(total)) over(partition by quarter) max_quarter_sales\n",
"FROM SalesTable\n",
"GROUP BY Quarter, Month\n",
"\"\"\"\n",
"df = pd.read_sql_query(sql_statement, conn)\n",
"df.style.set_table_attributes('style=\"font-size: 12px\"')"
]
},
{
"cell_type": "markdown",
"id": "5ffb1fef",
"metadata": {},
"source": [
"## Localized Sorting\n",
"\n",
"\n",
"```SQL\n",
"WITH SalesTable AS (\n",
"SELECT\n",
" CASE\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 1\n",
" AND 3 THEN 'Q1'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 4\n",
" AND 6 THEN 'Q2'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 7\n",
" AND 9 THEN 'Q3'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 10\n",
" AND 12 THEN 'Q4'\n",
" END Quarter,\n",
" CASE\n",
" WHEN strftime('%m', sale_date) = '01' THEN 'January'\n",
" WHEN strftime('%m', sale_date) = '02' THEN 'February'\n",
" WHEN strftime('%m', sale_date) = '03' THEN 'March'\n",
" WHEN strftime('%m', sale_date) = '04' THEN 'April'\n",
" WHEN strftime('%m', sale_date) = '05' THEN 'May'\n",
" WHEN strftime('%m', sale_date) = '06' THEN 'June'\n",
" WHEN strftime('%m', sale_date) = '07' THEN 'July'\n",
" WHEN strftime('%m', sale_date) = '08' THEN 'August'\n",
" WHEN strftime('%m', sale_date) = '09' THEN 'September'\n",
" WHEN strftime('%m', sale_date) = '10' THEN 'October'\n",
" WHEN strftime('%m', sale_date) = '11' THEN 'November'\n",
" WHEN strftime('%m', sale_date) = '12' THEN 'December'\n",
" END Month, \n",
"\tTotal\n",
"FROM\n",
" bakery_sales\n",
")\n",
"SELECT \n",
"\tQuarter,\n",
"\tMonth, \n",
"\tSum(Total) MonthlySales,\n",
"\trank() OVER (ORDER BY -sum(total)) SalesRank\n",
"FROM SalesTable\n",
"GROUP BY Quarter, Month\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1764dacd",
"metadata": {
"tags": [
"hide-input",
"output_scroll"
]
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" Quarter | \n",
" Month | \n",
" MonthlySales | \n",
" SalesRank | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Q1 | \n",
" March | \n",
" 6445100 | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" Q1 | \n",
" February | \n",
" 6423700 | \n",
" 2 | \n",
"
\n",
" \n",
" 2 | \n",
" Q3 | \n",
" August | \n",
" 6100500 | \n",
" 3 | \n",
"
\n",
" \n",
" 3 | \n",
" Q4 | \n",
" December | \n",
" 5009500 | \n",
" 4 | \n",
"
\n",
" \n",
" 4 | \n",
" Q3 | \n",
" September | \n",
" 4895500 | \n",
" 5 | \n",
"
\n",
" \n",
" 5 | \n",
" Q2 | \n",
" April | \n",
" 4893700 | \n",
" 6 | \n",
"
\n",
" \n",
" 6 | \n",
" Q1 | \n",
" January | \n",
" 4582500 | \n",
" 7 | \n",
"
\n",
" \n",
" 7 | \n",
" Q4 | \n",
" November | \n",
" 4543000 | \n",
" 8 | \n",
"
\n",
" \n",
" 8 | \n",
" Q3 | \n",
" July | \n",
" 4076500 | \n",
" 9 | \n",
"
\n",
" \n",
" 9 | \n",
" Q4 | \n",
" October | \n",
" 3959100 | \n",
" 10 | \n",
"
\n",
" \n",
" 10 | \n",
" Q2 | \n",
" May | \n",
" 308400 | \n",
" 11 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import sqlite3\n",
"import pandas as pd\n",
"\n",
"conn = sqlite3.connect('bakery_sales.db')\n",
"\n",
"sql_statement = \"\"\"\n",
"WITH SalesTable AS (\n",
"SELECT\n",
" CASE\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 1\n",
" AND 3 THEN 'Q1'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 4\n",
" AND 6 THEN 'Q2'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 7\n",
" AND 9 THEN 'Q3'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 10\n",
" AND 12 THEN 'Q4'\n",
" END Quarter,\n",
" CASE\n",
" WHEN strftime('%m', sale_date) = '01' THEN 'January'\n",
" WHEN strftime('%m', sale_date) = '02' THEN 'February'\n",
" WHEN strftime('%m', sale_date) = '03' THEN 'March'\n",
" WHEN strftime('%m', sale_date) = '04' THEN 'April'\n",
" WHEN strftime('%m', sale_date) = '05' THEN 'May'\n",
" WHEN strftime('%m', sale_date) = '06' THEN 'June'\n",
" WHEN strftime('%m', sale_date) = '07' THEN 'July'\n",
" WHEN strftime('%m', sale_date) = '08' THEN 'August'\n",
" WHEN strftime('%m', sale_date) = '09' THEN 'September'\n",
" WHEN strftime('%m', sale_date) = '10' THEN 'October'\n",
" WHEN strftime('%m', sale_date) = '11' THEN 'November'\n",
" WHEN strftime('%m', sale_date) = '12' THEN 'December'\n",
" END Month, \n",
"\tTotal\n",
"FROM\n",
" bakery_sales\n",
")\n",
"SELECT \n",
"\tQuarter,\n",
"\tMonth, \n",
"\tSum(Total) MonthlySales,\n",
"\trank() OVER (ORDER BY -sum(total)) SalesRank\n",
"FROM SalesTable\n",
"GROUP BY Quarter, Month\n",
"\"\"\"\n",
"df = pd.read_sql_query(sql_statement, conn)\n",
"df.style.set_table_attributes('style=\"font-size: 12px\"')"
]
},
{
"cell_type": "markdown",
"id": "76d04b33",
"metadata": {},
"source": [
"```SQL\n",
"WITH SalesTable AS (\n",
"SELECT\n",
" CASE\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 1\n",
" AND 3 THEN 'Q1'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 4\n",
" AND 6 THEN 'Q2'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 7\n",
" AND 9 THEN 'Q3'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 10\n",
" AND 12 THEN 'Q4'\n",
" END Quarter,\n",
" CASE\n",
" WHEN strftime('%m', sale_date) = '01' THEN 'January'\n",
" WHEN strftime('%m', sale_date) = '02' THEN 'February'\n",
" WHEN strftime('%m', sale_date) = '03' THEN 'March'\n",
" WHEN strftime('%m', sale_date) = '04' THEN 'April'\n",
" WHEN strftime('%m', sale_date) = '05' THEN 'May'\n",
" WHEN strftime('%m', sale_date) = '06' THEN 'June'\n",
" WHEN strftime('%m', sale_date) = '07' THEN 'July'\n",
" WHEN strftime('%m', sale_date) = '08' THEN 'August'\n",
" WHEN strftime('%m', sale_date) = '09' THEN 'September'\n",
" WHEN strftime('%m', sale_date) = '10' THEN 'October'\n",
" WHEN strftime('%m', sale_date) = '11' THEN 'November'\n",
" WHEN strftime('%m', sale_date) = '12' THEN 'December'\n",
" END Month, \n",
"\tTotal\n",
"FROM\n",
" bakery_sales\n",
")\n",
"SELECT \n",
"\tQuarter,\n",
"\tMonth, \n",
"\tSum(Total) MonthlySales,\n",
"\trank() OVER (PARTITION BY Quarter ORDER BY -sum(total)) SalesRank\n",
"FROM SalesTable\n",
"GROUP BY Quarter, Month\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "fe8aa08f",
"metadata": {
"tags": [
"hide-input",
"output_scroll"
]
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" Quarter | \n",
" Month | \n",
" MonthlySales | \n",
" SalesRank | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Q1 | \n",
" March | \n",
" 6445100 | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" Q1 | \n",
" February | \n",
" 6423700 | \n",
" 2 | \n",
"
\n",
" \n",
" 2 | \n",
" Q1 | \n",
" January | \n",
" 4582500 | \n",
" 3 | \n",
"
\n",
" \n",
" 3 | \n",
" Q2 | \n",
" April | \n",
" 4893700 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" Q2 | \n",
" May | \n",
" 308400 | \n",
" 2 | \n",
"
\n",
" \n",
" 5 | \n",
" Q3 | \n",
" August | \n",
" 6100500 | \n",
" 1 | \n",
"
\n",
" \n",
" 6 | \n",
" Q3 | \n",
" September | \n",
" 4895500 | \n",
" 2 | \n",
"
\n",
" \n",
" 7 | \n",
" Q3 | \n",
" July | \n",
" 4076500 | \n",
" 3 | \n",
"
\n",
" \n",
" 8 | \n",
" Q4 | \n",
" December | \n",
" 5009500 | \n",
" 1 | \n",
"
\n",
" \n",
" 9 | \n",
" Q4 | \n",
" November | \n",
" 4543000 | \n",
" 2 | \n",
"
\n",
" \n",
" 10 | \n",
" Q4 | \n",
" October | \n",
" 3959100 | \n",
" 3 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import sqlite3\n",
"import pandas as pd\n",
"\n",
"conn = sqlite3.connect('bakery_sales.db')\n",
"\n",
"sql_statement = \"\"\"\n",
"WITH SalesTable AS (\n",
"SELECT\n",
" CASE\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 1\n",
" AND 3 THEN 'Q1'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 4\n",
" AND 6 THEN 'Q2'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 7\n",
" AND 9 THEN 'Q3'\n",
" WHEN 0 + strftime('%m', sale_date) BETWEEN 10\n",
" AND 12 THEN 'Q4'\n",
" END Quarter,\n",
" CASE\n",
" WHEN strftime('%m', sale_date) = '01' THEN 'January'\n",
" WHEN strftime('%m', sale_date) = '02' THEN 'February'\n",
" WHEN strftime('%m', sale_date) = '03' THEN 'March'\n",
" WHEN strftime('%m', sale_date) = '04' THEN 'April'\n",
" WHEN strftime('%m', sale_date) = '05' THEN 'May'\n",
" WHEN strftime('%m', sale_date) = '06' THEN 'June'\n",
" WHEN strftime('%m', sale_date) = '07' THEN 'July'\n",
" WHEN strftime('%m', sale_date) = '08' THEN 'August'\n",
" WHEN strftime('%m', sale_date) = '09' THEN 'September'\n",
" WHEN strftime('%m', sale_date) = '10' THEN 'October'\n",
" WHEN strftime('%m', sale_date) = '11' THEN 'November'\n",
" WHEN strftime('%m', sale_date) = '12' THEN 'December'\n",
" END Month, \n",
"\tTotal\n",
"FROM\n",
" bakery_sales\n",
")\n",
"SELECT \n",
"\tQuarter,\n",
"\tMonth, \n",
"\tSum(Total) MonthlySales,\n",
"\trank() OVER (PARTITION BY Quarter ORDER BY -sum(total)) SalesRank\n",
"FROM SalesTable\n",
"GROUP BY Quarter, Month\n",
"\"\"\"\n",
"df = pd.read_sql_query(sql_statement, conn)\n",
"df.style.set_table_attributes('style=\"font-size: 12px\"')"
]
},
{
"cell_type": "markdown",
"id": "b07814fc",
"metadata": {},
"source": [
"## Window Frames\n",
"\n",
"ref: https://www.sqlitetutorial.net/sqlite-window-functions/sqlite-window-frame/\n",
"\n",
"```SQL\n",
"WITH SalesTable AS (\n",
"SELECT\n",
"\tcast(strftime('%Y', sale_date) AS INT) sale_year,\n",
"\tcast(strftime('%W', sale_date) AS INT) sale_week,\n",
"\tTotal\n",
"FROM\n",
" bakery_sales\n",
"ORDER BY sale_date\n",
")\n",
"SELECT \n",
"\tsale_year,\n",
"\tsale_week,\n",
"\tsum(total) week_total,\n",
"\tsum(sum(total)) OVER (ORDER BY sale_year, sale_week ROWS UNBOUNDED PRECEDING) rolling_sum\n",
"FROM SalesTable\n",
"GROUP BY sale_year, sale_week\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "0e9c9619",
"metadata": {
"tags": [
"hide-input",
"output_scroll"
]
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" sale_year | \n",
" sale_week | \n",
" week_total | \n",
" rolling_sum | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2019 | \n",
" 27 | \n",
" 427000 | \n",
" 427000 | \n",
"
\n",
" \n",
" 1 | \n",
" 2019 | \n",
" 28 | \n",
" 701100 | \n",
" 1128100 | \n",
"
\n",
" \n",
" 2 | \n",
" 2019 | \n",
" 29 | \n",
" 2529900 | \n",
" 3658000 | \n",
"
\n",
" \n",
" 3 | \n",
" 2019 | \n",
" 30 | \n",
" 1331900 | \n",
" 4989900 | \n",
"
\n",
" \n",
" 4 | \n",
" 2019 | \n",
" 31 | \n",
" 1469900 | \n",
" 6459800 | \n",
"
\n",
" \n",
" 5 | \n",
" 2019 | \n",
" 32 | \n",
" 1357100 | \n",
" 7816900 | \n",
"
\n",
" \n",
" 6 | \n",
" 2019 | \n",
" 33 | \n",
" 1377200 | \n",
" 9194100 | \n",
"
\n",
" \n",
" 7 | \n",
" 2019 | \n",
" 34 | \n",
" 1225100 | \n",
" 10419200 | \n",
"
\n",
" \n",
" 8 | \n",
" 2019 | \n",
" 35 | \n",
" 1146900 | \n",
" 11566100 | \n",
"
\n",
" \n",
" 9 | \n",
" 2019 | \n",
" 36 | \n",
" 1062000 | \n",
" 12628100 | \n",
"
\n",
" \n",
" 10 | \n",
" 2019 | \n",
" 37 | \n",
" 1251100 | \n",
" 13879200 | \n",
"
\n",
" \n",
" 11 | \n",
" 2019 | \n",
" 38 | \n",
" 1079700 | \n",
" 14958900 | \n",
"
\n",
" \n",
" 12 | \n",
" 2019 | \n",
" 39 | \n",
" 819500 | \n",
" 15778400 | \n",
"
\n",
" \n",
" 13 | \n",
" 2019 | \n",
" 40 | \n",
" 858300 | \n",
" 16636700 | \n",
"
\n",
" \n",
" 14 | \n",
" 2019 | \n",
" 41 | \n",
" 1012200 | \n",
" 17648900 | \n",
"
\n",
" \n",
" 15 | \n",
" 2019 | \n",
" 42 | \n",
" 1072000 | \n",
" 18720900 | \n",
"
\n",
" \n",
" 16 | \n",
" 2019 | \n",
" 43 | \n",
" 865100 | \n",
" 19586000 | \n",
"
\n",
" \n",
" 17 | \n",
" 2019 | \n",
" 44 | \n",
" 1249000 | \n",
" 20835000 | \n",
"
\n",
" \n",
" 18 | \n",
" 2019 | \n",
" 45 | \n",
" 988400 | \n",
" 21823400 | \n",
"
\n",
" \n",
" 19 | \n",
" 2019 | \n",
" 46 | \n",
" 830500 | \n",
" 22653900 | \n",
"
\n",
" \n",
" 20 | \n",
" 2019 | \n",
" 47 | \n",
" 1216600 | \n",
" 23870500 | \n",
"
\n",
" \n",
" 21 | \n",
" 2019 | \n",
" 48 | \n",
" 1313200 | \n",
" 25183700 | \n",
"
\n",
" \n",
" 22 | \n",
" 2019 | \n",
" 49 | \n",
" 981800 | \n",
" 26165500 | \n",
"
\n",
" \n",
" 23 | \n",
" 2019 | \n",
" 50 | \n",
" 1089100 | \n",
" 27254600 | \n",
"
\n",
" \n",
" 24 | \n",
" 2019 | \n",
" 51 | \n",
" 1091200 | \n",
" 28345800 | \n",
"
\n",
" \n",
" 25 | \n",
" 2019 | \n",
" 52 | \n",
" 238300 | \n",
" 28584100 | \n",
"
\n",
" \n",
" 26 | \n",
" 2020 | \n",
" 0 | \n",
" 875600 | \n",
" 29459700 | \n",
"
\n",
" \n",
" 27 | \n",
" 2020 | \n",
" 1 | \n",
" 1223700 | \n",
" 30683400 | \n",
"
\n",
" \n",
" 28 | \n",
" 2020 | \n",
" 2 | \n",
" 1060400 | \n",
" 31743800 | \n",
"
\n",
" \n",
" 29 | \n",
" 2020 | \n",
" 3 | \n",
" 713300 | \n",
" 32457100 | \n",
"
\n",
" \n",
" 30 | \n",
" 2020 | \n",
" 4 | \n",
" 1225800 | \n",
" 33682900 | \n",
"
\n",
" \n",
" 31 | \n",
" 2020 | \n",
" 5 | \n",
" 1464900 | \n",
" 35147800 | \n",
"
\n",
" \n",
" 32 | \n",
" 2020 | \n",
" 6 | \n",
" 1298900 | \n",
" 36446700 | \n",
"
\n",
" \n",
" 33 | \n",
" 2020 | \n",
" 7 | \n",
" 1800900 | \n",
" 38247600 | \n",
"
\n",
" \n",
" 34 | \n",
" 2020 | \n",
" 8 | \n",
" 1746100 | \n",
" 39993700 | \n",
"
\n",
" \n",
" 35 | \n",
" 2020 | \n",
" 9 | \n",
" 1651000 | \n",
" 41644700 | \n",
"
\n",
" \n",
" 36 | \n",
" 2020 | \n",
" 10 | \n",
" 1296700 | \n",
" 42941400 | \n",
"
\n",
" \n",
" 37 | \n",
" 2020 | \n",
" 11 | \n",
" 1522700 | \n",
" 44464100 | \n",
"
\n",
" \n",
" 38 | \n",
" 2020 | \n",
" 12 | \n",
" 1352700 | \n",
" 45816800 | \n",
"
\n",
" \n",
" 39 | \n",
" 2020 | \n",
" 13 | \n",
" 1265700 | \n",
" 47082500 | \n",
"
\n",
" \n",
" 40 | \n",
" 2020 | \n",
" 14 | \n",
" 1084300 | \n",
" 48166800 | \n",
"
\n",
" \n",
" 41 | \n",
" 2020 | \n",
" 15 | \n",
" 915400 | \n",
" 49082200 | \n",
"
\n",
" \n",
" 42 | \n",
" 2020 | \n",
" 16 | \n",
" 1330100 | \n",
" 50412300 | \n",
"
\n",
" \n",
" 43 | \n",
" 2020 | \n",
" 17 | \n",
" 825200 | \n",
" 51237500 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import sqlite3\n",
"import pandas as pd\n",
"\n",
"conn = sqlite3.connect('bakery_sales.db')\n",
"\n",
"sql_statement = \"\"\"\n",
"WITH SalesTable AS (\n",
"SELECT\n",
"\tcast(strftime('%Y', sale_date) AS INT) sale_year,\n",
"\tcast(strftime('%W', sale_date) AS INT) sale_week,\n",
"\tTotal\n",
"FROM\n",
" bakery_sales\n",
"ORDER BY sale_date\n",
")\n",
"SELECT \n",
"\tsale_year,\n",
"\tsale_week,\n",
"\tsum(total) week_total,\n",
"\tsum(sum(total)) OVER (ORDER BY sale_year, sale_week ROWS UNBOUNDED PRECEDING) rolling_sum\n",
"FROM SalesTable\n",
"GROUP BY sale_year, sale_week\n",
"\"\"\"\n",
"df = pd.read_sql_query(sql_statement, conn)\n",
"df.style.set_table_attributes('style=\"font-size: 12px\"')"
]
},
{
"cell_type": "markdown",
"id": "c537f43c",
"metadata": {},
"source": [
"```SQL\n",
"SELECT \n",
"\tsale_date,\n",
"\tsum(total) total,\n",
"\tmax(sum(total)) over (order by cast(strftime('%j', sale_date) AS INT) range between 3 preceding and 3 following) seven_day_max\n",
"FROM bakery_sales\n",
"GROUP BY sale_date\n",
"order by sale_date\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d343e93a",
"metadata": {
"tags": [
"hide-input",
"output_scroll"
]
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" sale_date | \n",
" total | \n",
" seven_day_max | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2019-07-11 | \n",
" 39600 | \n",
" 212000 | \n",
"
\n",
" \n",
" 1 | \n",
" 2019-07-12 | \n",
" 58000 | \n",
" 212000 | \n",
"
\n",
" \n",
" 2 | \n",
" 2019-07-13 | \n",
" 117400 | \n",
" 212000 | \n",
"
\n",
" \n",
" 3 | \n",
" 2019-07-14 | \n",
" 212000 | \n",
" 212000 | \n",
"
\n",
" \n",
" 4 | \n",
" 2019-07-15 | \n",
" 30900 | \n",
" 212000 | \n",
"
\n",
" \n",
" 5 | \n",
" 2019-07-17 | \n",
" 74100 | \n",
" 212000 | \n",
"
\n",
" \n",
" 6 | \n",
" 2019-07-19 | \n",
" 196800 | \n",
" 203400 | \n",
"
\n",
" \n",
" 7 | \n",
" 2019-07-20 | \n",
" 203400 | \n",
" 203400 | \n",
"
\n",
" \n",
" 8 | \n",
" 2019-07-21 | \n",
" 195900 | \n",
" 203400 | \n",
"
\n",
" \n",
" 9 | \n",
" 2019-07-22 | \n",
" 167100 | \n",
" 259200 | \n",
"
\n",
" \n",
" 10 | \n",
" 2019-07-24 | \n",
" 177600 | \n",
" 1593100 | \n",
"
\n",
" \n",
" 11 | \n",
" 2019-07-25 | \n",
" 259200 | \n",
" 1593100 | \n",
"
\n",
" \n",
" 12 | \n",
" 2019-07-26 | \n",
" 1593100 | \n",
" 1593100 | \n",
"
\n",
" \n",
" 13 | \n",
" 2019-07-27 | \n",
" 196000 | \n",
" 1593100 | \n",
"
\n",
" \n",
" 14 | \n",
" 2019-07-28 | \n",
" 136900 | \n",
" 1593100 | \n",
"
\n",
" \n",
" 15 | \n",
" 2019-07-29 | \n",
" 214600 | \n",
" 1593100 | \n",
"
\n",
" \n",
" 16 | \n",
" 2019-07-31 | \n",
" 203900 | \n",
" 309000 | \n",
"
\n",
" \n",
" 17 | \n",
" 2019-08-01 | \n",
" 215200 | \n",
" 309000 | \n",
"
\n",
" \n",
" 18 | \n",
" 2019-08-02 | \n",
" 168000 | \n",
" 309000 | \n",
"
\n",
" \n",
" 19 | \n",
" 2019-08-03 | \n",
" 309000 | \n",
" 309000 | \n",
"
\n",
" \n",
" 20 | \n",
" 2019-08-04 | \n",
" 221200 | \n",
" 309000 | \n",
"
\n",
" \n",
" 21 | \n",
" 2019-08-05 | \n",
" 277300 | \n",
" 309000 | \n",
"
\n",
" \n",
" 22 | \n",
" 2019-08-07 | \n",
" 201000 | \n",
" 300600 | \n",
"
\n",
" \n",
" 23 | \n",
" 2019-08-08 | \n",
" 290300 | \n",
" 300600 | \n",
"
\n",
" \n",
" 24 | \n",
" 2019-08-09 | \n",
" 112500 | \n",
" 300600 | \n",
"
\n",
" \n",
" 25 | \n",
" 2019-08-10 | \n",
" 300600 | \n",
" 300600 | \n",
"
\n",
" \n",
" 26 | \n",
" 2019-08-11 | \n",
" 288200 | \n",
" 300600 | \n",
"
\n",
" \n",
" 27 | \n",
" 2019-08-12 | \n",
" 232200 | \n",
" 300600 | \n",
"
\n",
" \n",
" 28 | \n",
" 2019-08-14 | \n",
" 269500 | \n",
" 288200 | \n",
"
\n",
" \n",
" 29 | \n",
" 2019-08-15 | \n",
" 203000 | \n",
" 314800 | \n",
"
\n",
" \n",
" 30 | \n",
" 2019-08-16 | \n",
" 95900 | \n",
" 314800 | \n",
"
\n",
" \n",
" 31 | \n",
" 2019-08-17 | \n",
" 241700 | \n",
" 314800 | \n",
"
\n",
" \n",
" 32 | \n",
" 2019-08-18 | \n",
" 314800 | \n",
" 314800 | \n",
"
\n",
" \n",
" 33 | \n",
" 2019-08-19 | \n",
" 272800 | \n",
" 314800 | \n",
"
\n",
" \n",
" 34 | \n",
" 2019-08-21 | \n",
" 296700 | \n",
" 314800 | \n",
"
\n",
" \n",
" 35 | \n",
" 2019-08-22 | \n",
" 182700 | \n",
" 296700 | \n",
"
\n",
" \n",
" 36 | \n",
" 2019-08-23 | \n",
" 155800 | \n",
" 296700 | \n",
"
\n",
" \n",
" 37 | \n",
" 2019-08-24 | \n",
" 264200 | \n",
" 296700 | \n",
"
\n",
" \n",
" 38 | \n",
" 2019-08-25 | \n",
" 205000 | \n",
" 264200 | \n",
"
\n",
" \n",
" 39 | \n",
" 2019-08-26 | \n",
" 159700 | \n",
" 264200 | \n",
"
\n",
" \n",
" 40 | \n",
" 2019-08-28 | \n",
" 184900 | \n",
" 257100 | \n",
"
\n",
" \n",
" 41 | \n",
" 2019-08-29 | \n",
" 254200 | \n",
" 257100 | \n",
"
\n",
" \n",
" 42 | \n",
" 2019-08-30 | \n",
" 257100 | \n",
" 257100 | \n",
"
\n",
" \n",
" 43 | \n",
" 2019-08-31 | \n",
" 127000 | \n",
" 257100 | \n",
"
\n",
" \n",
" 44 | \n",
" 2019-09-01 | \n",
" 242200 | \n",
" 277200 | \n",
"
\n",
" \n",
" 45 | \n",
" 2019-09-02 | \n",
" 132800 | \n",
" 277200 | \n",
"
\n",
" \n",
" 46 | \n",
" 2019-09-04 | \n",
" 277200 | \n",
" 285200 | \n",
"
\n",
" \n",
" 47 | \n",
" 2019-09-05 | \n",
" 247700 | \n",
" 285200 | \n",
"
\n",
" \n",
" 48 | \n",
" 2019-09-06 | \n",
" 121700 | \n",
" 285200 | \n",
"
\n",
" \n",
" 49 | \n",
" 2019-09-07 | \n",
" 285200 | \n",
" 285200 | \n",
"
\n",
" \n",
" 50 | \n",
" 2019-09-08 | \n",
" 82300 | \n",
" 285200 | \n",
"
\n",
" \n",
" 51 | \n",
" 2019-09-09 | \n",
" 215100 | \n",
" 285200 | \n",
"
\n",
" \n",
" 52 | \n",
" 2019-09-11 | \n",
" 217700 | \n",
" 217700 | \n",
"
\n",
" \n",
" 53 | \n",
" 2019-09-12 | \n",
" 185200 | \n",
" 293300 | \n",
"
\n",
" \n",
" 54 | \n",
" 2019-09-14 | \n",
" 150700 | \n",
" 293300 | \n",
"
\n",
" \n",
" 55 | \n",
" 2019-09-15 | \n",
" 293300 | \n",
" 293300 | \n",
"
\n",
" \n",
" 56 | \n",
" 2019-09-18 | \n",
" 150800 | \n",
" 301700 | \n",
"
\n",
" \n",
" 57 | \n",
" 2019-09-19 | \n",
" 301400 | \n",
" 301700 | \n",
"
\n",
" \n",
" 58 | \n",
" 2019-09-20 | \n",
" 203000 | \n",
" 301700 | \n",
"
\n",
" \n",
" 59 | \n",
" 2019-09-21 | \n",
" 301700 | \n",
" 301700 | \n",
"
\n",
" \n",
" 60 | \n",
" 2019-09-22 | \n",
" 294200 | \n",
" 301700 | \n",
"
\n",
" \n",
" 61 | \n",
" 2019-09-23 | \n",
" 174100 | \n",
" 301700 | \n",
"
\n",
" \n",
" 62 | \n",
" 2019-09-25 | \n",
" 147400 | \n",
" 294200 | \n",
"
\n",
" \n",
" 63 | \n",
" 2019-09-26 | \n",
" 54100 | \n",
" 287000 | \n",
"
\n",
" \n",
" 64 | \n",
" 2019-09-27 | \n",
" 140400 | \n",
" 287000 | \n",
"
\n",
" \n",
" 65 | \n",
" 2019-09-28 | \n",
" 287000 | \n",
" 287000 | \n",
"
\n",
" \n",
" 66 | \n",
" 2019-09-29 | \n",
" 276700 | \n",
" 287000 | \n",
"
\n",
" \n",
" 67 | \n",
" 2019-09-30 | \n",
" 113600 | \n",
" 319700 | \n",
"
\n",
" \n",
" 68 | \n",
" 2019-10-02 | \n",
" 107300 | \n",
" 319700 | \n",
"
\n",
" \n",
" 69 | \n",
" 2019-10-03 | \n",
" 319700 | \n",
" 319700 | \n",
"
\n",
" \n",
" 70 | \n",
" 2019-10-04 | \n",
" 48000 | \n",
" 319700 | \n",
"
\n",
" \n",
" 71 | \n",
" 2019-10-05 | \n",
" 99300 | \n",
" 319700 | \n",
"
\n",
" \n",
" 72 | \n",
" 2019-10-06 | \n",
" 131600 | \n",
" 319700 | \n",
"
\n",
" \n",
" 73 | \n",
" 2019-10-07 | \n",
" 117300 | \n",
" 136500 | \n",
"
\n",
" \n",
" 74 | \n",
" 2019-10-09 | \n",
" 45200 | \n",
" 153100 | \n",
"
\n",
" \n",
" 75 | \n",
" 2019-10-10 | \n",
" 136500 | \n",
" 293100 | \n",
"
\n",
" \n",
" 76 | \n",
" 2019-10-11 | \n",
" 113100 | \n",
" 293100 | \n",
"
\n",
" \n",
" 77 | \n",
" 2019-10-12 | \n",
" 153100 | \n",
" 293100 | \n",
"
\n",
" \n",
" 78 | \n",
" 2019-10-13 | \n",
" 293100 | \n",
" 293100 | \n",
"
\n",
" \n",
" 79 | \n",
" 2019-10-14 | \n",
" 177500 | \n",
" 293100 | \n",
"
\n",
" \n",
" 80 | \n",
" 2019-10-16 | \n",
" 148400 | \n",
" 293100 | \n",
"
\n",
" \n",
" 81 | \n",
" 2019-10-17 | \n",
" 186800 | \n",
" 213300 | \n",
"
\n",
" \n",
" 82 | \n",
" 2019-10-18 | \n",
" 84200 | \n",
" 307100 | \n",
"
\n",
" \n",
" 83 | \n",
" 2019-10-19 | \n",
" 202000 | \n",
" 307100 | \n",
"
\n",
" \n",
" 84 | \n",
" 2019-10-20 | \n",
" 213300 | \n",
" 307100 | \n",
"
\n",
" \n",
" 85 | \n",
" 2019-10-21 | \n",
" 307100 | \n",
" 307100 | \n",
"
\n",
" \n",
" 86 | \n",
" 2019-10-23 | \n",
" 71200 | \n",
" 307100 | \n",
"
\n",
" \n",
" 87 | \n",
" 2019-10-24 | \n",
" 219900 | \n",
" 307100 | \n",
"
\n",
" \n",
" 88 | \n",
" 2019-10-25 | \n",
" 133800 | \n",
" 219900 | \n",
"
\n",
" \n",
" 89 | \n",
" 2019-10-26 | \n",
" 128500 | \n",
" 219900 | \n",
"
\n",
" \n",
" 90 | \n",
" 2019-10-27 | \n",
" 211500 | \n",
" 219900 | \n",
"
\n",
" \n",
" 91 | \n",
" 2019-10-28 | \n",
" 215100 | \n",
" 215100 | \n",
"
\n",
" \n",
" 92 | \n",
" 2019-10-31 | \n",
" 95600 | \n",
" 236000 | \n",
"
\n",
" \n",
" 93 | \n",
" 2019-11-01 | \n",
" 114800 | \n",
" 236000 | \n",
"
\n",
" \n",
" 94 | \n",
" 2019-11-02 | \n",
" 236000 | \n",
" 236000 | \n",
"
\n",
" \n",
" 95 | \n",
" 2019-11-03 | \n",
" 203600 | \n",
" 236000 | \n",
"
\n",
" \n",
" 96 | \n",
" 2019-11-04 | \n",
" 92100 | \n",
" 236000 | \n",
"
\n",
" \n",
" 97 | \n",
" 2019-11-06 | \n",
" 129600 | \n",
" 279200 | \n",
"
\n",
" \n",
" 98 | \n",
" 2019-11-07 | \n",
" 167800 | \n",
" 357300 | \n",
"
\n",
" \n",
" 99 | \n",
" 2019-11-08 | \n",
" 223000 | \n",
" 357300 | \n",
"
\n",
" \n",
" 100 | \n",
" 2019-11-09 | \n",
" 279200 | \n",
" 357300 | \n",
"
\n",
" \n",
" 101 | \n",
" 2019-11-10 | \n",
" 357300 | \n",
" 357300 | \n",
"
\n",
" \n",
" 102 | \n",
" 2019-11-11 | \n",
" 128400 | \n",
" 357300 | \n",
"
\n",
" \n",
" 103 | \n",
" 2019-11-13 | \n",
" 145000 | \n",
" 357300 | \n",
"
\n",
" \n",
" 104 | \n",
" 2019-11-14 | \n",
" 72000 | \n",
" 319000 | \n",
"
\n",
" \n",
" 105 | \n",
" 2019-11-15 | \n",
" 170700 | \n",
" 319000 | \n",
"
\n",
" \n",
" 106 | \n",
" 2019-11-16 | \n",
" 153300 | \n",
" 319000 | \n",
"
\n",
" \n",
" 107 | \n",
" 2019-11-17 | \n",
" 319000 | \n",
" 319000 | \n",
"
\n",
" \n",
" 108 | \n",
" 2019-11-18 | \n",
" 98700 | \n",
" 319000 | \n",
"
\n",
" \n",
" 109 | \n",
" 2019-11-20 | \n",
" 136300 | \n",
" 319000 | \n",
"
\n",
" \n",
" 110 | \n",
" 2019-11-21 | \n",
" 61600 | \n",
" 289000 | \n",
"
\n",
" \n",
" 111 | \n",
" 2019-11-22 | \n",
" 66700 | \n",
" 289000 | \n",
"
\n",
" \n",
" 112 | \n",
" 2019-11-23 | \n",
" 178200 | \n",
" 289000 | \n",
"
\n",
" \n",
" 113 | \n",
" 2019-11-24 | \n",
" 289000 | \n",
" 289000 | \n",
"
\n",
" \n",
" 114 | \n",
" 2019-11-25 | \n",
" 167600 | \n",
" 289000 | \n",
"
\n",
" \n",
" 115 | \n",
" 2019-11-27 | \n",
" 127200 | \n",
" 289000 | \n",
"
\n",
" \n",
" 116 | \n",
" 2019-11-28 | \n",
" 189400 | \n",
" 295900 | \n",
"
\n",
" \n",
" 117 | \n",
" 2019-11-29 | \n",
" 197000 | \n",
" 295900 | \n",
"
\n",
" \n",
" 118 | \n",
" 2019-11-30 | \n",
" 239500 | \n",
" 295900 | \n",
"
\n",
" \n",
" 119 | \n",
" 2019-12-01 | \n",
" 295900 | \n",
" 295900 | \n",
"
\n",
" \n",
" 120 | \n",
" 2019-12-02 | \n",
" 106100 | \n",
" 295900 | \n",
"
\n",
" \n",
" 121 | \n",
" 2019-12-04 | \n",
" 215000 | \n",
" 295900 | \n",
"
\n",
" \n",
" 122 | \n",
" 2019-12-05 | \n",
" 202400 | \n",
" 409000 | \n",
"
\n",
" \n",
" 123 | \n",
" 2019-12-06 | \n",
" 195000 | \n",
" 409000 | \n",
"
\n",
" \n",
" 124 | \n",
" 2019-12-07 | \n",
" 185700 | \n",
" 409000 | \n",
"
\n",
" \n",
" 125 | \n",
" 2019-12-08 | \n",
" 409000 | \n",
" 409000 | \n",
"
\n",
" \n",
" 126 | \n",
" 2019-12-09 | \n",
" 142500 | \n",
" 409000 | \n",
"
\n",
" \n",
" 127 | \n",
" 2019-12-11 | \n",
" 206200 | \n",
" 409000 | \n",
"
\n",
" \n",
" 128 | \n",
" 2019-12-12 | \n",
" 201500 | \n",
" 206200 | \n",
"
\n",
" \n",
" 129 | \n",
" 2019-12-13 | \n",
" 105900 | \n",
" 206200 | \n",
"
\n",
" \n",
" 130 | \n",
" 2019-12-14 | \n",
" 151800 | \n",
" 206200 | \n",
"
\n",
" \n",
" 131 | \n",
" 2019-12-15 | \n",
" 173900 | \n",
" 205900 | \n",
"
\n",
" \n",
" 132 | \n",
" 2019-12-16 | \n",
" 157800 | \n",
" 205900 | \n",
"
\n",
" \n",
" 133 | \n",
" 2019-12-18 | \n",
" 205900 | \n",
" 205900 | \n",
"
\n",
" \n",
" 134 | \n",
" 2019-12-19 | \n",
" 185600 | \n",
" 223900 | \n",
"
\n",
" \n",
" 135 | \n",
" 2019-12-20 | \n",
" 121900 | \n",
" 223900 | \n",
"
\n",
" \n",
" 136 | \n",
" 2019-12-21 | \n",
" 194000 | \n",
" 223900 | \n",
"
\n",
" \n",
" 137 | \n",
" 2019-12-22 | \n",
" 223900 | \n",
" 223900 | \n",
"
\n",
" \n",
" 138 | \n",
" 2019-12-23 | \n",
" 166600 | \n",
" 223900 | \n",
"
\n",
" \n",
" 139 | \n",
" 2019-12-24 | \n",
" 56000 | \n",
" 223900 | \n",
"
\n",
" \n",
" 140 | \n",
" 2019-12-25 | \n",
" 109500 | \n",
" 369900 | \n",
"
\n",
" \n",
" 141 | \n",
" 2019-12-26 | \n",
" 92500 | \n",
" 369900 | \n",
"
\n",
" \n",
" 142 | \n",
" 2019-12-27 | \n",
" 47100 | \n",
" 369900 | \n",
"
\n",
" \n",
" 143 | \n",
" 2019-12-28 | \n",
" 369900 | \n",
" 369900 | \n",
"
\n",
" \n",
" 144 | \n",
" 2019-12-29 | \n",
" 249600 | \n",
" 369900 | \n",
"
\n",
" \n",
" 145 | \n",
" 2019-12-30 | \n",
" 238300 | \n",
" 369900 | \n",
"
\n",
" \n",
" 146 | \n",
" 2020-01-02 | \n",
" 234100 | \n",
" 275600 | \n",
"
\n",
" \n",
" 147 | \n",
" 2020-01-03 | \n",
" 238200 | \n",
" 275600 | \n",
"
\n",
" \n",
" 148 | \n",
" 2020-01-04 | \n",
" 127700 | \n",
" 275600 | \n",
"
\n",
" \n",
" 149 | \n",
" 2020-01-05 | \n",
" 275600 | \n",
" 275600 | \n",
"
\n",
" \n",
" 150 | \n",
" 2020-01-06 | \n",
" 109500 | \n",
" 275600 | \n",
"
\n",
" \n",
" 151 | \n",
" 2020-01-08 | \n",
" 251800 | \n",
" 275600 | \n",
"
\n",
" \n",
" 152 | \n",
" 2020-01-09 | \n",
" 259900 | \n",
" 307000 | \n",
"
\n",
" \n",
" 153 | \n",
" 2020-01-10 | \n",
" 146500 | \n",
" 307000 | \n",
"
\n",
" \n",
" 154 | \n",
" 2020-01-11 | \n",
" 149000 | \n",
" 307000 | \n",
"
\n",
" \n",
" 155 | \n",
" 2020-01-12 | \n",
" 307000 | \n",
" 307000 | \n",
"
\n",
" \n",
" 156 | \n",
" 2020-01-13 | \n",
" 138800 | \n",
" 307000 | \n",
"
\n",
" \n",
" 157 | \n",
" 2020-01-15 | \n",
" 264000 | \n",
" 307000 | \n",
"
\n",
" \n",
" 158 | \n",
" 2020-01-16 | \n",
" 33600 | \n",
" 354200 | \n",
"
\n",
" \n",
" 159 | \n",
" 2020-01-17 | \n",
" 107100 | \n",
" 354200 | \n",
"
\n",
" \n",
" 160 | \n",
" 2020-01-18 | \n",
" 162700 | \n",
" 354200 | \n",
"
\n",
" \n",
" 161 | \n",
" 2020-01-19 | \n",
" 354200 | \n",
" 354200 | \n",
"
\n",
" \n",
" 162 | \n",
" 2020-01-20 | \n",
" 92400 | \n",
" 354200 | \n",
"
\n",
" \n",
" 163 | \n",
" 2020-01-22 | \n",
" 136200 | \n",
" 354200 | \n",
"
\n",
" \n",
" 164 | \n",
" 2020-01-23 | \n",
" 288400 | \n",
" 288400 | \n",
"
\n",
" \n",
" 165 | \n",
" 2020-01-24 | \n",
" 53900 | \n",
" 288400 | \n",
"
\n",
" \n",
" 166 | \n",
" 2020-01-26 | \n",
" 142400 | \n",
" 288400 | \n",
"
\n",
" \n",
" 167 | \n",
" 2020-01-27 | \n",
" 158000 | \n",
" 245900 | \n",
"
\n",
" \n",
" 168 | \n",
" 2020-01-29 | \n",
" 104400 | \n",
" 245900 | \n",
"
\n",
" \n",
" 169 | \n",
" 2020-01-30 | \n",
" 245900 | \n",
" 342600 | \n",
"
\n",
" \n",
" 170 | \n",
" 2020-01-31 | \n",
" 201200 | \n",
" 342600 | \n",
"
\n",
" \n",
" 171 | \n",
" 2020-02-01 | \n",
" 173700 | \n",
" 342600 | \n",
"
\n",
" \n",
" 172 | \n",
" 2020-02-02 | \n",
" 342600 | \n",
" 342600 | \n",
"
\n",
" \n",
" 173 | \n",
" 2020-02-03 | \n",
" 93700 | \n",
" 342600 | \n",
"
\n",
" \n",
" 174 | \n",
" 2020-02-05 | \n",
" 281900 | \n",
" 342600 | \n",
"
\n",
" \n",
" 175 | \n",
" 2020-02-06 | \n",
" 206000 | \n",
" 341700 | \n",
"
\n",
" \n",
" 176 | \n",
" 2020-02-07 | \n",
" 288800 | \n",
" 341700 | \n",
"
\n",
" \n",
" 177 | \n",
" 2020-02-08 | \n",
" 252800 | \n",
" 341700 | \n",
"
\n",
" \n",
" 178 | \n",
" 2020-02-09 | \n",
" 341700 | \n",
" 354500 | \n",
"
\n",
" \n",
" 179 | \n",
" 2020-02-10 | \n",
" 65600 | \n",
" 354500 | \n",
"
\n",
" \n",
" 180 | \n",
" 2020-02-12 | \n",
" 354500 | \n",
" 354500 | \n",
"
\n",
" \n",
" 181 | \n",
" 2020-02-13 | \n",
" 110300 | \n",
" 354500 | \n",
"
\n",
" \n",
" 182 | \n",
" 2020-02-14 | \n",
" 242900 | \n",
" 354500 | \n",
"
\n",
" \n",
" 183 | \n",
" 2020-02-15 | \n",
" 235400 | \n",
" 354500 | \n",
"
\n",
" \n",
" 184 | \n",
" 2020-02-16 | \n",
" 290200 | \n",
" 290200 | \n",
"
\n",
" \n",
" 185 | \n",
" 2020-02-17 | \n",
" 183200 | \n",
" 290200 | \n",
"
\n",
" \n",
" 186 | \n",
" 2020-02-19 | \n",
" 212000 | \n",
" 353000 | \n",
"
\n",
" \n",
" 187 | \n",
" 2020-02-20 | \n",
" 249900 | \n",
" 647100 | \n",
"
\n",
" \n",
" 188 | \n",
" 2020-02-21 | \n",
" 155700 | \n",
" 647100 | \n",
"
\n",
" \n",
" 189 | \n",
" 2020-02-22 | \n",
" 353000 | \n",
" 647100 | \n",
"
\n",
" \n",
" 190 | \n",
" 2020-02-23 | \n",
" 647100 | \n",
" 647100 | \n",
"
\n",
" \n",
" 191 | \n",
" 2020-02-24 | \n",
" 189400 | \n",
" 647100 | \n",
"
\n",
" \n",
" 192 | \n",
" 2020-02-26 | \n",
" 345100 | \n",
" 647100 | \n",
"
\n",
" \n",
" 193 | \n",
" 2020-02-27 | \n",
" 197700 | \n",
" 403400 | \n",
"
\n",
" \n",
" 194 | \n",
" 2020-02-28 | \n",
" 337000 | \n",
" 403400 | \n",
"
\n",
" \n",
" 195 | \n",
" 2020-02-29 | \n",
" 273500 | \n",
" 403400 | \n",
"
\n",
" \n",
" 196 | \n",
" 2020-03-01 | \n",
" 403400 | \n",
" 403400 | \n",
"
\n",
" \n",
" 197 | \n",
" 2020-03-02 | \n",
" 273000 | \n",
" 421500 | \n",
"
\n",
" \n",
" 198 | \n",
" 2020-03-05 | \n",
" 421500 | \n",
" 421500 | \n",
"
\n",
" \n",
" 199 | \n",
" 2020-03-06 | \n",
" 305800 | \n",
" 421500 | \n",
"
\n",
" \n",
" 200 | \n",
" 2020-03-07 | \n",
" 294700 | \n",
" 421500 | \n",
"
\n",
" \n",
" 201 | \n",
" 2020-03-08 | \n",
" 356000 | \n",
" 421500 | \n",
"
\n",
" \n",
" 202 | \n",
" 2020-03-09 | \n",
" 144000 | \n",
" 356000 | \n",
"
\n",
" \n",
" 203 | \n",
" 2020-03-11 | \n",
" 204600 | \n",
" 356000 | \n",
"
\n",
" \n",
" 204 | \n",
" 2020-03-12 | \n",
" 251800 | \n",
" 259700 | \n",
"
\n",
" \n",
" 205 | \n",
" 2020-03-13 | \n",
" 224100 | \n",
" 263500 | \n",
"
\n",
" \n",
" 206 | \n",
" 2020-03-14 | \n",
" 212500 | \n",
" 263500 | \n",
"
\n",
" \n",
" 207 | \n",
" 2020-03-15 | \n",
" 259700 | \n",
" 323600 | \n",
"
\n",
" \n",
" 208 | \n",
" 2020-03-16 | \n",
" 263500 | \n",
" 323600 | \n",
"
\n",
" \n",
" 209 | \n",
" 2020-03-18 | \n",
" 323600 | \n",
" 368600 | \n",
"
\n",
" \n",
" 210 | \n",
" 2020-03-19 | \n",
" 173500 | \n",
" 368600 | \n",
"
\n",
" \n",
" 211 | \n",
" 2020-03-20 | \n",
" 135800 | \n",
" 368600 | \n",
"
\n",
" \n",
" 212 | \n",
" 2020-03-21 | \n",
" 368600 | \n",
" 368600 | \n",
"
\n",
" \n",
" 213 | \n",
" 2020-03-22 | \n",
" 257700 | \n",
" 368600 | \n",
"
\n",
" \n",
" 214 | \n",
" 2020-03-23 | \n",
" 213200 | \n",
" 368600 | \n",
"
\n",
" \n",
" 215 | \n",
" 2020-03-25 | \n",
" 216300 | \n",
" 257700 | \n",
"
\n",
" \n",
" 216 | \n",
" 2020-03-26 | \n",
" 206700 | \n",
" 320300 | \n",
"
\n",
" \n",
" 217 | \n",
" 2020-03-27 | \n",
" 198700 | \n",
" 320300 | \n",
"
\n",
" \n",
" 218 | \n",
" 2020-03-28 | \n",
" 197500 | \n",
" 320300 | \n",
"
\n",
" \n",
" 219 | \n",
" 2020-03-29 | \n",
" 320300 | \n",
" 320300 | \n",
"
\n",
" \n",
" 220 | \n",
" 2020-03-30 | \n",
" 218600 | \n",
" 320300 | \n",
"
\n",
" \n",
" 221 | \n",
" 2020-04-01 | \n",
" 230100 | \n",
" 320300 | \n",
"
\n",
" \n",
" 222 | \n",
" 2020-04-02 | \n",
" 297700 | \n",
" 297700 | \n",
"
\n",
" \n",
" 223 | \n",
" 2020-04-03 | \n",
" 140500 | \n",
" 297700 | \n",
"
\n",
" \n",
" 224 | \n",
" 2020-04-04 | \n",
" 153000 | \n",
" 297700 | \n",
"
\n",
" \n",
" 225 | \n",
" 2020-04-05 | \n",
" 225800 | \n",
" 297700 | \n",
"
\n",
" \n",
" 226 | \n",
" 2020-04-06 | \n",
" 132000 | \n",
" 272600 | \n",
"
\n",
" \n",
" 227 | \n",
" 2020-04-08 | \n",
" 272600 | \n",
" 272600 | \n",
"
\n",
" \n",
" 228 | \n",
" 2020-04-09 | \n",
" 198600 | \n",
" 272600 | \n",
"
\n",
" \n",
" 229 | \n",
" 2020-04-10 | \n",
" 149400 | \n",
" 272600 | \n",
"
\n",
" \n",
" 230 | \n",
" 2020-04-11 | \n",
" 119700 | \n",
" 272600 | \n",
"
\n",
" \n",
" 231 | \n",
" 2020-04-12 | \n",
" 212000 | \n",
" 212000 | \n",
"
\n",
" \n",
" 232 | \n",
" 2020-04-13 | \n",
" 53300 | \n",
" 212000 | \n",
"
\n",
" \n",
" 233 | \n",
" 2020-04-15 | \n",
" 202400 | \n",
" 269200 | \n",
"
\n",
" \n",
" 234 | \n",
" 2020-04-16 | \n",
" 102800 | \n",
" 269200 | \n",
"
\n",
" \n",
" 235 | \n",
" 2020-04-17 | \n",
" 147000 | \n",
" 269200 | \n",
"
\n",
" \n",
" 236 | \n",
" 2020-04-18 | \n",
" 269200 | \n",
" 269200 | \n",
"
\n",
" \n",
" 237 | \n",
" 2020-04-19 | \n",
" 140700 | \n",
" 269200 | \n",
"
\n",
" \n",
" 238 | \n",
" 2020-04-20 | \n",
" 188900 | \n",
" 269200 | \n",
"
\n",
" \n",
" 239 | \n",
" 2020-04-22 | \n",
" 225400 | \n",
" 307900 | \n",
"
\n",
" \n",
" 240 | \n",
" 2020-04-23 | \n",
" 164300 | \n",
" 307900 | \n",
"
\n",
" \n",
" 241 | \n",
" 2020-04-24 | \n",
" 165900 | \n",
" 307900 | \n",
"
\n",
" \n",
" 242 | \n",
" 2020-04-25 | \n",
" 307900 | \n",
" 307900 | \n",
"
\n",
" \n",
" 243 | \n",
" 2020-04-26 | \n",
" 277700 | \n",
" 307900 | \n",
"
\n",
" \n",
" 244 | \n",
" 2020-04-27 | \n",
" 264600 | \n",
" 307900 | \n",
"
\n",
" \n",
" 245 | \n",
" 2020-04-29 | \n",
" 118100 | \n",
" 277700 | \n",
"
\n",
" \n",
" 246 | \n",
" 2020-04-30 | \n",
" 134100 | \n",
" 264600 | \n",
"
\n",
" \n",
" 247 | \n",
" 2020-05-01 | \n",
" 215700 | \n",
" 215700 | \n",
"
\n",
" \n",
" 248 | \n",
" 2020-05-02 | \n",
" 92700 | \n",
" 215700 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import sqlite3\n",
"import pandas as pd\n",
"\n",
"conn = sqlite3.connect('bakery_sales.db')\n",
"\n",
"sql_statement = \"\"\"\n",
"SELECT \n",
"\tsale_date,\n",
"\tsum(total) total,\n",
"\tmax(sum(total)) over (order by cast(strftime('%j', sale_date) AS INT) range between 3 preceding and 3 following) seven_day_max\n",
"FROM bakery_sales\n",
"GROUP BY sale_date\n",
"order by sale_date\n",
"\"\"\"\n",
"df = pd.read_sql_query(sql_statement, conn)\n",
"df.style.set_table_attributes('style=\"font-size: 12px\"')"
]
},
{
"cell_type": "markdown",
"id": "4f8c8b96",
"metadata": {},
"source": [
"## Lag and Lead\n",
"\n",
"\n",
"```SQL\n",
"WITH SalesTable AS (\n",
"SELECT\n",
"\tcast(strftime('%Y', sale_date) AS INT) sale_year,\n",
"\tcast(strftime('%W', sale_date) AS INT) sale_week,\n",
"\tTotal\n",
"FROM\n",
" bakery_sales\n",
"ORDER BY sale_date\n",
")\n",
"SELECT \n",
"\tsale_year,\n",
"\tsale_week,\n",
"\tsum(total) week_total,\n",
"\tlag(sum(total), 1) over (order by sale_week) prev_week_total,\n",
"\tlead(sum(total), 1) over (order by sale_week) next_week_total\n",
"FROM SalesTable\n",
"GROUP BY sale_year, sale_week\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "63674b70",
"metadata": {
"tags": [
"hide-input",
"output_scroll"
]
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" sale_year | \n",
" sale_week | \n",
" week_total | \n",
" prev_week_total | \n",
" next_week_total | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2020 | \n",
" 0 | \n",
" 875600 | \n",
" nan | \n",
" 1223700.000000 | \n",
"
\n",
" \n",
" 1 | \n",
" 2020 | \n",
" 1 | \n",
" 1223700 | \n",
" 875600.000000 | \n",
" 1060400.000000 | \n",
"
\n",
" \n",
" 2 | \n",
" 2020 | \n",
" 2 | \n",
" 1060400 | \n",
" 1223700.000000 | \n",
" 713300.000000 | \n",
"
\n",
" \n",
" 3 | \n",
" 2020 | \n",
" 3 | \n",
" 713300 | \n",
" 1060400.000000 | \n",
" 1225800.000000 | \n",
"
\n",
" \n",
" 4 | \n",
" 2020 | \n",
" 4 | \n",
" 1225800 | \n",
" 713300.000000 | \n",
" 1464900.000000 | \n",
"
\n",
" \n",
" 5 | \n",
" 2020 | \n",
" 5 | \n",
" 1464900 | \n",
" 1225800.000000 | \n",
" 1298900.000000 | \n",
"
\n",
" \n",
" 6 | \n",
" 2020 | \n",
" 6 | \n",
" 1298900 | \n",
" 1464900.000000 | \n",
" 1800900.000000 | \n",
"
\n",
" \n",
" 7 | \n",
" 2020 | \n",
" 7 | \n",
" 1800900 | \n",
" 1298900.000000 | \n",
" 1746100.000000 | \n",
"
\n",
" \n",
" 8 | \n",
" 2020 | \n",
" 8 | \n",
" 1746100 | \n",
" 1800900.000000 | \n",
" 1651000.000000 | \n",
"
\n",
" \n",
" 9 | \n",
" 2020 | \n",
" 9 | \n",
" 1651000 | \n",
" 1746100.000000 | \n",
" 1296700.000000 | \n",
"
\n",
" \n",
" 10 | \n",
" 2020 | \n",
" 10 | \n",
" 1296700 | \n",
" 1651000.000000 | \n",
" 1522700.000000 | \n",
"
\n",
" \n",
" 11 | \n",
" 2020 | \n",
" 11 | \n",
" 1522700 | \n",
" 1296700.000000 | \n",
" 1352700.000000 | \n",
"
\n",
" \n",
" 12 | \n",
" 2020 | \n",
" 12 | \n",
" 1352700 | \n",
" 1522700.000000 | \n",
" 1265700.000000 | \n",
"
\n",
" \n",
" 13 | \n",
" 2020 | \n",
" 13 | \n",
" 1265700 | \n",
" 1352700.000000 | \n",
" 1084300.000000 | \n",
"
\n",
" \n",
" 14 | \n",
" 2020 | \n",
" 14 | \n",
" 1084300 | \n",
" 1265700.000000 | \n",
" 915400.000000 | \n",
"
\n",
" \n",
" 15 | \n",
" 2020 | \n",
" 15 | \n",
" 915400 | \n",
" 1084300.000000 | \n",
" 1330100.000000 | \n",
"
\n",
" \n",
" 16 | \n",
" 2020 | \n",
" 16 | \n",
" 1330100 | \n",
" 915400.000000 | \n",
" 825200.000000 | \n",
"
\n",
" \n",
" 17 | \n",
" 2020 | \n",
" 17 | \n",
" 825200 | \n",
" 1330100.000000 | \n",
" 427000.000000 | \n",
"
\n",
" \n",
" 18 | \n",
" 2019 | \n",
" 27 | \n",
" 427000 | \n",
" 825200.000000 | \n",
" 701100.000000 | \n",
"
\n",
" \n",
" 19 | \n",
" 2019 | \n",
" 28 | \n",
" 701100 | \n",
" 427000.000000 | \n",
" 2529900.000000 | \n",
"
\n",
" \n",
" 20 | \n",
" 2019 | \n",
" 29 | \n",
" 2529900 | \n",
" 701100.000000 | \n",
" 1331900.000000 | \n",
"
\n",
" \n",
" 21 | \n",
" 2019 | \n",
" 30 | \n",
" 1331900 | \n",
" 2529900.000000 | \n",
" 1469900.000000 | \n",
"
\n",
" \n",
" 22 | \n",
" 2019 | \n",
" 31 | \n",
" 1469900 | \n",
" 1331900.000000 | \n",
" 1357100.000000 | \n",
"
\n",
" \n",
" 23 | \n",
" 2019 | \n",
" 32 | \n",
" 1357100 | \n",
" 1469900.000000 | \n",
" 1377200.000000 | \n",
"
\n",
" \n",
" 24 | \n",
" 2019 | \n",
" 33 | \n",
" 1377200 | \n",
" 1357100.000000 | \n",
" 1225100.000000 | \n",
"
\n",
" \n",
" 25 | \n",
" 2019 | \n",
" 34 | \n",
" 1225100 | \n",
" 1377200.000000 | \n",
" 1146900.000000 | \n",
"
\n",
" \n",
" 26 | \n",
" 2019 | \n",
" 35 | \n",
" 1146900 | \n",
" 1225100.000000 | \n",
" 1062000.000000 | \n",
"
\n",
" \n",
" 27 | \n",
" 2019 | \n",
" 36 | \n",
" 1062000 | \n",
" 1146900.000000 | \n",
" 1251100.000000 | \n",
"
\n",
" \n",
" 28 | \n",
" 2019 | \n",
" 37 | \n",
" 1251100 | \n",
" 1062000.000000 | \n",
" 1079700.000000 | \n",
"
\n",
" \n",
" 29 | \n",
" 2019 | \n",
" 38 | \n",
" 1079700 | \n",
" 1251100.000000 | \n",
" 819500.000000 | \n",
"
\n",
" \n",
" 30 | \n",
" 2019 | \n",
" 39 | \n",
" 819500 | \n",
" 1079700.000000 | \n",
" 858300.000000 | \n",
"
\n",
" \n",
" 31 | \n",
" 2019 | \n",
" 40 | \n",
" 858300 | \n",
" 819500.000000 | \n",
" 1012200.000000 | \n",
"
\n",
" \n",
" 32 | \n",
" 2019 | \n",
" 41 | \n",
" 1012200 | \n",
" 858300.000000 | \n",
" 1072000.000000 | \n",
"
\n",
" \n",
" 33 | \n",
" 2019 | \n",
" 42 | \n",
" 1072000 | \n",
" 1012200.000000 | \n",
" 865100.000000 | \n",
"
\n",
" \n",
" 34 | \n",
" 2019 | \n",
" 43 | \n",
" 865100 | \n",
" 1072000.000000 | \n",
" 1249000.000000 | \n",
"
\n",
" \n",
" 35 | \n",
" 2019 | \n",
" 44 | \n",
" 1249000 | \n",
" 865100.000000 | \n",
" 988400.000000 | \n",
"
\n",
" \n",
" 36 | \n",
" 2019 | \n",
" 45 | \n",
" 988400 | \n",
" 1249000.000000 | \n",
" 830500.000000 | \n",
"
\n",
" \n",
" 37 | \n",
" 2019 | \n",
" 46 | \n",
" 830500 | \n",
" 988400.000000 | \n",
" 1216600.000000 | \n",
"
\n",
" \n",
" 38 | \n",
" 2019 | \n",
" 47 | \n",
" 1216600 | \n",
" 830500.000000 | \n",
" 1313200.000000 | \n",
"
\n",
" \n",
" 39 | \n",
" 2019 | \n",
" 48 | \n",
" 1313200 | \n",
" 1216600.000000 | \n",
" 981800.000000 | \n",
"
\n",
" \n",
" 40 | \n",
" 2019 | \n",
" 49 | \n",
" 981800 | \n",
" 1313200.000000 | \n",
" 1089100.000000 | \n",
"
\n",
" \n",
" 41 | \n",
" 2019 | \n",
" 50 | \n",
" 1089100 | \n",
" 981800.000000 | \n",
" 1091200.000000 | \n",
"
\n",
" \n",
" 42 | \n",
" 2019 | \n",
" 51 | \n",
" 1091200 | \n",
" 1089100.000000 | \n",
" 238300.000000 | \n",
"
\n",
" \n",
" 43 | \n",
" 2019 | \n",
" 52 | \n",
" 238300 | \n",
" 1091200.000000 | \n",
" nan | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import sqlite3\n",
"import pandas as pd\n",
"\n",
"conn = sqlite3.connect('bakery_sales.db')\n",
"\n",
"sql_statement = \"\"\"\n",
"WITH SalesTable AS (\n",
"SELECT\n",
"\tcast(strftime('%Y', sale_date) AS INT) sale_year,\n",
"\tcast(strftime('%W', sale_date) AS INT) sale_week,\n",
"\tTotal\n",
"FROM\n",
" bakery_sales\n",
"ORDER BY sale_date\n",
")\n",
"SELECT \n",
"\tsale_year,\n",
"\tsale_week,\n",
"\tsum(total) week_total,\n",
"\tlag(sum(total), 1) over (order by sale_week) prev_week_total,\n",
"\tlead(sum(total), 1) over (order by sale_week) next_week_total\n",
"FROM SalesTable\n",
"GROUP BY sale_year, sale_week\n",
"\"\"\"\n",
"df = pd.read_sql_query(sql_statement, conn)\n",
"df.style.set_table_attributes('style=\"font-size: 12px\"')"
]
},
{
"cell_type": "markdown",
"id": "134bcbd7",
"metadata": {},
"source": [
"```SQL\n",
"WITH SalesTable AS (\n",
" SELECT\n",
" cast(strftime('%Y', sale_date) AS INT) sale_year,\n",
" cast(strftime('%W', sale_date) AS INT) sale_week,\n",
" Total\n",
" FROM\n",
" bakery_sales\n",
" ORDER BY\n",
" sale_date\n",
")\n",
"SELECT\n",
" sale_year,\n",
" sale_week,\n",
" sum(total) week_total,\n",
" round(\n",
" CAST(\n",
" (\n",
" sum(total) - lag(sum(total), 1) over (\n",
" order by\n",
" sale_week\n",
" )\n",
" ) AS REAL\n",
" ) / CAST(\n",
" lag(sum(total), 1) over (\n",
" order by\n",
" sale_week\n",
" ) AS REAL\n",
" ) * 100,\n",
" 1\n",
" ) CHANGE\n",
"FROM\n",
" SalesTable\n",
"GROUP BY\n",
" sale_year,\n",
" sale_week\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "68df9964",
"metadata": {
"tags": [
"hide-input",
"output_scroll"
]
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" sale_year | \n",
" sale_week | \n",
" week_total | \n",
" CHANGE | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2020 | \n",
" 0 | \n",
" 875600 | \n",
" nan | \n",
"
\n",
" \n",
" 1 | \n",
" 2020 | \n",
" 1 | \n",
" 1223700 | \n",
" 39.800000 | \n",
"
\n",
" \n",
" 2 | \n",
" 2020 | \n",
" 2 | \n",
" 1060400 | \n",
" -13.300000 | \n",
"
\n",
" \n",
" 3 | \n",
" 2020 | \n",
" 3 | \n",
" 713300 | \n",
" -32.700000 | \n",
"
\n",
" \n",
" 4 | \n",
" 2020 | \n",
" 4 | \n",
" 1225800 | \n",
" 71.800000 | \n",
"
\n",
" \n",
" 5 | \n",
" 2020 | \n",
" 5 | \n",
" 1464900 | \n",
" 19.500000 | \n",
"
\n",
" \n",
" 6 | \n",
" 2020 | \n",
" 6 | \n",
" 1298900 | \n",
" -11.300000 | \n",
"
\n",
" \n",
" 7 | \n",
" 2020 | \n",
" 7 | \n",
" 1800900 | \n",
" 38.600000 | \n",
"
\n",
" \n",
" 8 | \n",
" 2020 | \n",
" 8 | \n",
" 1746100 | \n",
" -3.000000 | \n",
"
\n",
" \n",
" 9 | \n",
" 2020 | \n",
" 9 | \n",
" 1651000 | \n",
" -5.400000 | \n",
"
\n",
" \n",
" 10 | \n",
" 2020 | \n",
" 10 | \n",
" 1296700 | \n",
" -21.500000 | \n",
"
\n",
" \n",
" 11 | \n",
" 2020 | \n",
" 11 | \n",
" 1522700 | \n",
" 17.400000 | \n",
"
\n",
" \n",
" 12 | \n",
" 2020 | \n",
" 12 | \n",
" 1352700 | \n",
" -11.200000 | \n",
"
\n",
" \n",
" 13 | \n",
" 2020 | \n",
" 13 | \n",
" 1265700 | \n",
" -6.400000 | \n",
"
\n",
" \n",
" 14 | \n",
" 2020 | \n",
" 14 | \n",
" 1084300 | \n",
" -14.300000 | \n",
"
\n",
" \n",
" 15 | \n",
" 2020 | \n",
" 15 | \n",
" 915400 | \n",
" -15.600000 | \n",
"
\n",
" \n",
" 16 | \n",
" 2020 | \n",
" 16 | \n",
" 1330100 | \n",
" 45.300000 | \n",
"
\n",
" \n",
" 17 | \n",
" 2020 | \n",
" 17 | \n",
" 825200 | \n",
" -38.000000 | \n",
"
\n",
" \n",
" 18 | \n",
" 2019 | \n",
" 27 | \n",
" 427000 | \n",
" -48.300000 | \n",
"
\n",
" \n",
" 19 | \n",
" 2019 | \n",
" 28 | \n",
" 701100 | \n",
" 64.200000 | \n",
"
\n",
" \n",
" 20 | \n",
" 2019 | \n",
" 29 | \n",
" 2529900 | \n",
" 260.800000 | \n",
"
\n",
" \n",
" 21 | \n",
" 2019 | \n",
" 30 | \n",
" 1331900 | \n",
" -47.400000 | \n",
"
\n",
" \n",
" 22 | \n",
" 2019 | \n",
" 31 | \n",
" 1469900 | \n",
" 10.400000 | \n",
"
\n",
" \n",
" 23 | \n",
" 2019 | \n",
" 32 | \n",
" 1357100 | \n",
" -7.700000 | \n",
"
\n",
" \n",
" 24 | \n",
" 2019 | \n",
" 33 | \n",
" 1377200 | \n",
" 1.500000 | \n",
"
\n",
" \n",
" 25 | \n",
" 2019 | \n",
" 34 | \n",
" 1225100 | \n",
" -11.000000 | \n",
"
\n",
" \n",
" 26 | \n",
" 2019 | \n",
" 35 | \n",
" 1146900 | \n",
" -6.400000 | \n",
"
\n",
" \n",
" 27 | \n",
" 2019 | \n",
" 36 | \n",
" 1062000 | \n",
" -7.400000 | \n",
"
\n",
" \n",
" 28 | \n",
" 2019 | \n",
" 37 | \n",
" 1251100 | \n",
" 17.800000 | \n",
"
\n",
" \n",
" 29 | \n",
" 2019 | \n",
" 38 | \n",
" 1079700 | \n",
" -13.700000 | \n",
"
\n",
" \n",
" 30 | \n",
" 2019 | \n",
" 39 | \n",
" 819500 | \n",
" -24.100000 | \n",
"
\n",
" \n",
" 31 | \n",
" 2019 | \n",
" 40 | \n",
" 858300 | \n",
" 4.700000 | \n",
"
\n",
" \n",
" 32 | \n",
" 2019 | \n",
" 41 | \n",
" 1012200 | \n",
" 17.900000 | \n",
"
\n",
" \n",
" 33 | \n",
" 2019 | \n",
" 42 | \n",
" 1072000 | \n",
" 5.900000 | \n",
"
\n",
" \n",
" 34 | \n",
" 2019 | \n",
" 43 | \n",
" 865100 | \n",
" -19.300000 | \n",
"
\n",
" \n",
" 35 | \n",
" 2019 | \n",
" 44 | \n",
" 1249000 | \n",
" 44.400000 | \n",
"
\n",
" \n",
" 36 | \n",
" 2019 | \n",
" 45 | \n",
" 988400 | \n",
" -20.900000 | \n",
"
\n",
" \n",
" 37 | \n",
" 2019 | \n",
" 46 | \n",
" 830500 | \n",
" -16.000000 | \n",
"
\n",
" \n",
" 38 | \n",
" 2019 | \n",
" 47 | \n",
" 1216600 | \n",
" 46.500000 | \n",
"
\n",
" \n",
" 39 | \n",
" 2019 | \n",
" 48 | \n",
" 1313200 | \n",
" 7.900000 | \n",
"
\n",
" \n",
" 40 | \n",
" 2019 | \n",
" 49 | \n",
" 981800 | \n",
" -25.200000 | \n",
"
\n",
" \n",
" 41 | \n",
" 2019 | \n",
" 50 | \n",
" 1089100 | \n",
" 10.900000 | \n",
"
\n",
" \n",
" 42 | \n",
" 2019 | \n",
" 51 | \n",
" 1091200 | \n",
" 0.200000 | \n",
"
\n",
" \n",
" 43 | \n",
" 2019 | \n",
" 52 | \n",
" 238300 | \n",
" -78.200000 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import sqlite3\n",
"import pandas as pd\n",
"\n",
"conn = sqlite3.connect('bakery_sales.db')\n",
"\n",
"sql_statement = \"\"\"\n",
"WITH SalesTable AS (\n",
" SELECT\n",
" cast(strftime('%Y', sale_date) AS INT) sale_year,\n",
" cast(strftime('%W', sale_date) AS INT) sale_week,\n",
" Total\n",
" FROM\n",
" bakery_sales\n",
" ORDER BY\n",
" sale_date\n",
")\n",
"SELECT\n",
" sale_year,\n",
" sale_week,\n",
" sum(total) week_total,\n",
" round(\n",
" CAST(\n",
" (\n",
" sum(total) - lag(sum(total), 1) over (\n",
" order by\n",
" sale_week\n",
" )\n",
" ) AS REAL\n",
" ) / CAST(\n",
" lag(sum(total), 1) over (\n",
" order by\n",
" sale_week\n",
" ) AS REAL\n",
" ) * 100,\n",
" 1\n",
" ) CHANGE\n",
"FROM\n",
" SalesTable\n",
"GROUP BY\n",
" sale_year,\n",
" sale_week\n",
"\"\"\"\n",
"df = pd.read_sql_query(sql_statement, conn)\n",
"df.style.set_table_attributes('style=\"font-size: 12px\"')"
]
},
{
"cell_type": "markdown",
"id": "0d157c2f",
"metadata": {},
"source": [
"## Ranking\n",
"\n",
"- `rank`: returns the same ranking in case of a tie, with gaps in the rankings\n",
"- `row_number`: returns a unique number for each row, with rankings arbitrarily assigned in case of a tie\n",
"- `dense_rank`: returns the same ranking in the case of a tie, with no gaps in the rankings\n",
"\n",
"Ref: https://blog.jooq.org/the-difference-between-row_number-rank-and-dense_rank/\n",
"\n",
"\n",
"```SQL\n",
"SELECT \n",
"\tPatientGender,\n",
"\tPatientMaritalStatus, \n",
"\tcount(*) StatusCount, \n",
"\trank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRank,\n",
"\trow_number() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRowNumber,\n",
"\tdense_rank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusDenseRank\n",
"FROM \n",
"PatientCorePopulatedTable\n",
"GROUP BY PatientGender, PatientMaritalStatus\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "c43fc496",
"metadata": {
"tags": [
"hide-input",
"output_scroll"
]
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" PatientGender | \n",
" PatientMaritalStatus | \n",
" StatusCount | \n",
" StatusRank | \n",
" StatusRowNumber | \n",
" StatusDenseRank | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Female | \n",
" Married | \n",
" 23 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" Female | \n",
" Single | \n",
" 17 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
"
\n",
" \n",
" 2 | \n",
" Female | \n",
" Divorced | \n",
" 6 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
"
\n",
" \n",
" 3 | \n",
" Female | \n",
" Unknown | \n",
" 3 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
"
\n",
" \n",
" 4 | \n",
" Female | \n",
" Separated | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 4 | \n",
"
\n",
" \n",
" 5 | \n",
" Male | \n",
" Married | \n",
" 22 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" 6 | \n",
" Male | \n",
" Single | \n",
" 15 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
"
\n",
" \n",
" 7 | \n",
" Male | \n",
" Divorced | \n",
" 5 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
"
\n",
" \n",
" 8 | \n",
" Male | \n",
" Unknown | \n",
" 3 | \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
"
\n",
" \n",
" 9 | \n",
" Male | \n",
" Separated | \n",
" 2 | \n",
" 5 | \n",
" 5 | \n",
" 5 | \n",
"
\n",
" \n",
" 10 | \n",
" Male | \n",
" Widowed | \n",
" 1 | \n",
" 6 | \n",
" 6 | \n",
" 6 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import sqlite3\n",
"import pandas as pd\n",
"\n",
"conn = sqlite3.connect('100_patients.db')\n",
"\n",
"sql_statement = \"\"\"\n",
"SELECT \n",
"\tPatientGender,\n",
"\tPatientMaritalStatus, \n",
"\tcount(*) StatusCount, \n",
"\trank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRank,\n",
"\trow_number() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRowNumber,\n",
"\tdense_rank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusDenseRank\n",
"FROM \n",
"PatientCorePopulatedTable\n",
"GROUP BY PatientGender, PatientMaritalStatus\n",
"\"\"\"\n",
"df = pd.read_sql_query(sql_statement, conn)\n",
"df.style.set_table_attributes('style=\"font-size: 12px\"')"
]
}
],
"metadata": {
"jupytext": {
"formats": "md:myst",
"text_representation": {
"extension": ".md",
"format_name": "myst"
}
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"source_map": [
11,
60,
104,
147,
196,
242,
290,
333,
381,
406,
433,
445,
463,
487,
515,
555,
601,
625
]
},
"nbformat": 4,
"nbformat_minor": 5
}