{ "cells": [ { "cell_type": "markdown", "id": "2540c433", "metadata": {}, "source": [ "# Analytic Functions\n", "\n", "Analytic functions allow you to group rows into windows, partitioning\n", "the data. Windows are defined using the `OVER` clause and optionally\n", "combined with the `PARTITION` subclause. \n", "\n", "Date Reference: https://www.techonthenet.com/sqlite/functions/julianday.php\n", "\n", "## Data Windows\n", "\n", "```SQL\n", "SELECT\n", " CASE\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 1\n", " AND 3 THEN 'Q1'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 4\n", " AND 6 THEN 'Q2'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 7\n", " AND 9 THEN 'Q3'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 10\n", " AND 12 THEN 'Q4'\n", " END Quarter,\n", " CASE\n", " WHEN strftime('%m', sale_date) = '01' THEN 'January'\n", " WHEN strftime('%m', sale_date) = '02' THEN 'February'\n", " WHEN strftime('%m', sale_date) = '03' THEN 'March'\n", " WHEN strftime('%m', sale_date) = '04' THEN 'April'\n", " WHEN strftime('%m', sale_date) = '05' THEN 'May'\n", " WHEN strftime('%m', sale_date) = '06' THEN 'June'\n", " WHEN strftime('%m', sale_date) = '07' THEN 'July'\n", " WHEN strftime('%m', sale_date) = '08' THEN 'August'\n", " WHEN strftime('%m', sale_date) = '09' THEN 'September'\n", " WHEN strftime('%m', sale_date) = '10' THEN 'October'\n", " WHEN strftime('%m', sale_date) = '11' THEN 'November'\n", " WHEN strftime('%m', sale_date) = '12' THEN 'December'\n", " END Month,\n", " sum(total) Monthly_Sales\n", "FROM\n", " bakery_sales\n", "GROUP BY\n", " Quarter,\n", " Month\n", "ORDER BY \n", "\tstrftime('%m', sale_date)\n", "```" ] }, { "cell_type": "code", "execution_count": 1, "id": "d01adbef", "metadata": { "tags": [ "hide-input", "output_scroll" ] }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 QuarterMonthMonthly_Sales
0Q1January4582500
1Q1February6423700
2Q1March6445100
3Q2April4893700
4Q2May308400
5Q3July4076500
6Q3August6100500
7Q3September4895500
8Q4October3959100
9Q4November4543000
10Q4December5009500
\n" ], "text/plain": [ "" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import sqlite3\n", "import pandas as pd\n", "\n", "conn = sqlite3.connect('bakery_sales.db')\n", "\n", "sql_statement = \"\"\"\n", "SELECT\n", " CASE\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 1\n", " AND 3 THEN 'Q1'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 4\n", " AND 6 THEN 'Q2'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 7\n", " AND 9 THEN 'Q3'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 10\n", " AND 12 THEN 'Q4'\n", " END Quarter,\n", " CASE\n", " WHEN strftime('%m', sale_date) = '01' THEN 'January'\n", " WHEN strftime('%m', sale_date) = '02' THEN 'February'\n", " WHEN strftime('%m', sale_date) = '03' THEN 'March'\n", " WHEN strftime('%m', sale_date) = '04' THEN 'April'\n", " WHEN strftime('%m', sale_date) = '05' THEN 'May'\n", " WHEN strftime('%m', sale_date) = '06' THEN 'June'\n", " WHEN strftime('%m', sale_date) = '07' THEN 'July'\n", " WHEN strftime('%m', sale_date) = '08' THEN 'August'\n", " WHEN strftime('%m', sale_date) = '09' THEN 'September'\n", " WHEN strftime('%m', sale_date) = '10' THEN 'October'\n", " WHEN strftime('%m', sale_date) = '11' THEN 'November'\n", " WHEN strftime('%m', sale_date) = '12' THEN 'December'\n", " END Month,\n", " sum(total) Monthly_Sales\n", "FROM\n", " bakery_sales\n", "GROUP BY\n", " Quarter,\n", " Month\n", "ORDER BY \n", "\tstrftime('%m', sale_date)\n", "\"\"\"\n", "df = pd.read_sql_query(sql_statement, conn)\n", "df.style.set_table_attributes('style=\"font-size: 12px\"')" ] }, { "cell_type": "markdown", "id": "0dcb8f2c", "metadata": {}, "source": [ "```SQL\n", "WITH SalesTable AS (\n", "SELECT\n", " CASE\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 1\n", " AND 3 THEN 'Q1'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 4\n", " AND 6 THEN 'Q2'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 7\n", " AND 9 THEN 'Q3'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 10\n", " AND 12 THEN 'Q4'\n", " END Quarter,\n", " CASE\n", " WHEN strftime('%m', sale_date) = '01' THEN 'January'\n", " WHEN strftime('%m', sale_date) = '02' THEN 'February'\n", " WHEN strftime('%m', sale_date) = '03' THEN 'March'\n", " WHEN strftime('%m', sale_date) = '04' THEN 'April'\n", " WHEN strftime('%m', sale_date) = '05' THEN 'May'\n", " WHEN strftime('%m', sale_date) = '06' THEN 'June'\n", " WHEN strftime('%m', sale_date) = '07' THEN 'July'\n", " WHEN strftime('%m', sale_date) = '08' THEN 'August'\n", " WHEN strftime('%m', sale_date) = '09' THEN 'September'\n", " WHEN strftime('%m', sale_date) = '10' THEN 'October'\n", " WHEN strftime('%m', sale_date) = '11' THEN 'November'\n", " WHEN strftime('%m', sale_date) = '12' THEN 'December'\n", " END Month, \n", "\tTotal\n", "FROM\n", " bakery_sales\n", ")\n", "SELECT \n", "\tQuarter,\n", "\tMonth, \n", "\tSum(Total) MonthlySales,\n", "\tMax(sum(total)) over() max_overall_sales,\n", "\tMax(sum(total)) over(partition by quarter) max_quarter_sales\n", "FROM SalesTable\n", "GROUP BY Quarter, Month\n", "```" ] }, { "cell_type": "code", "execution_count": 2, "id": "0e5011b2", "metadata": { "tags": [ "hide-input", "output_scroll" ] }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 QuarterMonthMonthlySalesmax_overall_salesmax_quarter_sales
0Q1February642370064451006445100
1Q1January458250064451006445100
2Q1March644510064451006445100
3Q2April489370064451004893700
4Q2May30840064451004893700
5Q3August610050064451006100500
6Q3July407650064451006100500
7Q3September489550064451006100500
8Q4December500950064451005009500
9Q4November454300064451005009500
10Q4October395910064451005009500
\n" ], "text/plain": [ "" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import sqlite3\n", "import pandas as pd\n", "\n", "conn = sqlite3.connect('bakery_sales.db')\n", "\n", "sql_statement = \"\"\"\n", "WITH SalesTable AS (\n", "SELECT\n", " CASE\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 1\n", " AND 3 THEN 'Q1'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 4\n", " AND 6 THEN 'Q2'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 7\n", " AND 9 THEN 'Q3'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 10\n", " AND 12 THEN 'Q4'\n", " END Quarter,\n", " CASE\n", " WHEN strftime('%m', sale_date) = '01' THEN 'January'\n", " WHEN strftime('%m', sale_date) = '02' THEN 'February'\n", " WHEN strftime('%m', sale_date) = '03' THEN 'March'\n", " WHEN strftime('%m', sale_date) = '04' THEN 'April'\n", " WHEN strftime('%m', sale_date) = '05' THEN 'May'\n", " WHEN strftime('%m', sale_date) = '06' THEN 'June'\n", " WHEN strftime('%m', sale_date) = '07' THEN 'July'\n", " WHEN strftime('%m', sale_date) = '08' THEN 'August'\n", " WHEN strftime('%m', sale_date) = '09' THEN 'September'\n", " WHEN strftime('%m', sale_date) = '10' THEN 'October'\n", " WHEN strftime('%m', sale_date) = '11' THEN 'November'\n", " WHEN strftime('%m', sale_date) = '12' THEN 'December'\n", " END Month, \n", "\tTotal\n", "FROM\n", " bakery_sales\n", ")\n", "SELECT \n", "\tQuarter,\n", "\tMonth, \n", "\tSum(Total) MonthlySales,\n", "\tMax(sum(total)) over() max_overall_sales,\n", "\tMax(sum(total)) over(partition by quarter) max_quarter_sales\n", "FROM SalesTable\n", "GROUP BY Quarter, Month\n", "\"\"\"\n", "df = pd.read_sql_query(sql_statement, conn)\n", "df.style.set_table_attributes('style=\"font-size: 12px\"')" ] }, { "cell_type": "markdown", "id": "5ffb1fef", "metadata": {}, "source": [ "## Localized Sorting\n", "\n", "\n", "```SQL\n", "WITH SalesTable AS (\n", "SELECT\n", " CASE\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 1\n", " AND 3 THEN 'Q1'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 4\n", " AND 6 THEN 'Q2'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 7\n", " AND 9 THEN 'Q3'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 10\n", " AND 12 THEN 'Q4'\n", " END Quarter,\n", " CASE\n", " WHEN strftime('%m', sale_date) = '01' THEN 'January'\n", " WHEN strftime('%m', sale_date) = '02' THEN 'February'\n", " WHEN strftime('%m', sale_date) = '03' THEN 'March'\n", " WHEN strftime('%m', sale_date) = '04' THEN 'April'\n", " WHEN strftime('%m', sale_date) = '05' THEN 'May'\n", " WHEN strftime('%m', sale_date) = '06' THEN 'June'\n", " WHEN strftime('%m', sale_date) = '07' THEN 'July'\n", " WHEN strftime('%m', sale_date) = '08' THEN 'August'\n", " WHEN strftime('%m', sale_date) = '09' THEN 'September'\n", " WHEN strftime('%m', sale_date) = '10' THEN 'October'\n", " WHEN strftime('%m', sale_date) = '11' THEN 'November'\n", " WHEN strftime('%m', sale_date) = '12' THEN 'December'\n", " END Month, \n", "\tTotal\n", "FROM\n", " bakery_sales\n", ")\n", "SELECT \n", "\tQuarter,\n", "\tMonth, \n", "\tSum(Total) MonthlySales,\n", "\trank() OVER (ORDER BY -sum(total)) SalesRank\n", "FROM SalesTable\n", "GROUP BY Quarter, Month\n", "```" ] }, { "cell_type": "code", "execution_count": 3, "id": "1764dacd", "metadata": { "tags": [ "hide-input", "output_scroll" ] }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 QuarterMonthMonthlySalesSalesRank
0Q1March64451001
1Q1February64237002
2Q3August61005003
3Q4December50095004
4Q3September48955005
5Q2April48937006
6Q1January45825007
7Q4November45430008
8Q3July40765009
9Q4October395910010
10Q2May30840011
\n" ], "text/plain": [ "" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import sqlite3\n", "import pandas as pd\n", "\n", "conn = sqlite3.connect('bakery_sales.db')\n", "\n", "sql_statement = \"\"\"\n", "WITH SalesTable AS (\n", "SELECT\n", " CASE\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 1\n", " AND 3 THEN 'Q1'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 4\n", " AND 6 THEN 'Q2'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 7\n", " AND 9 THEN 'Q3'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 10\n", " AND 12 THEN 'Q4'\n", " END Quarter,\n", " CASE\n", " WHEN strftime('%m', sale_date) = '01' THEN 'January'\n", " WHEN strftime('%m', sale_date) = '02' THEN 'February'\n", " WHEN strftime('%m', sale_date) = '03' THEN 'March'\n", " WHEN strftime('%m', sale_date) = '04' THEN 'April'\n", " WHEN strftime('%m', sale_date) = '05' THEN 'May'\n", " WHEN strftime('%m', sale_date) = '06' THEN 'June'\n", " WHEN strftime('%m', sale_date) = '07' THEN 'July'\n", " WHEN strftime('%m', sale_date) = '08' THEN 'August'\n", " WHEN strftime('%m', sale_date) = '09' THEN 'September'\n", " WHEN strftime('%m', sale_date) = '10' THEN 'October'\n", " WHEN strftime('%m', sale_date) = '11' THEN 'November'\n", " WHEN strftime('%m', sale_date) = '12' THEN 'December'\n", " END Month, \n", "\tTotal\n", "FROM\n", " bakery_sales\n", ")\n", "SELECT \n", "\tQuarter,\n", "\tMonth, \n", "\tSum(Total) MonthlySales,\n", "\trank() OVER (ORDER BY -sum(total)) SalesRank\n", "FROM SalesTable\n", "GROUP BY Quarter, Month\n", "\"\"\"\n", "df = pd.read_sql_query(sql_statement, conn)\n", "df.style.set_table_attributes('style=\"font-size: 12px\"')" ] }, { "cell_type": "markdown", "id": "76d04b33", "metadata": {}, "source": [ "```SQL\n", "WITH SalesTable AS (\n", "SELECT\n", " CASE\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 1\n", " AND 3 THEN 'Q1'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 4\n", " AND 6 THEN 'Q2'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 7\n", " AND 9 THEN 'Q3'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 10\n", " AND 12 THEN 'Q4'\n", " END Quarter,\n", " CASE\n", " WHEN strftime('%m', sale_date) = '01' THEN 'January'\n", " WHEN strftime('%m', sale_date) = '02' THEN 'February'\n", " WHEN strftime('%m', sale_date) = '03' THEN 'March'\n", " WHEN strftime('%m', sale_date) = '04' THEN 'April'\n", " WHEN strftime('%m', sale_date) = '05' THEN 'May'\n", " WHEN strftime('%m', sale_date) = '06' THEN 'June'\n", " WHEN strftime('%m', sale_date) = '07' THEN 'July'\n", " WHEN strftime('%m', sale_date) = '08' THEN 'August'\n", " WHEN strftime('%m', sale_date) = '09' THEN 'September'\n", " WHEN strftime('%m', sale_date) = '10' THEN 'October'\n", " WHEN strftime('%m', sale_date) = '11' THEN 'November'\n", " WHEN strftime('%m', sale_date) = '12' THEN 'December'\n", " END Month, \n", "\tTotal\n", "FROM\n", " bakery_sales\n", ")\n", "SELECT \n", "\tQuarter,\n", "\tMonth, \n", "\tSum(Total) MonthlySales,\n", "\trank() OVER (PARTITION BY Quarter ORDER BY -sum(total)) SalesRank\n", "FROM SalesTable\n", "GROUP BY Quarter, Month\n", "```" ] }, { "cell_type": "code", "execution_count": 4, "id": "fe8aa08f", "metadata": { "tags": [ "hide-input", "output_scroll" ] }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 QuarterMonthMonthlySalesSalesRank
0Q1March64451001
1Q1February64237002
2Q1January45825003
3Q2April48937001
4Q2May3084002
5Q3August61005001
6Q3September48955002
7Q3July40765003
8Q4December50095001
9Q4November45430002
10Q4October39591003
\n" ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import sqlite3\n", "import pandas as pd\n", "\n", "conn = sqlite3.connect('bakery_sales.db')\n", "\n", "sql_statement = \"\"\"\n", "WITH SalesTable AS (\n", "SELECT\n", " CASE\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 1\n", " AND 3 THEN 'Q1'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 4\n", " AND 6 THEN 'Q2'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 7\n", " AND 9 THEN 'Q3'\n", " WHEN 0 + strftime('%m', sale_date) BETWEEN 10\n", " AND 12 THEN 'Q4'\n", " END Quarter,\n", " CASE\n", " WHEN strftime('%m', sale_date) = '01' THEN 'January'\n", " WHEN strftime('%m', sale_date) = '02' THEN 'February'\n", " WHEN strftime('%m', sale_date) = '03' THEN 'March'\n", " WHEN strftime('%m', sale_date) = '04' THEN 'April'\n", " WHEN strftime('%m', sale_date) = '05' THEN 'May'\n", " WHEN strftime('%m', sale_date) = '06' THEN 'June'\n", " WHEN strftime('%m', sale_date) = '07' THEN 'July'\n", " WHEN strftime('%m', sale_date) = '08' THEN 'August'\n", " WHEN strftime('%m', sale_date) = '09' THEN 'September'\n", " WHEN strftime('%m', sale_date) = '10' THEN 'October'\n", " WHEN strftime('%m', sale_date) = '11' THEN 'November'\n", " WHEN strftime('%m', sale_date) = '12' THEN 'December'\n", " END Month, \n", "\tTotal\n", "FROM\n", " bakery_sales\n", ")\n", "SELECT \n", "\tQuarter,\n", "\tMonth, \n", "\tSum(Total) MonthlySales,\n", "\trank() OVER (PARTITION BY Quarter ORDER BY -sum(total)) SalesRank\n", "FROM SalesTable\n", "GROUP BY Quarter, Month\n", "\"\"\"\n", "df = pd.read_sql_query(sql_statement, conn)\n", "df.style.set_table_attributes('style=\"font-size: 12px\"')" ] }, { "cell_type": "markdown", "id": "b07814fc", "metadata": {}, "source": [ "## Window Frames\n", "\n", "ref: https://www.sqlitetutorial.net/sqlite-window-functions/sqlite-window-frame/\n", "\n", "```SQL\n", "WITH SalesTable AS (\n", "SELECT\n", "\tcast(strftime('%Y', sale_date) AS INT) sale_year,\n", "\tcast(strftime('%W', sale_date) AS INT) sale_week,\n", "\tTotal\n", "FROM\n", " bakery_sales\n", "ORDER BY sale_date\n", ")\n", "SELECT \n", "\tsale_year,\n", "\tsale_week,\n", "\tsum(total) week_total,\n", "\tsum(sum(total)) OVER (ORDER BY sale_year, sale_week ROWS UNBOUNDED PRECEDING) rolling_sum\n", "FROM SalesTable\n", "GROUP BY sale_year, sale_week\n", "```" ] }, { "cell_type": "code", "execution_count": 5, "id": "0e9c9619", "metadata": { "tags": [ "hide-input", "output_scroll" ] }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 sale_yearsale_weekweek_totalrolling_sum
0201927427000427000
12019287011001128100
220192925299003658000
320193013319004989900
420193114699006459800
520193213571007816900
620193313772009194100
7201934122510010419200
8201935114690011566100
9201936106200012628100
10201937125110013879200
11201938107970014958900
1220193981950015778400
1320194085830016636700
14201941101220017648900
15201942107200018720900
1620194386510019586000
17201944124900020835000
1820194598840021823400
1920194683050022653900
20201947121660023870500
21201948131320025183700
2220194998180026165500
23201950108910027254600
24201951109120028345800
2520195223830028584100
262020087560029459700
2720201122370030683400
2820202106040031743800
292020371330032457100
3020204122580033682900
3120205146490035147800
3220206129890036446700
3320207180090038247600
3420208174610039993700
3520209165100041644700
36202010129670042941400
37202011152270044464100
38202012135270045816800
39202013126570047082500
40202014108430048166800
4120201591540049082200
42202016133010050412300
4320201782520051237500
\n" ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import sqlite3\n", "import pandas as pd\n", "\n", "conn = sqlite3.connect('bakery_sales.db')\n", "\n", "sql_statement = \"\"\"\n", "WITH SalesTable AS (\n", "SELECT\n", "\tcast(strftime('%Y', sale_date) AS INT) sale_year,\n", "\tcast(strftime('%W', sale_date) AS INT) sale_week,\n", "\tTotal\n", "FROM\n", " bakery_sales\n", "ORDER BY sale_date\n", ")\n", "SELECT \n", "\tsale_year,\n", "\tsale_week,\n", "\tsum(total) week_total,\n", "\tsum(sum(total)) OVER (ORDER BY sale_year, sale_week ROWS UNBOUNDED PRECEDING) rolling_sum\n", "FROM SalesTable\n", "GROUP BY sale_year, sale_week\n", "\"\"\"\n", "df = pd.read_sql_query(sql_statement, conn)\n", "df.style.set_table_attributes('style=\"font-size: 12px\"')" ] }, { "cell_type": "markdown", "id": "c537f43c", "metadata": {}, "source": [ "```SQL\n", "SELECT \n", "\tsale_date,\n", "\tsum(total) total,\n", "\tmax(sum(total)) over (order by cast(strftime('%j', sale_date) AS INT) range between 3 preceding and 3 following) seven_day_max\n", "FROM bakery_sales\n", "GROUP BY sale_date\n", "order by sale_date\n", "```" ] }, { "cell_type": "code", "execution_count": 6, "id": "d343e93a", "metadata": { "tags": [ "hide-input", "output_scroll" ] }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 sale_datetotalseven_day_max
02019-07-1139600212000
12019-07-1258000212000
22019-07-13117400212000
32019-07-14212000212000
42019-07-1530900212000
52019-07-1774100212000
62019-07-19196800203400
72019-07-20203400203400
82019-07-21195900203400
92019-07-22167100259200
102019-07-241776001593100
112019-07-252592001593100
122019-07-2615931001593100
132019-07-271960001593100
142019-07-281369001593100
152019-07-292146001593100
162019-07-31203900309000
172019-08-01215200309000
182019-08-02168000309000
192019-08-03309000309000
202019-08-04221200309000
212019-08-05277300309000
222019-08-07201000300600
232019-08-08290300300600
242019-08-09112500300600
252019-08-10300600300600
262019-08-11288200300600
272019-08-12232200300600
282019-08-14269500288200
292019-08-15203000314800
302019-08-1695900314800
312019-08-17241700314800
322019-08-18314800314800
332019-08-19272800314800
342019-08-21296700314800
352019-08-22182700296700
362019-08-23155800296700
372019-08-24264200296700
382019-08-25205000264200
392019-08-26159700264200
402019-08-28184900257100
412019-08-29254200257100
422019-08-30257100257100
432019-08-31127000257100
442019-09-01242200277200
452019-09-02132800277200
462019-09-04277200285200
472019-09-05247700285200
482019-09-06121700285200
492019-09-07285200285200
502019-09-0882300285200
512019-09-09215100285200
522019-09-11217700217700
532019-09-12185200293300
542019-09-14150700293300
552019-09-15293300293300
562019-09-18150800301700
572019-09-19301400301700
582019-09-20203000301700
592019-09-21301700301700
602019-09-22294200301700
612019-09-23174100301700
622019-09-25147400294200
632019-09-2654100287000
642019-09-27140400287000
652019-09-28287000287000
662019-09-29276700287000
672019-09-30113600319700
682019-10-02107300319700
692019-10-03319700319700
702019-10-0448000319700
712019-10-0599300319700
722019-10-06131600319700
732019-10-07117300136500
742019-10-0945200153100
752019-10-10136500293100
762019-10-11113100293100
772019-10-12153100293100
782019-10-13293100293100
792019-10-14177500293100
802019-10-16148400293100
812019-10-17186800213300
822019-10-1884200307100
832019-10-19202000307100
842019-10-20213300307100
852019-10-21307100307100
862019-10-2371200307100
872019-10-24219900307100
882019-10-25133800219900
892019-10-26128500219900
902019-10-27211500219900
912019-10-28215100215100
922019-10-3195600236000
932019-11-01114800236000
942019-11-02236000236000
952019-11-03203600236000
962019-11-0492100236000
972019-11-06129600279200
982019-11-07167800357300
992019-11-08223000357300
1002019-11-09279200357300
1012019-11-10357300357300
1022019-11-11128400357300
1032019-11-13145000357300
1042019-11-1472000319000
1052019-11-15170700319000
1062019-11-16153300319000
1072019-11-17319000319000
1082019-11-1898700319000
1092019-11-20136300319000
1102019-11-2161600289000
1112019-11-2266700289000
1122019-11-23178200289000
1132019-11-24289000289000
1142019-11-25167600289000
1152019-11-27127200289000
1162019-11-28189400295900
1172019-11-29197000295900
1182019-11-30239500295900
1192019-12-01295900295900
1202019-12-02106100295900
1212019-12-04215000295900
1222019-12-05202400409000
1232019-12-06195000409000
1242019-12-07185700409000
1252019-12-08409000409000
1262019-12-09142500409000
1272019-12-11206200409000
1282019-12-12201500206200
1292019-12-13105900206200
1302019-12-14151800206200
1312019-12-15173900205900
1322019-12-16157800205900
1332019-12-18205900205900
1342019-12-19185600223900
1352019-12-20121900223900
1362019-12-21194000223900
1372019-12-22223900223900
1382019-12-23166600223900
1392019-12-2456000223900
1402019-12-25109500369900
1412019-12-2692500369900
1422019-12-2747100369900
1432019-12-28369900369900
1442019-12-29249600369900
1452019-12-30238300369900
1462020-01-02234100275600
1472020-01-03238200275600
1482020-01-04127700275600
1492020-01-05275600275600
1502020-01-06109500275600
1512020-01-08251800275600
1522020-01-09259900307000
1532020-01-10146500307000
1542020-01-11149000307000
1552020-01-12307000307000
1562020-01-13138800307000
1572020-01-15264000307000
1582020-01-1633600354200
1592020-01-17107100354200
1602020-01-18162700354200
1612020-01-19354200354200
1622020-01-2092400354200
1632020-01-22136200354200
1642020-01-23288400288400
1652020-01-2453900288400
1662020-01-26142400288400
1672020-01-27158000245900
1682020-01-29104400245900
1692020-01-30245900342600
1702020-01-31201200342600
1712020-02-01173700342600
1722020-02-02342600342600
1732020-02-0393700342600
1742020-02-05281900342600
1752020-02-06206000341700
1762020-02-07288800341700
1772020-02-08252800341700
1782020-02-09341700354500
1792020-02-1065600354500
1802020-02-12354500354500
1812020-02-13110300354500
1822020-02-14242900354500
1832020-02-15235400354500
1842020-02-16290200290200
1852020-02-17183200290200
1862020-02-19212000353000
1872020-02-20249900647100
1882020-02-21155700647100
1892020-02-22353000647100
1902020-02-23647100647100
1912020-02-24189400647100
1922020-02-26345100647100
1932020-02-27197700403400
1942020-02-28337000403400
1952020-02-29273500403400
1962020-03-01403400403400
1972020-03-02273000421500
1982020-03-05421500421500
1992020-03-06305800421500
2002020-03-07294700421500
2012020-03-08356000421500
2022020-03-09144000356000
2032020-03-11204600356000
2042020-03-12251800259700
2052020-03-13224100263500
2062020-03-14212500263500
2072020-03-15259700323600
2082020-03-16263500323600
2092020-03-18323600368600
2102020-03-19173500368600
2112020-03-20135800368600
2122020-03-21368600368600
2132020-03-22257700368600
2142020-03-23213200368600
2152020-03-25216300257700
2162020-03-26206700320300
2172020-03-27198700320300
2182020-03-28197500320300
2192020-03-29320300320300
2202020-03-30218600320300
2212020-04-01230100320300
2222020-04-02297700297700
2232020-04-03140500297700
2242020-04-04153000297700
2252020-04-05225800297700
2262020-04-06132000272600
2272020-04-08272600272600
2282020-04-09198600272600
2292020-04-10149400272600
2302020-04-11119700272600
2312020-04-12212000212000
2322020-04-1353300212000
2332020-04-15202400269200
2342020-04-16102800269200
2352020-04-17147000269200
2362020-04-18269200269200
2372020-04-19140700269200
2382020-04-20188900269200
2392020-04-22225400307900
2402020-04-23164300307900
2412020-04-24165900307900
2422020-04-25307900307900
2432020-04-26277700307900
2442020-04-27264600307900
2452020-04-29118100277700
2462020-04-30134100264600
2472020-05-01215700215700
2482020-05-0292700215700
\n" ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import sqlite3\n", "import pandas as pd\n", "\n", "conn = sqlite3.connect('bakery_sales.db')\n", "\n", "sql_statement = \"\"\"\n", "SELECT \n", "\tsale_date,\n", "\tsum(total) total,\n", "\tmax(sum(total)) over (order by cast(strftime('%j', sale_date) AS INT) range between 3 preceding and 3 following) seven_day_max\n", "FROM bakery_sales\n", "GROUP BY sale_date\n", "order by sale_date\n", "\"\"\"\n", "df = pd.read_sql_query(sql_statement, conn)\n", "df.style.set_table_attributes('style=\"font-size: 12px\"')" ] }, { "cell_type": "markdown", "id": "4f8c8b96", "metadata": {}, "source": [ "## Lag and Lead\n", "\n", "\n", "```SQL\n", "WITH SalesTable AS (\n", "SELECT\n", "\tcast(strftime('%Y', sale_date) AS INT) sale_year,\n", "\tcast(strftime('%W', sale_date) AS INT) sale_week,\n", "\tTotal\n", "FROM\n", " bakery_sales\n", "ORDER BY sale_date\n", ")\n", "SELECT \n", "\tsale_year,\n", "\tsale_week,\n", "\tsum(total) week_total,\n", "\tlag(sum(total), 1) over (order by sale_week) prev_week_total,\n", "\tlead(sum(total), 1) over (order by sale_week) next_week_total\n", "FROM SalesTable\n", "GROUP BY sale_year, sale_week\n", "```" ] }, { "cell_type": "code", "execution_count": 7, "id": "63674b70", "metadata": { "tags": [ "hide-input", "output_scroll" ] }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 sale_yearsale_weekweek_totalprev_week_totalnext_week_total
020200875600nan1223700.000000
1202011223700875600.0000001060400.000000
22020210604001223700.000000713300.000000
3202037133001060400.0000001225800.000000
4202041225800713300.0000001464900.000000
52020514649001225800.0000001298900.000000
62020612989001464900.0000001800900.000000
72020718009001298900.0000001746100.000000
82020817461001800900.0000001651000.000000
92020916510001746100.0000001296700.000000
1020201012967001651000.0000001522700.000000
1120201115227001296700.0000001352700.000000
1220201213527001522700.0000001265700.000000
1320201312657001352700.0000001084300.000000
1420201410843001265700.000000915400.000000
152020159154001084300.0000001330100.000000
162020161330100915400.000000825200.000000
172020178252001330100.000000427000.000000
18201927427000825200.000000701100.000000
19201928701100427000.0000002529900.000000
202019292529900701100.0000001331900.000000
2120193013319002529900.0000001469900.000000
2220193114699001331900.0000001357100.000000
2320193213571001469900.0000001377200.000000
2420193313772001357100.0000001225100.000000
2520193412251001377200.0000001146900.000000
2620193511469001225100.0000001062000.000000
2720193610620001146900.0000001251100.000000
2820193712511001062000.0000001079700.000000
2920193810797001251100.000000819500.000000
302019398195001079700.000000858300.000000
31201940858300819500.0000001012200.000000
322019411012200858300.0000001072000.000000
3320194210720001012200.000000865100.000000
342019438651001072000.0000001249000.000000
352019441249000865100.000000988400.000000
362019459884001249000.000000830500.000000
37201946830500988400.0000001216600.000000
382019471216600830500.0000001313200.000000
3920194813132001216600.000000981800.000000
402019499818001313200.0000001089100.000000
412019501089100981800.0000001091200.000000
4220195110912001089100.000000238300.000000
432019522383001091200.000000nan
\n" ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import sqlite3\n", "import pandas as pd\n", "\n", "conn = sqlite3.connect('bakery_sales.db')\n", "\n", "sql_statement = \"\"\"\n", "WITH SalesTable AS (\n", "SELECT\n", "\tcast(strftime('%Y', sale_date) AS INT) sale_year,\n", "\tcast(strftime('%W', sale_date) AS INT) sale_week,\n", "\tTotal\n", "FROM\n", " bakery_sales\n", "ORDER BY sale_date\n", ")\n", "SELECT \n", "\tsale_year,\n", "\tsale_week,\n", "\tsum(total) week_total,\n", "\tlag(sum(total), 1) over (order by sale_week) prev_week_total,\n", "\tlead(sum(total), 1) over (order by sale_week) next_week_total\n", "FROM SalesTable\n", "GROUP BY sale_year, sale_week\n", "\"\"\"\n", "df = pd.read_sql_query(sql_statement, conn)\n", "df.style.set_table_attributes('style=\"font-size: 12px\"')" ] }, { "cell_type": "markdown", "id": "134bcbd7", "metadata": {}, "source": [ "```SQL\n", "WITH SalesTable AS (\n", " SELECT\n", " cast(strftime('%Y', sale_date) AS INT) sale_year,\n", " cast(strftime('%W', sale_date) AS INT) sale_week,\n", " Total\n", " FROM\n", " bakery_sales\n", " ORDER BY\n", " sale_date\n", ")\n", "SELECT\n", " sale_year,\n", " sale_week,\n", " sum(total) week_total,\n", " round(\n", " CAST(\n", " (\n", " sum(total) - lag(sum(total), 1) over (\n", " order by\n", " sale_week\n", " )\n", " ) AS REAL\n", " ) / CAST(\n", " lag(sum(total), 1) over (\n", " order by\n", " sale_week\n", " ) AS REAL\n", " ) * 100,\n", " 1\n", " ) CHANGE\n", "FROM\n", " SalesTable\n", "GROUP BY\n", " sale_year,\n", " sale_week\n", "```" ] }, { "cell_type": "code", "execution_count": 8, "id": "68df9964", "metadata": { "tags": [ "hide-input", "output_scroll" ] }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 sale_yearsale_weekweek_totalCHANGE
020200875600nan
120201122370039.800000
2202021060400-13.300000
320203713300-32.700000
420204122580071.800000
520205146490019.500000
6202061298900-11.300000
720207180090038.600000
8202081746100-3.000000
9202091651000-5.400000
102020101296700-21.500000
11202011152270017.400000
122020121352700-11.200000
132020131265700-6.400000
142020141084300-14.300000
15202015915400-15.600000
16202016133010045.300000
17202017825200-38.000000
18201927427000-48.300000
1920192870110064.200000
202019292529900260.800000
212019301331900-47.400000
22201931146990010.400000
232019321357100-7.700000
2420193313772001.500000
252019341225100-11.000000
262019351146900-6.400000
272019361062000-7.400000
28201937125110017.800000
292019381079700-13.700000
30201939819500-24.100000
312019408583004.700000
32201941101220017.900000
3320194210720005.900000
34201943865100-19.300000
35201944124900044.400000
36201945988400-20.900000
37201946830500-16.000000
38201947121660046.500000
3920194813132007.900000
40201949981800-25.200000
41201950108910010.900000
4220195110912000.200000
43201952238300-78.200000
\n" ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import sqlite3\n", "import pandas as pd\n", "\n", "conn = sqlite3.connect('bakery_sales.db')\n", "\n", "sql_statement = \"\"\"\n", "WITH SalesTable AS (\n", " SELECT\n", " cast(strftime('%Y', sale_date) AS INT) sale_year,\n", " cast(strftime('%W', sale_date) AS INT) sale_week,\n", " Total\n", " FROM\n", " bakery_sales\n", " ORDER BY\n", " sale_date\n", ")\n", "SELECT\n", " sale_year,\n", " sale_week,\n", " sum(total) week_total,\n", " round(\n", " CAST(\n", " (\n", " sum(total) - lag(sum(total), 1) over (\n", " order by\n", " sale_week\n", " )\n", " ) AS REAL\n", " ) / CAST(\n", " lag(sum(total), 1) over (\n", " order by\n", " sale_week\n", " ) AS REAL\n", " ) * 100,\n", " 1\n", " ) CHANGE\n", "FROM\n", " SalesTable\n", "GROUP BY\n", " sale_year,\n", " sale_week\n", "\"\"\"\n", "df = pd.read_sql_query(sql_statement, conn)\n", "df.style.set_table_attributes('style=\"font-size: 12px\"')" ] }, { "cell_type": "markdown", "id": "0d157c2f", "metadata": {}, "source": [ "## Ranking\n", "\n", "- `rank`: returns the same ranking in case of a tie, with gaps in the rankings\n", "- `row_number`: returns a unique number for each row, with rankings arbitrarily assigned in case of a tie\n", "- `dense_rank`: returns the same ranking in the case of a tie, with no gaps in the rankings\n", "\n", "Ref: https://blog.jooq.org/the-difference-between-row_number-rank-and-dense_rank/\n", "\n", "\n", "```SQL\n", "SELECT \n", "\tPatientGender,\n", "\tPatientMaritalStatus, \n", "\tcount(*) StatusCount, \n", "\trank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRank,\n", "\trow_number() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRowNumber,\n", "\tdense_rank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusDenseRank\n", "FROM \n", "PatientCorePopulatedTable\n", "GROUP BY PatientGender, PatientMaritalStatus\n", "```" ] }, { "cell_type": "code", "execution_count": 9, "id": "c43fc496", "metadata": { "tags": [ "hide-input", "output_scroll" ] }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 PatientGenderPatientMaritalStatusStatusCountStatusRankStatusRowNumberStatusDenseRank
0FemaleMarried23111
1FemaleSingle17222
2FemaleDivorced6333
3FemaleUnknown3444
4FemaleSeparated3454
5MaleMarried22111
6MaleSingle15222
7MaleDivorced5333
8MaleUnknown3444
9MaleSeparated2555
10MaleWidowed1666
\n" ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import sqlite3\n", "import pandas as pd\n", "\n", "conn = sqlite3.connect('100_patients.db')\n", "\n", "sql_statement = \"\"\"\n", "SELECT \n", "\tPatientGender,\n", "\tPatientMaritalStatus, \n", "\tcount(*) StatusCount, \n", "\trank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRank,\n", "\trow_number() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusRowNumber,\n", "\tdense_rank() OVER (PARTITION BY PatientGender ORDER BY COUNT(*) DESC) StatusDenseRank\n", "FROM \n", "PatientCorePopulatedTable\n", "GROUP BY PatientGender, PatientMaritalStatus\n", "\"\"\"\n", "df = pd.read_sql_query(sql_statement, conn)\n", "df.style.set_table_attributes('style=\"font-size: 12px\"')" ] } ], "metadata": { "jupytext": { "formats": "md:myst", "text_representation": { "extension": ".md", "format_name": "myst" } }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" }, "source_map": [ 11, 60, 104, 147, 196, 242, 290, 333, 381, 406, 433, 445, 463, 487, 515, 555, 601, 625 ] }, "nbformat": 4, "nbformat_minor": 5 }