{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "aef8b826",
"metadata": {},
"source": [
"# Aggregating"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "157bf535-0415-431d-a9d7-0375319e6bf1",
"metadata": {},
"source": [
"Let's start with an unaggregated time series."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "91d63514-8e66-4680-be77-252937bdd85d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Time | \n",
" PRICE | \n",
" SIZE | \n",
" COND | \n",
" EXCHANGE | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2023-05-15 09:30:00.000178688 | \n",
" 412.22 | \n",
" 100 | \n",
" T | \n",
" P | \n",
"
\n",
" \n",
" 1 | \n",
" 2023-05-15 09:30:00.000776704 | \n",
" 412.22 | \n",
" 247 | \n",
" | \n",
" Z | \n",
"
\n",
" \n",
" 2 | \n",
" 2023-05-15 09:30:00.003603456 | \n",
" 412.22 | \n",
" 100 | \n",
" T | \n",
" T | \n",
"
\n",
" \n",
" 3 | \n",
" 2023-05-15 09:30:00.006352128 | \n",
" 412.24 | \n",
" 1 | \n",
" I | \n",
" K | \n",
"
\n",
" \n",
" 4 | \n",
" 2023-05-15 09:30:00.007128064 | \n",
" 412.24 | \n",
" 3 | \n",
" I | \n",
" K | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 310 | \n",
" 2023-05-15 09:30:00.934032640 | \n",
" 412.27 | \n",
" 160 | \n",
" T | \n",
" T | \n",
"
\n",
" \n",
" 311 | \n",
" 2023-05-15 09:30:00.975609344 | \n",
" 412.24 | \n",
" 2 | \n",
" I | \n",
" D | \n",
"
\n",
" \n",
" 312 | \n",
" 2023-05-15 09:30:00.980264448 | \n",
" 412.27 | \n",
" 1 | \n",
" I | \n",
" D | \n",
"
\n",
" \n",
" 313 | \n",
" 2023-05-15 09:30:00.985391616 | \n",
" 412.28 | \n",
" 100 | \n",
" | \n",
" T | \n",
"
\n",
" \n",
" 314 | \n",
" 2023-05-15 09:30:00.985394944 | \n",
" 412.28 | \n",
" 100 | \n",
" Q | \n",
" T | \n",
"
\n",
" \n",
"
\n",
"
315 rows × 5 columns
\n",
"
"
],
"text/plain": [
" Time PRICE SIZE COND EXCHANGE\n",
"0 2023-05-15 09:30:00.000178688 412.22 100 T P\n",
"1 2023-05-15 09:30:00.000776704 412.22 247 Z\n",
"2 2023-05-15 09:30:00.003603456 412.22 100 T T\n",
"3 2023-05-15 09:30:00.006352128 412.24 1 I K\n",
"4 2023-05-15 09:30:00.007128064 412.24 3 I K\n",
".. ... ... ... ... ...\n",
"310 2023-05-15 09:30:00.934032640 412.27 160 T T\n",
"311 2023-05-15 09:30:00.975609344 412.24 2 I D\n",
"312 2023-05-15 09:30:00.980264448 412.27 1 I D\n",
"313 2023-05-15 09:30:00.985391616 412.28 100 T\n",
"314 2023-05-15 09:30:00.985394944 412.28 100 Q T\n",
"\n",
"[315 rows x 5 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import onetick.py as otp\n",
"\n",
"s = otp.dt(2023, 5, 15, 9, 30)\n",
"e = otp.dt(2023, 5, 15, 9, 30, 1)\n",
"\n",
"q = otp.DataSource('NYSE_TAQ', tick_type='TRD')\n",
"q = q[['PRICE', 'SIZE', 'COND', 'EXCHANGE']]\n",
"otp.run(q, start=s, end=e, symbols=['SPY'])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "56dbfa00-45b9-408c-bfc0-3ea78f601498",
"metadata": {},
"source": [
"Let's make a note of the total number of trades.\n",
"\n",
"We can aggregate over the entire queried interval."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "34a94316-be74-4548-9f2f-0c7ea6781289",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Time | \n",
" volume | \n",
" vwap | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2023-05-15 09:30:01 | \n",
" 324352 | \n",
" 412.212012 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Time volume vwap\n",
"0 2023-05-15 09:30:01 324352 412.212012"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = otp.DataSource('NYSE_TAQ', tick_type='TRD')\n",
"q = q[['PRICE', 'SIZE', 'COND', 'EXCHANGE']]\n",
"q = q.agg({\n",
" 'volume': otp.agg.sum('SIZE'),\n",
" 'vwap': otp.agg.vwap('PRICE','SIZE')\n",
"})\n",
"otp.run(q, start=s, end=e, symbols=['SPY'])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "53f66ca3-1029-49d3-97c0-c1f4dd3ad842",
"metadata": {},
"source": [
"Or over fixed buckets (aka bars or windows)."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2d408201-90be-44e1-b8c5-0f4dc371e799",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Time | \n",
" volume | \n",
" vwap | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2023-05-15 09:30:00.100 | \n",
" 1498 | \n",
" 412.235948 | \n",
"
\n",
" \n",
" 1 | \n",
" 2023-05-15 09:30:00.200 | \n",
" 7743 | \n",
" 412.237833 | \n",
"
\n",
" \n",
" 2 | \n",
" 2023-05-15 09:30:00.300 | \n",
" 1815 | \n",
" 412.243515 | \n",
"
\n",
" \n",
" 3 | \n",
" 2023-05-15 09:30:00.400 | \n",
" 307298 | \n",
" 412.210324 | \n",
"
\n",
" \n",
" 4 | \n",
" 2023-05-15 09:30:00.500 | \n",
" 2205 | \n",
" 412.250503 | \n",
"
\n",
" \n",
" 5 | \n",
" 2023-05-15 09:30:00.600 | \n",
" 416 | \n",
" 412.258750 | \n",
"
\n",
" \n",
" 6 | \n",
" 2023-05-15 09:30:00.700 | \n",
" 276 | \n",
" 412.244354 | \n",
"
\n",
" \n",
" 7 | \n",
" 2023-05-15 09:30:00.800 | \n",
" 1755 | \n",
" 412.240875 | \n",
"
\n",
" \n",
" 8 | \n",
" 2023-05-15 09:30:00.900 | \n",
" 595 | \n",
" 412.245782 | \n",
"
\n",
" \n",
" 9 | \n",
" 2023-05-15 09:30:01.000 | \n",
" 751 | \n",
" 412.267643 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Time volume vwap\n",
"0 2023-05-15 09:30:00.100 1498 412.235948\n",
"1 2023-05-15 09:30:00.200 7743 412.237833\n",
"2 2023-05-15 09:30:00.300 1815 412.243515\n",
"3 2023-05-15 09:30:00.400 307298 412.210324\n",
"4 2023-05-15 09:30:00.500 2205 412.250503\n",
"5 2023-05-15 09:30:00.600 416 412.258750\n",
"6 2023-05-15 09:30:00.700 276 412.244354\n",
"7 2023-05-15 09:30:00.800 1755 412.240875\n",
"8 2023-05-15 09:30:00.900 595 412.245782\n",
"9 2023-05-15 09:30:01.000 751 412.267643"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = otp.DataSource('NYSE_TAQ', tick_type='TRD')\n",
"q = q[['PRICE', 'SIZE', 'COND', 'EXCHANGE']]\n",
"q = q.agg({\n",
" 'volume': otp.agg.sum('SIZE'),\n",
" 'vwap': otp.agg.vwap('PRICE','SIZE')\n",
"}, bucket_interval=.1)\n",
"otp.run(q, start=s, end=e, symbols=['SPY'])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "038be2be-08c2-432f-88c0-a021d3e6a737",
"metadata": {},
"source": [
"Or over a sliding window."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "60d494a6-58ba-4b0a-9f5e-f6f01ecb9987",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Time | \n",
" volume | \n",
" vwap | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2023-05-15 09:30:00.000178688 | \n",
" 100 | \n",
" 412.220000 | \n",
"
\n",
" \n",
" 1 | \n",
" 2023-05-15 09:30:00.000776704 | \n",
" 347 | \n",
" 412.220000 | \n",
"
\n",
" \n",
" 2 | \n",
" 2023-05-15 09:30:00.003603456 | \n",
" 447 | \n",
" 412.220000 | \n",
"
\n",
" \n",
" 3 | \n",
" 2023-05-15 09:30:00.006352128 | \n",
" 448 | \n",
" 412.220045 | \n",
"
\n",
" \n",
" 4 | \n",
" 2023-05-15 09:30:00.007128064 | \n",
" 451 | \n",
" 412.220177 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 604 | \n",
" 2023-05-15 09:30:00.980264448 | \n",
" 553 | \n",
" 412.263074 | \n",
"
\n",
" \n",
" 605 | \n",
" 2023-05-15 09:30:00.982660864 | \n",
" 552 | \n",
" 412.263062 | \n",
"
\n",
" \n",
" 606 | \n",
" 2023-05-15 09:30:00.985391616 | \n",
" 652 | \n",
" 412.265660 | \n",
"
\n",
" \n",
" 607 | \n",
" 2023-05-15 09:30:00.985394944 | \n",
" 752 | \n",
" 412.267566 | \n",
"
\n",
" \n",
" 608 | \n",
" 2023-05-15 09:30:00.993288704 | \n",
" 751 | \n",
" 412.267643 | \n",
"
\n",
" \n",
"
\n",
"
609 rows × 3 columns
\n",
"
"
],
"text/plain": [
" Time volume vwap\n",
"0 2023-05-15 09:30:00.000178688 100 412.220000\n",
"1 2023-05-15 09:30:00.000776704 347 412.220000\n",
"2 2023-05-15 09:30:00.003603456 447 412.220000\n",
"3 2023-05-15 09:30:00.006352128 448 412.220045\n",
"4 2023-05-15 09:30:00.007128064 451 412.220177\n",
".. ... ... ...\n",
"604 2023-05-15 09:30:00.980264448 553 412.263074\n",
"605 2023-05-15 09:30:00.982660864 552 412.263062\n",
"606 2023-05-15 09:30:00.985391616 652 412.265660\n",
"607 2023-05-15 09:30:00.985394944 752 412.267566\n",
"608 2023-05-15 09:30:00.993288704 751 412.267643\n",
"\n",
"[609 rows x 3 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = otp.DataSource('NYSE_TAQ', tick_type='TRD')\n",
"q = q[['PRICE', 'SIZE', 'COND', 'EXCHANGE']]\n",
"q = q.agg({\n",
" 'volume': otp.agg.sum('SIZE'),\n",
" 'vwap': otp.agg.vwap('PRICE','SIZE')\n",
"}, bucket_interval=.1, running=True)\n",
"otp.run(q, start=s, end=e, symbols=['SPY'])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "66ad92c4-2944-43c0-b950-7f6888f74867",
"metadata": {},
"source": [
"Note that the number of output ticks is more than the number of trades. This is due to the output tick being created not only when each input tick enters the window but also when it drops out."
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "8d28f734-8ffe-4525-9261-6f000944d27f",
"metadata": {},
"source": [
"We can display all fields of the incoming tick along with the current values of the sliding window metrics."
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "fad649b3-41c1-4dbb-b130-12f47eac85e1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Time | \n",
" PRICE | \n",
" SIZE | \n",
" COND | \n",
" EXCHANGE | \n",
" volume | \n",
" vwap | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2023-05-15 09:30:00.000178688 | \n",
" 412.22 | \n",
" 100 | \n",
" T | \n",
" P | \n",
" 100 | \n",
" 412.220000 | \n",
"
\n",
" \n",
" 1 | \n",
" 2023-05-15 09:30:00.000776704 | \n",
" 412.22 | \n",
" 247 | \n",
" | \n",
" Z | \n",
" 347 | \n",
" 412.220000 | \n",
"
\n",
" \n",
" 2 | \n",
" 2023-05-15 09:30:00.003603456 | \n",
" 412.22 | \n",
" 100 | \n",
" T | \n",
" T | \n",
" 447 | \n",
" 412.220000 | \n",
"
\n",
" \n",
" 3 | \n",
" 2023-05-15 09:30:00.006352128 | \n",
" 412.24 | \n",
" 1 | \n",
" I | \n",
" K | \n",
" 448 | \n",
" 412.220045 | \n",
"
\n",
" \n",
" 4 | \n",
" 2023-05-15 09:30:00.007128064 | \n",
" 412.24 | \n",
" 3 | \n",
" I | \n",
" K | \n",
" 451 | \n",
" 412.220177 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 310 | \n",
" 2023-05-15 09:30:00.934032640 | \n",
" 412.27 | \n",
" 160 | \n",
" T | \n",
" T | \n",
" 801 | \n",
" 412.255893 | \n",
"
\n",
" \n",
" 311 | \n",
" 2023-05-15 09:30:00.975609344 | \n",
" 412.24 | \n",
" 2 | \n",
" I | \n",
" D | \n",
" 552 | \n",
" 412.263062 | \n",
"
\n",
" \n",
" 312 | \n",
" 2023-05-15 09:30:00.980264448 | \n",
" 412.27 | \n",
" 1 | \n",
" I | \n",
" D | \n",
" 553 | \n",
" 412.263074 | \n",
"
\n",
" \n",
" 313 | \n",
" 2023-05-15 09:30:00.985391616 | \n",
" 412.28 | \n",
" 100 | \n",
" | \n",
" T | \n",
" 652 | \n",
" 412.265660 | \n",
"
\n",
" \n",
" 314 | \n",
" 2023-05-15 09:30:00.985394944 | \n",
" 412.28 | \n",
" 100 | \n",
" Q | \n",
" T | \n",
" 752 | \n",
" 412.267566 | \n",
"
\n",
" \n",
"
\n",
"
315 rows × 7 columns
\n",
"
"
],
"text/plain": [
" Time PRICE SIZE COND EXCHANGE volume vwap\n",
"0 2023-05-15 09:30:00.000178688 412.22 100 T P 100 412.220000\n",
"1 2023-05-15 09:30:00.000776704 412.22 247 Z 347 412.220000\n",
"2 2023-05-15 09:30:00.003603456 412.22 100 T T 447 412.220000\n",
"3 2023-05-15 09:30:00.006352128 412.24 1 I K 448 412.220045\n",
"4 2023-05-15 09:30:00.007128064 412.24 3 I K 451 412.220177\n",
".. ... ... ... ... ... ... ...\n",
"310 2023-05-15 09:30:00.934032640 412.27 160 T T 801 412.255893\n",
"311 2023-05-15 09:30:00.975609344 412.24 2 I D 552 412.263062\n",
"312 2023-05-15 09:30:00.980264448 412.27 1 I D 553 412.263074\n",
"313 2023-05-15 09:30:00.985391616 412.28 100 T 652 412.265660\n",
"314 2023-05-15 09:30:00.985394944 412.28 100 Q T 752 412.267566\n",
"\n",
"[315 rows x 7 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = otp.DataSource('NYSE_TAQ', tick_type='TRD')\n",
"q = q[['PRICE', 'SIZE', 'COND', 'EXCHANGE']]\n",
"q = q.agg({\n",
" 'volume': otp.agg.sum('SIZE'),\n",
" 'vwap': otp.agg.vwap('PRICE','SIZE')\n",
"}, bucket_interval=.1, running=True, all_fields=True)\n",
"otp.run(q, start=s, end=e, symbols=['SPY'])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "97d03ed4-cbf3-49e6-9989-8fee3b49b172",
"metadata": {},
"source": [
"In this case, we are back to the same number of ticks as the number trades as an output tick is only created on arrival of an input tick."
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "9e7454b1-deda-48dc-8a01-da63d48b4a5e",
"metadata": {},
"source": [
"All of the aggregation operations support grouping."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "b37e62f1-ff2c-4913-b9d7-bde925bfface",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Time | \n",
" EXCHANGE | \n",
" volume | \n",
" vwap | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2023-05-15 09:30:01 | \n",
" A | \n",
" 100 | \n",
" 412.240000 | \n",
"
\n",
" \n",
" 1 | \n",
" 2023-05-15 09:30:01 | \n",
" B | \n",
" 100 | \n",
" 412.220000 | \n",
"
\n",
" \n",
" 2 | \n",
" 2023-05-15 09:30:01 | \n",
" C | \n",
" 102 | \n",
" 412.250000 | \n",
"
\n",
" \n",
" 3 | \n",
" 2023-05-15 09:30:01 | \n",
" D | \n",
" 3269 | \n",
" 412.231798 | \n",
"
\n",
" \n",
" 4 | \n",
" 2023-05-15 09:30:01 | \n",
" H | \n",
" 8 | \n",
" 412.260000 | \n",
"
\n",
" \n",
" 5 | \n",
" 2023-05-15 09:30:01 | \n",
" K | \n",
" 2559 | \n",
" 412.240684 | \n",
"
\n",
" \n",
" 6 | \n",
" 2023-05-15 09:30:01 | \n",
" N | \n",
" 422 | \n",
" 412.249976 | \n",
"
\n",
" \n",
" 7 | \n",
" 2023-05-15 09:30:01 | \n",
" P | \n",
" 304988 | \n",
" 412.210209 | \n",
"
\n",
" \n",
" 8 | \n",
" 2023-05-15 09:30:01 | \n",
" T | \n",
" 8141 | \n",
" 412.243563 | \n",
"
\n",
" \n",
" 9 | \n",
" 2023-05-15 09:30:01 | \n",
" U | \n",
" 400 | \n",
" 412.250000 | \n",
"
\n",
" \n",
" 10 | \n",
" 2023-05-15 09:30:01 | \n",
" X | \n",
" 200 | \n",
" 412.250000 | \n",
"
\n",
" \n",
" 11 | \n",
" 2023-05-15 09:30:01 | \n",
" Z | \n",
" 4063 | \n",
" 412.238673 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Time EXCHANGE volume vwap\n",
"0 2023-05-15 09:30:01 A 100 412.240000\n",
"1 2023-05-15 09:30:01 B 100 412.220000\n",
"2 2023-05-15 09:30:01 C 102 412.250000\n",
"3 2023-05-15 09:30:01 D 3269 412.231798\n",
"4 2023-05-15 09:30:01 H 8 412.260000\n",
"5 2023-05-15 09:30:01 K 2559 412.240684\n",
"6 2023-05-15 09:30:01 N 422 412.249976\n",
"7 2023-05-15 09:30:01 P 304988 412.210209\n",
"8 2023-05-15 09:30:01 T 8141 412.243563\n",
"9 2023-05-15 09:30:01 U 400 412.250000\n",
"10 2023-05-15 09:30:01 X 200 412.250000\n",
"11 2023-05-15 09:30:01 Z 4063 412.238673"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = otp.DataSource('NYSE_TAQ', tick_type='TRD')\n",
"q = q[['PRICE', 'SIZE', 'COND', 'EXCHANGE']]\n",
"q = q.agg({\n",
" 'volume': otp.agg.sum('SIZE'),\n",
" 'vwap': otp.agg.vwap('PRICE','SIZE')\n",
"}, group_by=['EXCHANGE'])\n",
"otp.run(q, start=s, end=e, symbols=['SPY'])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "4fefcfd9-3ed8-4341-968e-a9ea9fe81bf7",
"metadata": {},
"source": [
"A list of all aggregations appears [here](../../api/aggregations/root.rst). It can also be retrieved with `dir(otp.agg)`."
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "ccafbd04-38c1-4b7a-b4cf-5cda4bbc521f",
"metadata": {},
"source": [
"**USE CASES**\n",
"\n",
"[Creating Bars](bars)\n",
"\n",
"[Golden Cross strategy](goldencross)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}