{ "cells": [ { "attachments": {}, "cell_type": "markdown", "id": "2e3c64a7", "metadata": {}, "source": [ "# Filtering" ] }, { "cell_type": "code", "execution_count": 1, "id": "b24b4aa8-f56d-42d4-8f16-a90073bc9c7f", "metadata": {}, "outputs": [], "source": [ "import onetick.py as otp" ] }, { "cell_type": "markdown", "id": "bfcab03a-8973-4d02-8cce-8461afad511a", "metadata": {}, "source": [ "Let's start with an unfiltered time series." ] }, { "cell_type": "code", "execution_count": 12, "id": "a171fb32-9ed7-4869-90ed-ab5a2fc676a6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimePRICESIZECONDEXCHANGE
02023-03-29 09:30:00.000877568399.920400TP
12023-03-29 09:30:00.001151232399.9201000TT
22023-03-29 09:30:00.001154304399.9201000TT
32023-03-29 09:30:00.001921280399.930657TT
42023-03-29 09:30:00.010831360399.925100FZ
..................
5376092023-03-29 15:59:59.994555136401.350643FP
5376102023-03-29 15:59:59.995045376401.350900FP
5376112023-03-29 15:59:59.997313024401.340100Z
5376122023-03-29 15:59:59.997354752401.340498N
5376132023-03-29 15:59:59.997406208401.340200T
\n", "

537614 rows × 5 columns

\n", "
" ], "text/plain": [ " Time PRICE SIZE COND EXCHANGE\n", "0 2023-03-29 09:30:00.000877568 399.920 400 T P\n", "1 2023-03-29 09:30:00.001151232 399.920 1000 T T\n", "2 2023-03-29 09:30:00.001154304 399.920 1000 T T\n", "3 2023-03-29 09:30:00.001921280 399.930 657 T T\n", "4 2023-03-29 09:30:00.010831360 399.925 100 F Z\n", "... ... ... ... ... ...\n", "537609 2023-03-29 15:59:59.994555136 401.350 643 F P\n", "537610 2023-03-29 15:59:59.995045376 401.350 900 F P\n", "537611 2023-03-29 15:59:59.997313024 401.340 100 Z\n", "537612 2023-03-29 15:59:59.997354752 401.340 498 N\n", "537613 2023-03-29 15:59:59.997406208 401.340 200 T\n", "\n", "[537614 rows x 5 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "otp.config['tz'] = 'EST5EDT'\n", "q = otp.DataSource('NYSE_TAQ', tick_type='TRD')\n", "q = q[['PRICE','SIZE','COND','EXCHANGE']]\n", "otp.run(q, start=otp.dt(2023,3,29,9,30), end=otp.dt(2023,3,29,16), symbols=['SPY'])" ] }, { "cell_type": "markdown", "id": "cd690ecc-9b96-4a9b-8adf-7e78e9f5dae7", "metadata": {}, "source": [ "We can filter by the value of a field." ] }, { "cell_type": "code", "execution_count": 13, "id": "0de743e9-a63a-4718-bc26-5b337458558b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimePRICESIZECONDEXCHANGE
02023-03-29 09:30:00.001151232399.921000TT
12023-03-29 09:30:00.001154304399.921000TT
22023-03-29 09:30:00.001921280399.93657TT
32023-03-29 09:30:00.034302720399.92100TT
42023-03-29 09:30:00.050233856399.9310TIT
..................
855092023-03-29 15:59:59.990925568401.35297FT
855102023-03-29 15:59:59.992829184401.35500T
855112023-03-29 15:59:59.993334272401.35257FT
855122023-03-29 15:59:59.993776384401.35200FT
855132023-03-29 15:59:59.997406208401.34200T
\n", "

85514 rows × 5 columns

\n", "
" ], "text/plain": [ " Time PRICE SIZE COND EXCHANGE\n", "0 2023-03-29 09:30:00.001151232 399.92 1000 T T\n", "1 2023-03-29 09:30:00.001154304 399.92 1000 T T\n", "2 2023-03-29 09:30:00.001921280 399.93 657 T T\n", "3 2023-03-29 09:30:00.034302720 399.92 100 T T\n", "4 2023-03-29 09:30:00.050233856 399.93 10 TI T\n", "... ... ... ... ... ...\n", "85509 2023-03-29 15:59:59.990925568 401.35 297 F T\n", "85510 2023-03-29 15:59:59.992829184 401.35 500 T\n", "85511 2023-03-29 15:59:59.993334272 401.35 257 F T\n", "85512 2023-03-29 15:59:59.993776384 401.35 200 F T\n", "85513 2023-03-29 15:59:59.997406208 401.34 200 T\n", "\n", "[85514 rows x 5 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q = otp.DataSource('NYSE_TAQ', tick_type='TRD')\n", "q = q[['PRICE','SIZE','COND','EXCHANGE']]\n", "q, _ = q[q['EXCHANGE'] == 'T']\n", "otp.run(q, start=otp.dt(2023,3,29,9,30), end=otp.dt(2023,3,29,16), symbols=['SPY'])" ] }, { "cell_type": "markdown", "id": "c6231302-ebe0-4ec0-ae0b-7dd3925abc07", "metadata": {}, "source": [ "Note that the result of a filter expression returns 2 values. The first value is the query returning ticks that satisfy the conditions. The second value is the query returning ticks that do not satisfy the conditions. Most of the time you'd be interested just in the ticks that pass the filter and can use a placeholder `_` for the second value." ] }, { "cell_type": "markdown", "id": "fb697b63-adb6-4f32-90bc-a68c82c0a9b3", "metadata": {}, "source": [ "Also, note that all of the filtering is done in OneTick not in Python, which is much more efficient and lets you work with much bigger data sets." ] }, { "cell_type": "markdown", "id": "f9e5df5e-f226-46af-b9de-9dcad5015402", "metadata": {}, "source": [ "Filtering for a specific trade condition is done with the string matching methods." ] }, { "cell_type": "code", "execution_count": 17, "id": "3865d9ef-fac5-407f-89e9-bd8f5b068aa3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimePRICESIZECONDEXCHANGE
02023-03-29 09:30:00.012671744399.92015F IK
12023-03-29 09:30:00.020451328399.9251IZ
22023-03-29 09:30:00.024571136399.9251IZ
32023-03-29 09:30:00.029166080399.92510IZ
42023-03-29 09:30:00.050233856399.93010TIT
..................
2315182023-03-29 15:59:59.948900352401.3401ID
2315192023-03-29 15:59:59.966633728401.3453IP
2315202023-03-29 15:59:59.990119680401.3503IP
2315212023-03-29 15:59:59.990203648401.3603F IP
2315222023-03-29 15:59:59.990712064401.3503IP
\n", "

231523 rows × 5 columns

\n", "
" ], "text/plain": [ " Time PRICE SIZE COND EXCHANGE\n", "0 2023-03-29 09:30:00.012671744 399.920 15 F I K\n", "1 2023-03-29 09:30:00.020451328 399.925 1 I Z\n", "2 2023-03-29 09:30:00.024571136 399.925 1 I Z\n", "3 2023-03-29 09:30:00.029166080 399.925 10 I Z\n", "4 2023-03-29 09:30:00.050233856 399.930 10 TI T\n", "... ... ... ... ... ...\n", "231518 2023-03-29 15:59:59.948900352 401.340 1 I D\n", "231519 2023-03-29 15:59:59.966633728 401.345 3 I P\n", "231520 2023-03-29 15:59:59.990119680 401.350 3 I P\n", "231521 2023-03-29 15:59:59.990203648 401.360 3 F I P\n", "231522 2023-03-29 15:59:59.990712064 401.350 3 I P\n", "\n", "[231523 rows x 5 columns]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q = otp.DataSource('NYSE_TAQ', tick_type='TRD')\n", "q = q[['PRICE','SIZE','COND','EXCHANGE']]\n", "q, _ = q[q['COND'].str.contains('I')]\n", "otp.run(q, start=otp.dt(2023,3,29,9,30), end=otp.dt(2023,3,29,16), symbols=['SPY'])" ] }, { "cell_type": "markdown", "id": "ef8e7e9b-e95b-44b0-a37c-ab3a2ecc9b15", "metadata": {}, "source": [ "Trade filter that limits attention to on-exchange continuous trading trades looks like this (it's used when creating bars)." ] }, { "cell_type": "code", "execution_count": 16, "id": "77b9bc26-5b96-483d-89df-409d53c02eca", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimePRICESIZECONDEXCHANGE
02023-03-29 09:30:00.010831360399.925100FZ
12023-03-29 09:30:00.011159296399.930100FK
22023-03-29 09:30:00.030200576399.920100K
32023-03-29 09:30:00.035513856399.925100Z
42023-03-29 09:30:00.060566528399.920200K
..................
3050702023-03-29 15:59:59.994555136401.350643FP
3050712023-03-29 15:59:59.995045376401.350900FP
3050722023-03-29 15:59:59.997313024401.340100Z
3050732023-03-29 15:59:59.997354752401.340498N
3050742023-03-29 15:59:59.997406208401.340200T
\n", "

305075 rows × 5 columns

\n", "
" ], "text/plain": [ " Time PRICE SIZE COND EXCHANGE\n", "0 2023-03-29 09:30:00.010831360 399.925 100 F Z\n", "1 2023-03-29 09:30:00.011159296 399.930 100 F K\n", "2 2023-03-29 09:30:00.030200576 399.920 100 K\n", "3 2023-03-29 09:30:00.035513856 399.925 100 Z\n", "4 2023-03-29 09:30:00.060566528 399.920 200 K\n", "... ... ... ... ... ...\n", "305070 2023-03-29 15:59:59.994555136 401.350 643 F P\n", "305071 2023-03-29 15:59:59.995045376 401.350 900 F P\n", "305072 2023-03-29 15:59:59.997313024 401.340 100 Z\n", "305073 2023-03-29 15:59:59.997354752 401.340 498 N\n", "305074 2023-03-29 15:59:59.997406208 401.340 200 T\n", "\n", "[305075 rows x 5 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q = otp.DataSource('NYSE_TAQ', tick_type='TRD')\n", "q = q[['PRICE','SIZE','COND','EXCHANGE']]\n", "q, _ = q[q['COND'].str.match('^[^O6TUHILNRWZ47QMBCGPV]*$')]\n", "otp.run(q, start=otp.dt(2023,3,29,9,30), end=otp.dt(2023,3,29,16), symbols=['SPY'])" ] }, { "cell_type": "markdown", "id": "ab43354f-56f7-43dc-b040-8b9441231a26", "metadata": {}, "source": [ "Filters can include 'and' and/or 'or' clauses." ] }, { "cell_type": "code", "execution_count": 15, "id": "8c9fc0db-94c5-4de5-b07f-9f71a89417a7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimePRICESIZECONDEXCHANGE
02023-03-29 09:48:06.039307008400.035F IT
12023-03-29 09:48:06.039391232400.0395F IT
22023-03-29 09:48:06.039394304400.03200FT
32023-03-29 09:48:06.039400192400.03100T
42023-03-29 09:48:06.039481344400.03130T
..................
388322023-03-29 15:59:59.990925568401.35297FT
388332023-03-29 15:59:59.992829184401.35500T
388342023-03-29 15:59:59.993334272401.35257FT
388352023-03-29 15:59:59.993776384401.35200FT
388362023-03-29 15:59:59.997406208401.34200T
\n", "

38837 rows × 5 columns

\n", "
" ], "text/plain": [ " Time PRICE SIZE COND EXCHANGE\n", "0 2023-03-29 09:48:06.039307008 400.03 5 F I T\n", "1 2023-03-29 09:48:06.039391232 400.03 95 F I T\n", "2 2023-03-29 09:48:06.039394304 400.03 200 F T\n", "3 2023-03-29 09:48:06.039400192 400.03 100 T\n", "4 2023-03-29 09:48:06.039481344 400.03 130 T\n", "... ... ... ... ... ...\n", "38832 2023-03-29 15:59:59.990925568 401.35 297 F T\n", "38833 2023-03-29 15:59:59.992829184 401.35 500 T\n", "38834 2023-03-29 15:59:59.993334272 401.35 257 F T\n", "38835 2023-03-29 15:59:59.993776384 401.35 200 F T\n", "38836 2023-03-29 15:59:59.997406208 401.34 200 T\n", "\n", "[38837 rows x 5 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q = otp.DataSource('NYSE_TAQ', tick_type='TRD')\n", "q = q[['PRICE','SIZE','COND','EXCHANGE']]\n", "q, _ = q[(q['EXCHANGE'] == 'T') & (q['PRICE'] > 400)]\n", "otp.run(q, start=otp.dt(2023,3,29,9,30), end=otp.dt(2023,3,29,16), symbols=['SPY'])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.8" } }, "nbformat": 4, "nbformat_minor": 5 }