import ctypes
import functools
import inspect
import typing
import warnings
import pandas as pd
import numpy as np
from datetime import date as _date
from datetime import datetime as _datetime
from pandas import Timestamp as _timestamp
from pandas.tseries import offsets
import onetick.py as otp
from onetick.py.core._internal._op_utils.every_operand import every_operand
from onetick.py.utils import get_tzfile_by_name, get_timezone_from_datetime
# --------------------------------------------------------------- #
# TYPES IMPLEMENTATION
# --------------------------------------------------------------- #
class OTPBaseTimeStamp(type):
    """Common metaclass base shared by the timestamp type metaclasses of this module."""
class _nsectime(OTPBaseTimeStamp):
    """Metaclass that makes the class it creates render as ``nsectime`` when converted to string."""

    def __str__(cls):
        return "nsectime"
class nsectime(int, metaclass=_nsectime):
    """
    OneTick data type representing datetime with nanoseconds precision.

    Can be used to specify otp.Source column type when converting columns or creating new ones.

    Note that this constructor creates datetime value in GMT timezone
    and doesn't take into account the timezone with which the query is executed.

    Examples
    --------
    >>> t = otp.Tick(A=0)
    >>> t['A'] = t['A'].apply(otp.nsectime)
    >>> t['B'] = otp.nsectime(24 * 60 * 60 * 1000 * 1000 * 1000 + 2)
    >>> t.schema
    {'A': <class 'onetick.py.types.nsectime'>, 'B': <class 'onetick.py.types.nsectime'>}
    >>> otp.run(t)
    Time A B
    0 2003-12-01 1969-12-31 19:00:00 1970-01-01 19:00:00.000000002
    """
class _msectime(OTPBaseTimeStamp):
    """Metaclass that makes the class it creates render as ``msectime`` when converted to string."""

    def __str__(cls):
        return "msectime"
class msectime(int, metaclass=_msectime):
    """
    OneTick data type representing datetime with milliseconds precision.

    Can be used to specify otp.Source column type when converting columns or creating new ones.

    Note that this constructor creates datetime value in GMT timezone
    and doesn't take into account the timezone with which the query is executed.

    Examples
    --------
    >>> t = otp.Tick(A=1)
    >>> t = t.table(A=otp.msectime)
    >>> t['B'] = otp.msectime(2)
    >>> t.schema
    {'A': <class 'onetick.py.types.msectime'>, 'B': <class 'onetick.py.types.msectime'>}
    >>> otp.run(t)
    Time A B
    0 2003-12-01 1969-12-31 19:00:00.001 1969-12-31 19:00:00.002
    """
class OTPBaseTimeOffset:
    """Base class of the datetime offset (datepart) objects."""

    # placeholder value only: this class is just a base for the real dateparts
    datepart = "'invalid'"
class ExpressionDefinedTimeOffset(OTPBaseTimeOffset):
    """
    Datetime offset whose amount is given by a column or an expression
    instead of an integer constant.

    Parameters
    ----------
    datepart: str
        Datepart name stored on the instance as is.
    n:
        Object providing the number of dateparts; calls to operation methods
        missing on this class are forwarded to it.
    """

    def __init__(self, datepart, n):
        self.datepart = datepart
        self.n = n

        from onetick.py.core.column_operations.base import _Operation

        def _forward_to_n(method_name):
            def _proxy(self, *args, **kwargs):
                target = getattr(self.n, method_name)
                return target(*args, **kwargs)
            return _proxy

        # Every callable member of _Operation that the class does not have yet
        # is installed on the class itself (not on the instance) as a proxy to ``self.n``.
        for method_name, _ in inspect.getmembers(_Operation, callable):
            if hasattr(ExpressionDefinedTimeOffset, method_name):
                continue
            setattr(ExpressionDefinedTimeOffset, method_name, _forward_to_n(method_name))
# ---------------------------- #
# Implement datepart units
def _construct_dpf(dp_class, str_repr=None, **dp_class_params):
    """
    Construct a datepart factory.

    Parameters
    ----------
    dp_class: type
        Offset class the generated datepart class is derived from.
    str_repr: str, optional
        Datepart name. The lower-cased ``dp_class.__name__`` is used when omitted.
    dp_class_params:
        Keyword arguments describing one unit of the datepart (e.g. ``months=3``).
        When specified, an integer ``n`` results in ``_DatePartCls(**dp_class_params) * n``,
        otherwise in ``_DatePartCls(n)``.

    Returns
    -------
    callable
        Factory accepting an integer constant, a column or an arithmetic expression.
        It raises ``ValueError`` for any other argument and for date arithmetic
        other than subtracting one date from another.
    """
    if str_repr is None:
        str_repr = dp_class.__name__.lower()

    class _DatePartCls(dp_class, OTPBaseTimeOffset):
        datepart = f"'{str_repr}'"

    def _factory(n):
        from onetick.py.core.column_operations._methods.methods import is_arithmetical
        from onetick.py.core.column import _Column

        if isinstance(n, int):
            if dp_class_params:
                return _DatePartCls(**dp_class_params) * n
            return _DatePartCls(n)
        if is_arithmetical(n):
            n = _process_datediff(n)
            return ExpressionDefinedTimeOffset(_DatePartCls.datepart, n)
        if isinstance(n, _Column):
            return ExpressionDefinedTimeOffset(_DatePartCls.datepart, n)
        raise ValueError("Unknown type was passed as arg, integer constant or column or expression is expected here")

    def _process_datediff(n):
        # expressions without time operands are used as the offset amount as is
        if not _get_n_time_operand(n):
            return n

        # check if otp.Hour(date1 - date2) is called, return a number of hours between two days in such ways
        from onetick.py.core.column_operations._methods.methods import sub, _wrap_object
        from onetick.py.core.column_operations.base import _Operation
        from onetick.py.core.column import _Column, _LagOperator

        available_types = (_Column, _LagOperator)
        operands = getattr(n, "_op_params", None) or ()
        # Only a subtraction of two columns is a date difference.
        # The previous check, ``getattr(n, "_op_func", sub)``, was truthy for every
        # operation, so e.g. ``date1 + date2`` was silently converted to DATEDIFF too.
        is_subtraction = getattr(n, "_op_func", None) is sub
        if (is_subtraction and len(operands) == 2
                and all(isinstance(operand, available_types) for operand in operands)):
            def _datediff(*args):
                args = ', '.join(map(_wrap_object, args))
                return f'DATEDIFF({_DatePartCls.datepart}, {args}, _TIMEZONE)', int

            # the operands are passed to DATEDIFF in reversed order
            return _Operation(_datediff, [operands[1], operands[0]])

        raise ValueError(
            "Date arithmetic operations (except date2-date1, which calculate an amount of "
            "periods between two dates) are not accepted in TimeOffset constructor"
        )

    def _get_n_time_operand(n):
        """ count the operands of the expression that have a time type """
        from onetick.py.core.column_operations._methods.op_types import are_time

        return sum(1 for op in every_operand(n) if are_time(get_object_type(op)))

    return _factory
# Bound ``str.format`` of the docstring template shared by the datepart factories below.
# The template has two positional placeholders: the first receives the datepart name,
# the second receives the text of the examples section.
_add_examples_to_docs = """
Object representing {}'s datetime offset.
Can be added to or subtracted from:
* :py:class:`otp.datetime <onetick.py.datetime>` objects
* :py:class:`Source <onetick.py.Source>` columns of datetime type
Parameters
----------
n: int, :class:`~onetick.py.Column`, :class:`~onetick.py.Operation`
Offset integer value or column of :class:`~onetick.py.Source`.
The only :class:`~onetick.py.Operation` supported is
subtracting one datetime column from another. See example below.
Examples
--------
{}
""".format
# Year offset factory: the datepart name is 'year' and an integer ``n``
# is turned into ``n`` units of a ``DateOffset``-derived object built with ``years=1``.
Year = _construct_dpf(offsets.DateOffset, "year", years=1)
Year.__doc__ = _add_examples_to_docs('year', """
Add to or subtract from :py:class:`otp.datetime <onetick.py.datetime>` object:
>>> otp.datetime(2012, 12, 12, 12) + otp.Year(1)
2013-12-12 12:00:00
>>> otp.datetime(2012, 12, 12, 12) - otp.Year(1)
2011-12-12 12:00:00
Use offset in columns:
>>> t = otp.Tick(A=1)
>>> t['T'] = otp.datetime(2012, 12, 12, 12)
>>> t['T'] += otp.Year(t['A'])
>>> otp.run(t)
Time T A
0 2003-12-01 2013-12-12 12:00:00 1
Use it to calculate difference between two dates:
>>> t = otp.Tick(A=otp.dt(2022, 1, 1), B=otp.dt(2023, 1, 1))
>>> t['DIFF'] = otp.Year(t['B'] - t['A'])
>>> otp.run(t)
Time A B DIFF
0 2003-12-01 2022-01-01 2023-01-01 1
""")
# Quarter offset factory: the datepart name is 'quarter' and an integer ``n``
# is turned into ``n`` units of a ``DateOffset``-derived object built with ``months=3``.
Quarter = _construct_dpf(offsets.DateOffset, "quarter", months=3)
Quarter.__doc__ = _add_examples_to_docs('quarter', """
Add to or subtract from :py:class:`otp.datetime <onetick.py.datetime>` object:
>>> otp.datetime(2012, 12, 12, 12) + otp.Quarter(1)
2013-03-12 12:00:00
>>> otp.datetime(2012, 12, 12, 12) - otp.Quarter(1)
2012-09-12 12:00:00
Use offset in columns:
>>> t = otp.Tick(A=1)
>>> t['T'] = otp.datetime(2012, 12, 12, 12, tz='GMT')
>>> t['T'] += otp.Quarter(t['A'])
>>> otp.run(t, start=otp.datetime(2003, 12, 2), end=otp.datetime(2003, 12, 3), timezone='GMT')
Time T A
0 2003-12-02 2013-03-12 12:00:00 1
Use it to calculate difference between two dates:
>>> t = otp.Tick(A=otp.dt(2022, 1, 1), B=otp.dt(2023, 1, 1))
>>> t['DIFF'] = otp.Quarter(t['B'] - t['A'])
>>> otp.run(t)
Time A B DIFF
0 2003-12-01 2022-01-01 2023-01-01 4
""")
# Month offset factory: the datepart name is 'month' and an integer ``n``
# is turned into ``n`` units of a ``DateOffset``-derived object built with ``months=1``.
Month = _construct_dpf(offsets.DateOffset, "month", months=1)
Month.__doc__ = _add_examples_to_docs('month', """
Add to or subtract from :py:class:`otp.datetime <onetick.py.datetime>` object:
>>> otp.datetime(2012, 12, 12, 12) + otp.Month(1)
2013-01-12 12:00:00
>>> otp.datetime(2012, 12, 12, 12) - otp.Month(1)
2012-11-12 12:00:00
Use offset in columns:
>>> t = otp.Tick(A=1)
>>> t['T'] = otp.datetime(2012, 12, 12, 12)
>>> t['T'] += otp.Month(t['A'])
>>> otp.run(t)
Time T A
0 2003-12-01 2013-01-12 12:00:00 1
Use it to calculate difference between two dates:
>>> t = otp.Tick(A=otp.dt(2022, 1, 1), B=otp.dt(2023, 1, 1))
>>> t['DIFF'] = otp.Month(t['B'] - t['A'])
>>> otp.run(t)
Time A B DIFF
0 2003-12-01 2022-01-01 2023-01-01 12
""")
# Week offset factory: no explicit name is passed, so the datepart name is the
# lower-cased class name ('week'); an integer ``n`` is passed to a class derived from ``offsets.Week``.
Week = _construct_dpf(offsets.Week)
Week.__doc__ = _add_examples_to_docs('week', """
Add to or subtract from :py:class:`otp.datetime <onetick.py.datetime>` object:
>>> otp.datetime(2012, 12, 12, 12) + otp.Week(1)
2012-12-19 12:00:00
>>> otp.datetime(2012, 12, 12, 12) - otp.Week(1)
2012-12-05 12:00:00
Use offset in columns:
>>> t = otp.Tick(A=1)
>>> t['T'] = otp.datetime(2012, 12, 12, 12)
>>> t['T'] += otp.Week(t['A'])
>>> otp.run(t)
Time T A
0 2003-12-01 2012-12-19 12:00:00 1
Use it to calculate difference between two dates:
>>> t = otp.Tick(A=otp.dt(2022, 1, 1), B=otp.dt(2023, 1, 1))
>>> t['DIFF'] = otp.Week(t['B'] - t['A'])
>>> otp.run(t)
Time A B DIFF
0 2003-12-01 2022-01-01 2023-01-01 53
""")
# Day offset factory: no explicit name is passed, so the datepart name is the
# lower-cased class name ('day'); an integer ``n`` is passed to a class derived from ``offsets.Day``.
Day = _construct_dpf(offsets.Day)
Day.__doc__ = _add_examples_to_docs('day', """
Add to or subtract from :py:class:`otp.datetime <onetick.py.datetime>` object:
>>> otp.datetime(2012, 12, 12, 12) + otp.Day(1)
2012-12-13 12:00:00
>>> otp.datetime(2012, 12, 12, 12) - otp.Day(1)
2012-12-11 12:00:00
Use offset in columns:
>>> t = otp.Tick(A=1)
>>> t['T'] = otp.datetime(2012, 12, 12, 12)
>>> t['T'] += otp.Day(t['A'])
>>> otp.run(t)
Time T A
0 2003-12-01 2012-12-13 12:00:00 1
Use it to calculate difference between two dates:
>>> t = otp.Tick(A=otp.dt(2022, 1, 1), B=otp.dt(2023, 1, 1))
>>> t['DIFF'] = otp.Day(t['B'] - t['A'])
>>> otp.run(t)
Time A B DIFF
0 2003-12-01 2022-01-01 2023-01-01 365
""")
# Hour offset factory: no explicit name is passed, so the datepart name is the
# lower-cased class name ('hour'); an integer ``n`` is passed to a class derived from ``offsets.Hour``.
Hour = _construct_dpf(offsets.Hour)
Hour.__doc__ = _add_examples_to_docs('hour', """
Add to or subtract from :py:class:`otp.datetime <onetick.py.datetime>` object:
>>> otp.datetime(2012, 12, 12, 12) + otp.Hour(1)
2012-12-12 13:00:00
>>> otp.datetime(2012, 12, 12, 12) - otp.Hour(1)
2012-12-12 11:00:00
Use offset in columns:
>>> t = otp.Tick(A=1)
>>> t['T'] = otp.datetime(2012, 12, 12, 12)
>>> t['T'] += otp.Hour(t['A'])
>>> otp.run(t)
Time T A
0 2003-12-01 2012-12-12 13:00:00 1
Use it to calculate difference between two dates:
>>> t = otp.Tick(A=otp.dt(2022, 1, 1), B=otp.dt(2022, 1, 2))
>>> t['DIFF'] = otp.Hour(t['B'] - t['A'])
>>> otp.run(t)
Time A B DIFF
0 2003-12-01 2022-01-01 2022-01-02 24
""")
# Minute offset factory: no explicit name is passed, so the datepart name is the
# lower-cased class name ('minute'); an integer ``n`` is passed to a class derived from ``offsets.Minute``.
Minute = _construct_dpf(offsets.Minute)
Minute.__doc__ = _add_examples_to_docs('minute', """
Add to or subtract from :py:class:`otp.datetime <onetick.py.datetime>` object:
>>> otp.datetime(2012, 12, 12, 12) + otp.Minute(1)
2012-12-12 12:01:00
>>> otp.datetime(2012, 12, 12, 12) - otp.Minute(1)
2012-12-12 11:59:00
Use offset in columns:
>>> t = otp.Tick(A=1)
>>> t['T'] = otp.datetime(2012, 12, 12, 12)
>>> t['T'] += otp.Minute(t['A'])
>>> otp.run(t)
Time T A
0 2003-12-01 2012-12-12 12:01:00 1
Use it to calculate difference between two dates:
>>> t = otp.Tick(A=otp.dt(2022, 1, 1), B=otp.dt(2022, 1, 1, 1))
>>> t['DIFF'] = otp.Minute(t['B'] - t['A'])
>>> otp.run(t)
Time A B DIFF
0 2003-12-01 2022-01-01 2022-01-01 01:00:00 60
""")
# Second offset factory: no explicit name is passed, so the datepart name is the
# lower-cased class name ('second'); an integer ``n`` is passed to a class derived from ``offsets.Second``.
Second = _construct_dpf(offsets.Second)
Second.__doc__ = _add_examples_to_docs('second', """
Add to or subtract from :py:class:`otp.datetime <onetick.py.datetime>` object:
>>> otp.datetime(2012, 12, 12, 12) + otp.Second(1)
2012-12-12 12:00:01
>>> otp.datetime(2012, 12, 12, 12) - otp.Second(1)
2012-12-12 11:59:59
Use offset in columns:
>>> t = otp.Tick(A=1)
>>> t['T'] = otp.datetime(2012, 12, 12, 12)
>>> t['T'] += otp.Second(t['A'])
>>> otp.run(t)
Time T A
0 2003-12-01 2012-12-12 12:00:01 1
Use it to calculate difference between two dates:
>>> t = otp.Tick(A=otp.dt(2022, 1, 1), B=otp.dt(2022, 1, 1, 0, 1))
>>> t['DIFF'] = otp.Second(t['B'] - t['A'])
>>> otp.run(t)
Time A B DIFF
0 2003-12-01 2022-01-01 2022-01-01 00:01:00 60
""")
# Millisecond offset factory: the datepart name is given explicitly as 'millisecond';
# an integer ``n`` is passed to a class derived from ``offsets.Milli``.
Milli = _construct_dpf(offsets.Milli, "millisecond")
Milli.__doc__ = _add_examples_to_docs('millisecond', """
Add to or subtract from :py:class:`otp.datetime <onetick.py.datetime>` object:
>>> otp.datetime(2012, 12, 12, 12) + otp.Milli(1)
2012-12-12 12:00:00.001000
>>> otp.datetime(2012, 12, 12, 12) - otp.Milli(1)
2012-12-12 11:59:59.999000
Use offset in columns:
>>> t = otp.Tick(A=1)
>>> t['T'] = otp.datetime(2012, 12, 12, 12)
>>> t['T'] += otp.Milli(t['A'])
>>> otp.run(t)
Time T A
0 2003-12-01 2012-12-12 12:00:00.001 1
Use it to calculate difference between two dates:
>>> t = otp.Tick(A=otp.dt(2022, 1, 1), B=otp.dt(2022, 1, 1, 0, 0, 1))
>>> t['DIFF'] = otp.Milli(t['B'] - t['A'])
>>> otp.run(t)
Time A B DIFF
0 2003-12-01 2022-01-01 2022-01-01 00:00:01 1000
""")
# microseconds are not supported yet
# Nanosecond offset factory: the datepart name is given explicitly as 'nanosecond';
# an integer ``n`` is passed to a class derived from ``offsets.Nano``.
Nano = _construct_dpf(offsets.Nano, "nanosecond")
Nano.__doc__ = _add_examples_to_docs('nanosecond', """
Add to or subtract from :py:class:`otp.datetime <onetick.py.datetime>` object:
>>> otp.datetime(2012, 12, 12, 12) + otp.Nano(1)
2012-12-12 12:00:00.000000001
>>> otp.datetime(2012, 12, 12, 12) - otp.Nano(1)
2012-12-12 11:59:59.999999999
Use offset in columns:
>>> t = otp.Tick(A=1)
>>> t['T'] = otp.datetime(2012, 12, 12, 12)
>>> t['T'] += otp.Nano(t['A'])
>>> otp.run(t)
Time T A
0 2003-12-01 2012-12-12 12:00:00.000000001 1
Use it to calculate difference between two dates:
>>> t = otp.Tick(A=otp.dt(2022, 1, 1), B=otp.dt(2022, 1, 1, 0, 0, 1))
>>> t['DIFF'] = otp.Nano(t['B'] - t['A'])
>>> otp.run(t)
Time A B DIFF
0 2003-12-01 2022-01-01 2022-01-01 00:00:01 1000000000
""")
# ---------------------------- #
class _inner_string(type):
def __str__(cls):
if cls.length is Ellipsis:
return "varstring"
if cls.length:
return f"string[{cls.length}]"
else:
return "string"
def __repr__(cls):
return str(cls)
# We have ot use functools.cache, because 'class' in python is an object,
# and _inner_str for the same item is different for every call,
# but we want to make str[1024] be equal to another str[1024]
@functools.lru_cache(maxsize=None) # noqa: W1518
def __getitem__(cls, item):
class _inner_str(string, metaclass=_inner_string): # pylint: disable=E0601
def __len__(self):
return self.__class__.length
if (type(item) is not int or item < 1) and item is not Ellipsis:
raise TypeError("It is not allowed to have non numeric index")
_inner_str.length = item
return _inner_str
class string(str, metaclass=_inner_string):
    """
    OneTick data type representing string with length and varstring.

    To set string length use ``__getitem__``.
    If the length is not set then the :py:attr:`~DEFAULT_LENGTH` length is used by default.
    In this case using ``otp.string`` is the same as using ``str``.
    If the length is set to Ellipsis it represents varstring.
    Varstring is used for returning variably sized strings.

    Note
    ----
    If you try to set value with length x to string[y] and x > y, value will be truncated to y length.

    Attributes
    ----------
    DEFAULT_LENGTH: int
        default length of the string when the length is not specified

    Examples
    --------
    >>> # OTdirective: skip-snippet:;
    >>> t = otp.Tick(A='a')
    >>> t = t.table(A=otp.string[10])
    >>> t.schema
    {'A': string[10]}

    Example of truncation column value to set string length.

    >>> # OTdirective: skip-snippet:;
    >>> t['A'] *= 100
    >>> t['B'] = t['A'].str.len()
    >>> otp.run(t)
    Time A B
    0 2003-12-01 aaaaaaaaaa 10

    Example of string with default length.

    >>> t = otp.Tick(A='a')
    >>> t['A'] *= 100
    >>> t['B'] = t['A'].str.len()
    >>> otp.run(t)
    Time A B
    0 2003-12-01 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 64

    Setting Ellipsis as length represents varstring.

    >>> t = otp.Tick(A='a')
    >>> t = t.table(A=otp.string[...])
    >>> t.schema
    {'A': varstring}

    Varstring length is multiplied.

    >>> t['A'] *= 65
    >>> t['B'] = t['A'].str.len()
    >>> otp.run(t)
    Time A B
    0 2003-12-01 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 65

    `otp.varstring` is a shortcut:

    >>> t = otp.Tick(A='a')
    >>> t = t.table(A=otp.varstring)
    >>> t.schema
    {'A': varstring}
    """

    DEFAULT_LENGTH = 64
    # ``None`` means that the length was not specified
    length = None


# shortcut for the variable-length string type
varstring = string[...]
class _nan_base(type):
def __str__(cls):
return "double"
class _nan(float, metaclass=_nan_base):
    """
    Object that represents NaN (not a number) float value.

    Can be used anywhere where float value is expected.

    Examples
    --------
    >>> t = otp.Ticks({'A': [1.1, 2.2, otp.nan]})
    >>> t['B'] = otp.nan
    >>> t['C'] = t['A'] / 0
    >>> t['D'] = t['A'] + otp.nan
    >>> otp.run(t)
    Time A B C D
    0 2003-12-01 00:00:00.000 1.1 NaN inf NaN
    1 2003-12-01 00:00:00.001 2.2 NaN inf NaN
    2 2003-12-01 00:00:00.002 NaN NaN NaN NaN
    """

    __name__ = 'nan'

    def __str__(self):
        # string form of the value is the expression ``NAN()``
        return "NAN()"


# module-level instance of the NaN value
nan = _nan()
class _inf(float, metaclass=_nan_base):
    """
    Object that represents infinity value.

    Can be used anywhere where float value is expected.

    Examples
    --------
    >>> t = otp.Ticks({'A': [1.1, 2.2, otp.inf]})
    >>> t['B'] = otp.inf
    >>> t['C'] = t['A'] / 0
    >>> t['D'] = t['A'] - otp.inf
    >>> otp.run(t)
    Time A B C D
    0 2003-12-01 00:00:00.000 1.1 inf inf -inf
    1 2003-12-01 00:00:00.001 2.2 inf inf -inf
    2 2003-12-01 00:00:00.002 inf inf inf NaN
    """

    __name__ = 'inf'

    def __init__(self):
        # empty string or '-' for negative infinity
        self._sign = ""

    def __str__(self):
        return f"{self._sign}INFINITY()"

    def __neg__(self):
        # negation creates a new object with the opposite sign, the original stays untouched
        negated = _inf()
        if not self._sign:
            negated._sign = "-"
        else:
            negated._sign = ""
        return negated


# module-level instance of the (positive) infinity value
inf = _inf()
class _decimal_str(type):
    """Metaclass that makes the ``decimal`` class render as ``decimal`` when converted to string."""

    def __str__(cls):
        return 'decimal'


class decimal(float, metaclass=_decimal_str):
    """
    Object that represents decimal OneTick value.
    Decimal is 128 bit base 10 floating point number.

    The result of ``+``, ``-``, ``*`` and ``/`` (including the reflected variants)
    is an object of the same class when the other operand is supported by ``float``.
    For unsupported operands ``NotImplemented`` is returned, so python can try
    the reflected method of the other operand.

    Examples
    --------
    >>> t = otp.Ticks({'A': [otp.decimal(1), otp.decimal(2)]})
    >>> t['B'] = otp.decimal(1.23456789)
    >>> t['C'] = t['A'] / 0
    >>> t['D'] = t['A'] + otp.nan
    >>> otp.run(t)
    Time A B C D
    0 2003-12-01 00:00:00.000 1.0 1.234568 inf NaN
    1 2003-12-01 00:00:00.001 2.0 1.234568 inf NaN
    """

    def _wrap_result(self, value):
        # float's operators return NotImplemented for operands they can't handle.
        # It must be passed through untouched: wrapping it raised TypeError and
        # prevented python from calling the reflected method of the other operand.
        if value is NotImplemented:
            return value
        return self.__class__(value)

    def __add__(self, other):
        return self._wrap_result(super().__add__(other))

    def __radd__(self, other):
        return self._wrap_result(super().__radd__(other))

    def __sub__(self, other):
        return self._wrap_result(super().__sub__(other))

    def __rsub__(self, other):
        return self._wrap_result(super().__rsub__(other))

    def __mul__(self, other):
        return self._wrap_result(super().__mul__(other))

    def __rmul__(self, other):
        return self._wrap_result(super().__rmul__(other))

    def __truediv__(self, other):
        return self._wrap_result(super().__truediv__(other))

    def __rtruediv__(self, other):
        return self._wrap_result(super().__rtruediv__(other))

    def __str__(self):
        return f'{float(self)}'

    def __repr__(self):
        return f"{self.__class__.__name__}({self})"
# --------------------------------------------------------------- #
# AUXILIARY FUNCTIONS
# --------------------------------------------------------------- #
def is_type_basic(dtype):
    """
    Check that ``dtype`` is one of the basic types listed below
    or a subclass of :class:`string`.
    """
    basic_types = (int, float, str, byte, short, uint, ulong, nsectime, msectime, decimal)
    if dtype in basic_types:
        return True
    return issubclass(dtype, string)
# TODO: PY-632: unify these functions with others
def get_source_base_type(value):
    """
    Get the type that can be set in schema for the passed type or object.

    Passing an object instead of a type, or a type that is not basic,
    results in ``DeprecationWarning``.

    Raises
    ------
    TypeError
        if the resulting type is not supported or can't be set in schema
    """
    if not inspect.isclass(value):
        warnings.warn('Setting schema with instance of the class is deprecated, use type instead',
                      DeprecationWarning)
        value_type = type(value)
        # convert string to custom string if necessary
        if value_type is str and len(value) > string.DEFAULT_LENGTH:
            value_type = string[len(value)]
    else:
        value_type = value
        if not is_type_basic(value_type):
            warnings.warn('Setting schema with complex types is deprecated, use basic type instead',
                          DeprecationWarning)

    if issubclass(value_type, bool):
        value_type = float

    if is_time_type(value_type):
        value_type = nsectime

    # check valid value type
    supported_base_types = [int, float, str, bool, decimal]
    if get_base_type(value_type) not in supported_base_types:
        raise TypeError(f'Type "{value_type!r}" is not supported.')

    if not is_type_basic(value_type):
        raise TypeError(f"Type {value_type!r} can't be set in schema.")

    return value_type
def is_type_supported(dtype):
    """
    Check that the base type of ``dtype`` is int, float, str, bool or decimal,
    or that ``dtype`` is a subclass of ``datetime`` or ``date`` of this module.
    """
    if get_base_type(dtype) in [int, float, str, bool, decimal]:
        return True
    return issubclass(dtype, (datetime, date))
def get_base_type(obj):
    """
    Get the base type of the class ``obj``: str, bool, int, decimal or float.
    ``type(None)`` is returned for any other class.
    """
    # the order matters: bool is a subclass of int, so it has to be checked first
    for base in (str, bool, int):
        if issubclass(obj, base):
            return base
    # decimal is derived from float, so it is checked before float
    if obj is decimal:
        return decimal
    if issubclass(obj, float):
        return float
    return type(None)
def get_object_type(obj):
    """
    Get the type of the object.

    * float for the ``nan`` and ``inf`` objects of this module
    * a type is returned as is
    * for objects with ``dtype`` attribute its value is returned
      (the scalar type in case of numpy dtype)
    * nsectime for the objects recognized by ``is_time_type``
    * ``type(obj)`` otherwise
    """
    if isinstance(obj, (_nan, _inf)):
        return float
    if isinstance(obj, typing.Type):
        return obj
    if hasattr(obj, 'dtype'):
        dtype = obj.dtype
        return dtype.type if isinstance(dtype, np.dtype) else dtype
    if is_time_type(obj):
        return nsectime
    return type(obj)
def get_type_by_objects(objs):
    """
    Helper that calculates the widest type of the list of passed objects.
    Used to determine type by returned values.

    Parameters
    ----------
    objs: iterable
        Objects (or types) to find the common type for.

    Returns
    -------
    type
        The widest type. ``None`` is returned if none of the rules below matched
        and there were several different types.

    Raises
    ------
    TypeError
        if string values are mixed with numeric or time values,
        or time values are mixed with float values
    """
    # the objects are needed more than once, so one-shot iterables are materialized
    objs = list(objs)
    obj_types = [get_object_type(v) for v in objs]

    # collect types, all string types are treated as str at this step
    types = {str if issubclass(t, str) else t for t in obj_types}

    # does not allow to mix string and numeric types
    dtype = None
    if str in types and (float in types or int in types or bool in types or nsectime in types or msectime in types):
        raise TypeError("It is not allowed to return values of string type and numeric type in one function.")

    # if there is only one value there, then
    # use it as is
    if len(types) == 1:
        dtype = next(iter(types))
        if dtype is bool:
            return dtype

    # process numeric types: the most generic is float
    if int in types:
        dtype = int
    if bool in types:
        dtype = float
    if float in types:
        dtype = float

    # process string types, taking into account OneTick long strings
    if str in types:
        max_len = string.DEFAULT_LENGTH
        for v, t in zip(objs, obj_types):
            if issubclass(t, string):
                if t.length is Ellipsis:
                    # varstring has no length limit, so it is the widest string type
                    max_len = Ellipsis
                    break
                # string without explicit length has the default length,
                # comparing its ``None`` length with a number would raise TypeError
                if t.length is not None:
                    max_len = max(t.length, max_len)
            elif isinstance(v, str):
                max_len = max(len(v), max_len)
        if max_len == string.DEFAULT_LENGTH:
            dtype = str
        else:
            dtype = string[max_len]  # pylint: disable=E1136

    # process msectime and nsectime
    if dtype is float and (msectime in types or nsectime in types):
        raise TypeError("It is not allowed to return value of time type and float type in one function.")
    if msectime in types:
        dtype = msectime
    if nsectime in types:
        dtype = nsectime

    # we assume the None value has float default value, ie NaN
    if type(None) is dtype:
        dtype = float

    return dtype
# ------------------- #
# extend datetime
class AbstractTime:
    """
    Base class of the date and time classes of this module.

    All properties and methods work with the ``ts`` attribute,
    which has to be set by the subclasses.
    Comparison with objects having ``ts`` attribute is done by this attribute,
    other objects are compared with ``ts`` directly.
    """

    def __init__(self):
        self.ts: _timestamp

    @staticmethod
    def _raw(other):
        # unwrap the objects holding their value in the ``ts`` attribute
        return getattr(other, "ts", other)

    @property
    def year(self):
        return self.ts.year

    @property
    def month(self):
        return self.ts.month

    @property
    def day(self):
        return self.ts.day

    def date(self):
        return _date(self.year, self.month, self.day)

    @property
    def start(self):
        # the very beginning of the day of the timestamp
        return _timestamp(self.year, self.month, self.day)

    @property
    def end(self):
        return _timestamp(next_day(self.start))

    def strftime(self, fmt):
        return self.ts.strftime(fmt)

    @property
    def value(self):
        return self.ts.value

    def timestamp(self):
        return self.ts.timestamp()

    def __eq__(self, other):
        return self.ts == self._raw(other)

    def __hash__(self):
        return hash(self.ts)

    def __gt__(self, other):
        return self.ts > self._raw(other)

    def __ge__(self, other):
        return self.ts >= self._raw(other)

    def __lt__(self, other):
        return self.ts < self._raw(other)

    def __le__(self, other):
        return self.ts <= self._raw(other)
class datetime(AbstractTime):
    """
    Class `datetime` is used for representing date with time in onetick-py.
    It can be used both when specifying start and end time for queries and
    in column operations with :py:class:`onetick.py.Source`.
    :ref:`Datetime offset objects <datetime_offsets>` (e.g. `otp.Nano`, `otp.Day`)
    can be added to or subtracted from `otp.datetime` object.

    Parameters
    ----------
    first_arg: int, str, otp.datetime, pandas.Timestamp, datetime.datetime
        If `month`, `day` and other parts of date are specified,
        first argument will be considered as year.
        Otherwise, first argument will be converted to otp.datetime.
    month: int
        Number between 1 and 12.
    day: int
        Number between 1 and 31.
    hour: int, default=0
        Number between 0 and 23.
    minute: int, default=0
        Number between 0 and 59.
    second: int, default=0
        Number between 0 and 59.
    microsecond: int, default=0
        Number between 0 and 999999.
    nanosecond: int, default=0
        Number between 0 and 999.
    tzinfo: datetime.tzinfo
        Timezone object.
    tz: str
        Timezone name.

    Examples
    --------

    Initialization by datetime.datetime class from standard library

    >>> otp.datetime(datetime(2019, 1, 1, 1))
    2019-01-01 01:00:00

    Initialization by pandas Timestamp class

    >>> otp.datetime(pd.Timestamp(2019, 1, 1, 1))
    2019-01-01 01:00:00

    Initialization by int timestamp

    >>> otp.datetime(1234567890)
    1970-01-01 00:00:01.234567890

    Initialization by params with nanoseconds

    >>> otp.datetime(2019, 1, 1, 1, 2, 3, 4, 5)
    2019-01-01 01:02:03.000004005

    Initialization by string

    >>> otp.datetime("2019/01/01 1:02")
    2019-01-01 01:02:00

    `otp.dt` is the alias for `otp.datetime`

    >>> otp.dt(2019, 1, 1)
    2019-01-01 00:00:00

    See also
    --------
    :ref:`Datetime offset objects <datetime_guide>`.
    """

    def __init__(
        self,
        first_arg,
        month=None,
        day=None,
        hour=None,
        minute=None,
        second=None,
        microsecond=None,
        nanosecond=None,
        *,
        tzinfo=None,
        tz=None,
    ):  # TODO: python 3.8 change first_arg to positional only arg
        tz, tzinfo = self._process_timezones_args(tz, tzinfo)

        # the first argument is a year only if some other part of the date is set
        date_parts = (month, day, hour, minute, second, microsecond, nanosecond)
        if any(date_parts):
            self.ts = self._create_from_several_arg(first_arg, *date_parts, tzinfo)
        else:
            self.ts = self._create_from_one_arg(first_arg, tz, tzinfo)

    def _process_timezones_args(self, tz, tzinfo):
        """Convert the timezone name to tzinfo object, only one of the parameters can be specified."""
        if tz is None:
            return tz, tzinfo
        if tzinfo is not None:
            raise ValueError(
                "tzinfo and tz params are mutually exclusive parameters, "
                "they can't be specified both at the same time"
            )
        # pandas is broken https://github.com/pandas-dev/pandas/issues/31929
        return None, get_tzfile_by_name(tz)

    def _create_from_several_arg(self, first_arg, month, day, hour, minute, second, microsecond, nanosecond, tzinfo):
        """Create timestamp from the parts of the date, missing parts of the time are replaced with zeros."""
        if nanosecond is not None and not 0 <= nanosecond <= 999:
            raise ValueError(
                "Nanosecond parameter should be between 0 and 999. "
                "Please use microsecond parameter or otp.Nano object."
            )
        naive_ts = _timestamp(
            first_arg, month, day, hour or 0, minute or 0, second or 0, microsecond or 0, nanosecond or 0
        )
        return naive_ts.replace(tzinfo=tzinfo)

    def _create_from_one_arg(self, first_arg, tz, tzinfo):
        """Create timestamp from the single object, its timezone must not conflict with the specified one."""
        arg_tz = getattr(first_arg, "tz", None)
        arg_tzinfo = getattr(first_arg, "tzinfo", None)
        if (tz and arg_tz and arg_tz != tz) or (tzinfo and arg_tzinfo and arg_tzinfo != tzinfo):
            raise ValueError(
                "You've specified the timezone for the object, which already has it. "
                "It is recommended to swap the current timezone to desired by method of this object "
                "and then create otp.datetime object."
            )
        if isinstance(first_arg, (datetime, date)):
            first_arg = first_arg.ts
        return _timestamp(first_arg, tzinfo=tzinfo, tz=tz)

    @property
    def start(self):
        day_start = super().start
        return day_start.replace(tzinfo=self.tzinfo)

    @property
    def end(self):
        day_end = super().end
        return day_end.replace(tzinfo=self.tzinfo)

    def replace(self, **kwargs):
        """
        Replace parts of `otp.datetime` object.

        Parameters
        ----------
        year: int, optional
        month: int, optional
        day: int, optional
        hour: int, optional
        minute: int, optional
        second: int, optional
        microsecond: int, optional
        nanosecond: int, optional
        tzinfo: tz-convertible, optional

        Returns
        -------
        result: otp.datetime
            Timestamp with fields replaced.

        Examples
        --------
        >>> ts = otp.datetime(2022, 2, 24, 3, 15, 54, 999, 1)
        >>> ts
        2022-02-24 03:15:54.000999001
        >>> ts.replace(year=2000, month=2, day=2, hour=2, minute=2, second=2, microsecond=2, nanosecond=2)
        2000-02-02 02:02:02.000002002
        """
        replaced_ts = self.ts.replace(**kwargs)
        return datetime(replaced_ts)

    @property
    def tz(self):
        return self.ts.tz

    @property
    def tzinfo(self):
        return self.ts.tzinfo

    @property
    def hour(self):
        return self.ts.hour

    @property
    def minute(self):
        return self.ts.minute

    @property
    def second(self):
        return self.ts.second

    @property
    def microsecond(self):
        return self.ts.microsecond

    @property
    def nanosecond(self):
        return self.ts.nanosecond

    @staticmethod
    def now(tz=None):
        """
        Will return `otp.datetime` object with timestamp at the moment of calling this function.
        Not to be confused with function :func:`otp.now` which can only add column
        with current timestamp to the `otp.Source` when running the query.

        Parameters
        ----------
        tz : str or timezone object, default None
            Timezone to localize to.
        """
        current_ts = _timestamp.now(tz)
        return datetime_now(current_ts)

    def __add__(self, other):
        """
        Add :ref:`datetime offset <datetime_offsets>` to otp.datetime.

        Parameters
        ----------
        other: OTPBaseTimeOffset
            object to add

        Returns
        -------
        result: datetime
            new otp.datetime object created from the sum of the timestamp and the passed object

        Raises
        ------
        TypeError
            if integer is passed

        Examples
        --------
        >>> otp.datetime(2022, 2, 24) + otp.Nano(1)
        2022-02-24 00:00:00.000000001
        """
        self._error_on_int_param(other, "+")
        shifted_ts = self.ts + other
        return datetime(shifted_ts)

    def __sub__(self, other):
        """
        Subtract :ref:`datetime offset <datetime_offsets>` from otp.datetime.

        Parameters
        ----------
        other: OTPBaseTimeOffset, datetime
            object to subtract

        Returns
        -------
        result: datetime, :py:class:`pandas.Timedelta`
            return datetime if otp.Nano or another date offset object was passed as an argument,
            or pandas.Timedelta object if otp.datetime was passed as an argument.

        Raises
        ------
        TypeError
            if integer is passed

        Examples
        --------
        >>> otp.datetime(2022, 2, 24) - otp.Nano(1)
        2022-02-23 23:59:59.999999999
        """
        self._error_on_int_param(other, "-")
        difference = self.ts - getattr(other, "ts", other)
        # do not convert to datetime in case timedelta is returned (arg is date)
        if isinstance(difference, _timestamp):
            return datetime(difference)
        return difference

    def _error_on_int_param(self, other, op):
        """Raise TypeError if the operand of the arithmetic operation is exactly of int type."""
        if type(other) is int:
            raise TypeError(f"unsupported operand type(s) for {op}: 'otp.datetime' and 'int'")

    def __str__(self):
        return str(self.ts)

    def __repr__(self):
        return str(self.ts)

    def tz_localize(self, tz):
        """
        Localize tz-naive datetime object to a given timezone

        Parameters
        ----------
        tz: str or tzinfo
            timezone to localize datetime object into

        Returns
        -------
        result: datetime
            localized datetime object

        Examples
        --------
        >>> d = otp.datetime(2021, 6, 3)
        >>> d.tz_localize("EST5EDT")
        2021-06-03 00:00:00-04:00
        """
        localized_ts = self.ts.tz_localize(tz)
        return datetime(localized_ts)

    def tz_convert(self, tz):
        """
        Convert tz-aware datetime object to another timezone

        Parameters
        ----------
        tz: str or tzinfo
            timezone to convert datetime object into

        Returns
        -------
        result: datetime
            converted datetime object

        Examples
        --------
        >>> d = otp.datetime(2021, 6, 3, tz="EST5EDT")
        >>> d.tz_convert("Europe/Moscow")
        2021-06-03 07:00:00+03:00
        """
        converted_ts = self.ts.tz_convert(tz)
        return datetime(converted_ts)
class datetime_now(datetime):
    """ Class to distinguish the `now` in operations """


# short alias of the datetime class
dt = datetime
class date(datetime):
    """
    Class `date` is used for representing date in onetick-py.
    It can be used both when specifying start and end time for queries and
    in column operations with onetick.py.Source.

    Parameters
    ----------
    first_arg: int, str, otp.datetime, pandas.Timestamp, datetime.datetime, datetime.date
        If `month` and `day` arguments are specified, first argument will be considered as year.
        Otherwise, first argument will be converted to otp.date.
    month: int
        Number between 1 and 12.
    day: int
        Number between 1 and 31.

    Examples
    --------
    :ref:`Datetime guide <datetime_guide>`.
    """

    def __init__(self, first_arg: typing.Union[int, str, _date, _datetime, _timestamp, AbstractTime],
                 month=None, day=None):
        only_first_arg = month is None and day is None
        if only_first_arg:
            if isinstance(first_arg, AbstractTime):
                first_arg = first_arg.ts
            elif isinstance(first_arg, (int, str)):
                first_arg = _timestamp(first_arg)
            if isinstance(first_arg, (_datetime, _timestamp, datetime)):
                first_arg = first_arg.date()
            # remove hour, minutes and so on
            self.ts = _timestamp(first_arg)
        elif month and day:
            self.ts = _timestamp(first_arg, month, day)
        else:
            # only one of month and day is set
            raise ValueError("Please specify three integers (year, month, day) "
                             "or object or create date from (string, int timestamp, "
                             "pandas.Timestamp, otp.datetime, otp.date, "
                             "datetime.datetime, datetime.date)")

    def __str__(self):
        return self.ts.strftime("%Y-%m-%d")

    def __repr__(self):
        return self.ts.strftime("%Y-%m-%d")

    def to_str(self, format="%Y%m%d"):
        """
        Convert date to string, by default it will be in YYYYMMDD format.

        Parameters
        ----------
        format: str
            strftime format of string to convert to.

        Returns
        -------
        result: str
        """
        return self.ts.strftime(format)
class _integer_str(type):
def __str__(cls):
return cls.__name__
class _integer(int, metaclass=_integer_str):
    """
    Common base for integer types whose value range comes from a ctypes type.

    Subclasses are expected to define two class attributes:
    ``_CTYPE``, the ctypes type used both to measure the width of the value
    and to wrap the results of arithmetic operations, and
    ``_UNSIGNED``, telling whether the allowed range starts at zero.
    """

    def __new__(cls, value, *args, **kwargs):
        # the allowed range is derived from the width of the ctypes type in bits
        bits = 8 * ctypes.sizeof(cls._CTYPE)
        if cls._UNSIGNED:
            lowest = 0
            highest = (2 ** bits) - 1
        else:
            half_range = 2 ** (bits - 1)
            lowest = -half_range
            highest = half_range - 1
        if not lowest <= value <= highest:
            raise ValueError(f"{cls.__name__} values must be between {lowest} and {highest}")
        return super().__new__(cls, value, *args, **kwargs)

    def __get_result(self, value):
        # Anything that is not an int (a float from true division,
        # NotImplemented and so on) is handed back untouched.
        if not isinstance(value, int):
            return value
        # Passing the number through the ctypes type wraps it around on overflow.
        wrapped = self._CTYPE(value).value
        return self.__class__(wrapped)

    def __add__(self, other):
        return self.__get_result(super().__add__(other))

    def __radd__(self, other):
        return self.__get_result(super().__radd__(other))

    def __sub__(self, other):
        return self.__get_result(super().__sub__(other))

    def __rsub__(self, other):
        return self.__get_result(super().__rsub__(other))

    def __mul__(self, other):
        return self.__get_result(super().__mul__(other))

    def __rmul__(self, other):
        return self.__get_result(super().__rmul__(other))

    def __truediv__(self, other):
        return self.__get_result(super().__truediv__(other))

    def __rtruediv__(self, other):
        return self.__get_result(super().__rtruediv__(other))

    def __str__(self):
        plain = int(self)
        return str(plain)

    def __repr__(self):
        return f"{self.__class__.__name__}({self!s})"
class ulong(_integer):
    """
    OneTick data type representing unsigned long integer.

    The size of the type is not fixed: it is taken from ``ctypes.c_ulong``
    and may vary across different systems.
    Most commonly it's a 8-byte type with allowed values from 0 to 2**64 - 1.

    The value is validated in the constructor only,
    arithmetic operations are performed without overflow checking.

    Examples
    --------
    >>> t = otp.Tick(A=otp.ulong(1))
    >>> t['B'] = otp.ulong(1) + 1
    >>> t.schema
    {'A': <class 'onetick.py.types.ulong'>, 'B': <class 'onetick.py.types.ulong'>}

    Note that arithmetic operations may result in overflow.
    Here we get 2**64 - 1 instead of -1.

    >>> t = otp.Tick(A=otp.ulong(0) - 1)
    >>> otp.run(t) # doctest: +SKIP
    Time A
    0 2003-12-01 18446744073709551615
    """
    _CTYPE = ctypes.c_ulong
    _UNSIGNED = True
class uint(_integer):
    """
    OneTick data type representing unsigned integer.

    The size of the type is not fixed: it is taken from ``ctypes.c_uint``
    and may vary across different systems.
    Most commonly it's a 4-byte type with allowed values from 0 to 2**32 - 1.

    The value is validated in the constructor only,
    arithmetic operations are performed without overflow checking.

    Examples
    --------
    >>> t = otp.Tick(A=otp.uint(1))
    >>> t['B'] = otp.uint(1) + 1
    >>> t.schema
    {'A': <class 'onetick.py.types.uint'>, 'B': <class 'onetick.py.types.uint'>}

    Note that arithmetic operations may result in overflow.
    Here we get 2**32 - 1 instead of -1.

    >>> t = otp.Tick(A=otp.uint(0) - 1)
    >>> otp.run(t) # doctest: +SKIP
    Time A
    0 2003-12-01 4294967295
    """
    _CTYPE = ctypes.c_uint
    _UNSIGNED = True
class byte(_integer):
    """
    OneTick data type representing byte integer.

    The size of the type is taken from ``ctypes.c_byte``.
    Most commonly it's a 1-byte type with allowed values from -128 to 127.

    The value is validated in the constructor only,
    arithmetic operations are performed without overflow checking.

    Examples
    --------
    >>> t = otp.Tick(A=otp.byte(1))
    >>> t['B'] = otp.byte(1) + 1
    >>> t.schema
    {'A': <class 'onetick.py.types.byte'>, 'B': <class 'onetick.py.types.byte'>}

    Note that arithmetic operations may result in overflow.
    Here we get 127 instead of -129.

    >>> t = otp.Tick(A=otp.byte(-128) - 1)
    >>> otp.run(t)
    Time A
    0 2003-12-01 127
    """
    _CTYPE = ctypes.c_byte
    _UNSIGNED = False
class short(_integer):
    """
    OneTick data type representing short integer.

    The size of the type is not fixed: it is taken from ``ctypes.c_short``
    and may vary across different systems.
    Most commonly it's a 2-byte type with allowed values from -32768 to 32767.

    The value is validated in the constructor only,
    arithmetic operations are performed without overflow checking.

    Examples
    --------
    >>> t = otp.Tick(A=otp.short(1))
    >>> t['B'] = otp.short(1) + 1
    >>> t.schema
    {'A': <class 'onetick.py.types.short'>, 'B': <class 'onetick.py.types.short'>}

    Note that arithmetic operations may result in overflow.
    Here we get 32767 instead of -32769.

    >>> t = otp.Tick(A=otp.short(-32768) - 1)
    >>> otp.run(t)
    Time A
    0 2003-12-01 32767
    """
    _CTYPE = ctypes.c_short
    _UNSIGNED = False
# ------------------- #
def type2str(t):
    """
    Return the OneTick name for a Python type.

    ``int``, ``str`` and ``float`` are mapped to "long", "string" and "double",
    ``None`` gives an empty string, anything else is converted with ``str()``.
    """
    # identity comparison on purpose: subclasses of the builtins must not match
    builtin_names = (
        (int, "long"),
        (str, "string"),
        (float, "double"),
        (None, ''),
    )
    for known, onetick_name in builtin_names:
        if t is known:
            return onetick_name
    return str(t)
def str2type(type_name: str):
    """Map the name of a OneTick type to the matching Python/OTP type

    Args:
        type_name (str): name of type from CSV or OneTick DB, possible values listed here:
            http://solutions.pages.soltest.onetick.com/iac/onetick-server/ep_guide/EP/FieldTypeDeclarations.htm#supported_field_types

    Returns:
        class: Python/OTP type representing OneTick type,
            or None when the name is not recognized
    """
    # integer types
    if type_name in ("long", "int", "time32"):
        return int
    if type_name == "byte":
        return byte
    if type_name == "short":
        return short
    if type_name == "uint":
        return uint
    if type_name == "ulong":
        return ulong

    # floating point and decimal types
    if type_name in ("double", "float"):
        return float
    if type_name == "decimal":
        return decimal

    # time types
    if type_name == "msectime":
        return msectime
    if type_name == "nsectime":
        return nsectime

    # string types; a string of the default length is represented with plain str
    if type_name in ("string", "matrix", f"string[{string.DEFAULT_LENGTH}]"):
        return str
    if type_name == "varstring":
        return varstring
    if type_name.find("string") < 0:
        return None

    # remaining names mentioning "string": the length is written between the brackets
    open_pos = type_name.find("[")
    close_pos = type_name.find("]")
    length = int(type_name[open_pos + 1:close_pos])
    return string[length]
# TODO: move this union of types to some common place
def datetime2expr(
    dt: typing.Union[_datetime, _date, pd.Timestamp, date, datetime],
    timezone: str = None,
    timezone_naive: str = None,
) -> str:
    """
    Build OneTick ``PARSE_NSECTIME`` expression for python date or datetime value.

    The timezone is chosen in this order: `timezone` argument, timezone of `dt` itself,
    `timezone_naive` argument. If none of them is set, ``_TIMEZONE`` is put into
    the expression (timezone specified with otp.config['tz'] or otp.run() is used then).

    Parameters
    ----------
    dt
        date or datetime value
    timezone: str
        This timezone will be used unconditionally.
    timezone_naive: str
        This timezone will be used if `dt` is timezone-naive.
    """
    up_to_micros = dt.strftime('%Y-%m-%d %H:%M:%S.%f')
    # %f stops at microseconds, the last three digits (nanoseconds) are appended by hand
    if isinstance(dt, (pd.Timestamp, datetime)):
        nanos = f'{dt.nanosecond:03}'[-3:]
    else:
        nanos = '000'
    dt_str = up_to_micros + nanos

    tz = timezone
    if not tz:
        tz = get_timezone_from_datetime(dt)
    if not tz:
        tz = timezone_naive
    tz_expr = f'"{tz}"' if tz else '_TIMEZONE'

    return f'PARSE_NSECTIME("%Y-%m-%d %H:%M:%S.%J", "{dt_str}", {tz_expr})'
def value2str(v):
    """
    Build OneTick representation of the python value passed in the `v` parameter.

    Strings are quoted, decimals are wrapped into ``DECIMAL()``, floats are written
    without science notation, date and time values are converted with `datetime2expr`,
    large otp.nsectime values are wrapped into ``NSECTIME()``;
    all other values are converted with ``str()``.
    """
    if issubclass(type(v), str):
        # there is no escape, so replacing double quotes with concatenation with it
        escaped = str(v).replace('"', '''"+'"'+"''')
        return f'"{escaped}"'

    if isinstance(v, decimal):
        return f'DECIMAL({v})'

    if isinstance(v, float) and not isinstance(v, (_inf, _nan)):
        text = str(v)
        if "e" in text:
            # PY-286: support science notation
            text = f"{v:.20f}".rstrip("0")
        return str(nan) if text == "nan" else text

    if is_time_type(v):
        return datetime2expr(v)

    if not isinstance(v, nsectime):
        return str(v)

    # we do not need the same for msectime because it works as is
    if int(v) > 15e12:  # it is 2445/5/1
        return f'NSECTIME({v})'

    # Backward compatibility: because of a bug it used to be possible to pass
    # milliseconds into the otp.nsectime constructor. Only nanoseconds are expected there,
    # and the built-in NSECTIME works only with nanoseconds, so small values are
    # returned as plain numbers with a warning.
    warnings.warn('It seems that you are using number of milliseconds as nanoseconds. ')
    return str(v)
def time2nsectime(time, timezone=None):
    """
    Get nsectime timestamp from a value of a complex time type.

    Parameters
    ----------
    time: datetime.datetime, datetime.date, otp.datetime, otp.date, pandas.Timestamp
        time to convert
    timezone:
        convert timezone before nsectime calculation

    Returns
    -------
    result: int
        number of nanoseconds since epoch
    """
    if isinstance(time, (_datetime, _date)):
        stamp = pd.Timestamp(time)
    elif isinstance(time, date):
        stamp = datetime(time)
    else:
        stamp = time

    if not timezone:
        return stamp.value

    # accommodating legacy behaviour prior to 20220327-3 weekly build
    legacy_build = otp.__build__ < "20220327120000"
    if legacy_build:
        stamp = stamp.replace(tzinfo=None)
    elif stamp.tzinfo is None:
        stamp = stamp.tz_localize(timezone)
    else:
        stamp = stamp.tz_convert(timezone)
    return stamp.value
def is_time_type(time):
    """ Tell whether the argument is (an instance of) one of the time types

    Both classes and objects are accepted, for an object its type is checked.
    The time types are otp.date, otp.datetime,
    pd.Timestamp, datetime.date, datetime.datetime

    Parameters
    ----------
    time:
        object or type of the object

    Returns
    -------
    result: bool
        Return true if argument is time type

    Examples
    --------
    >>> is_time_type(datetime) # OTdirective: skip-example: ;
    True
    >>> is_time_type(type(5)) # OTdirective: skip-example: ;
    False
    >>> is_time_type(datetime(2019, 1, 1)) # OTdirective: snippet-name: types.is time;
    True
    """
    if inspect.isclass(time):
        candidate = time
    else:
        candidate = type(time)
    # do not check for datetime.datetime and pd.Timestamp, because they are in the same hierarchy
    # datetime.date -> datetime.datetime -> pd.Timestamp, where `->` means base class
    time_bases = (_date, datetime, date)
    return issubclass(candidate, time_bases)
def next_day(dt: typing.Union[_date, _datetime, date, datetime, pd.Timestamp]) -> _datetime:
    """
    Get the day following `dt` as datetime.datetime.

    Only year, month and day are kept in the result, so it points to the midnight.
    """
    shifted = dt + Day(1)
    following = shifted.date()
    return _datetime(following.year, following.month, following.day)