import warnings
from abc import ABC
from onetick.py import types as ott
from onetick.py.core.column_operations import _methods
from onetick.py.core.column_operations._methods.op_types import (
are_ints_not_time,
are_time,
are_floats,
are_strings
)
class Expr:
    """
    EP parameter's value can be set to an expression.
    Expressions are evaluated before parameters are actually passed to event processors.

    See also
    --------
    :py:meth:`~onetick.py.Operation.expr`
    """

    def __init__(self, operation):
        # The wrapped object is kept as is; it is only converted to a string
        # when the expression itself is formatted (see __str__).
        self.operation = operation

    def __str__(self):
        # Renders as ``expr(<str(operation)>)``.
        return f'expr({self.operation})'
[docs]class Operation(ABC):
"""
:py:class:`~onetick.py.Source` column operation container.
This is the object you get when applying most operations on :py:class:`~onetick.py.Column`
or on other operations.
Eventually you can add a new column using the operation you got or pass it as a parameter
to some functions.
Examples
--------
>>> t = otp.Tick(A=1)
>>> t['A']
Column(A, <class 'int'>)
>>> t['A'] / 2
Operation((A) / (2))
>>> t['B'] = t['A'] / 2
>>> t['B']
Column(B, <class 'float'>)
"""
emulation_enabled = False
def __init__(self, op_func=None, op_params=None, dtype=None, obj_ref=None, op_str=None):
self._op_func = op_func
self._op_params = op_params
self.obj_ref = obj_ref
if op_func:
if op_str:
raise ValueError("You should specify either op_func or op_str")
op_str, dtype = self._evaluate_func()
# save it for later check and reevaluate func if name was changed by prefix adding or renaming
self._params_names = self._get_param_names()
self._op_str = op_str
self._dtype = dtype
def __bool__(self):
if Operation.emulation_enabled:
# True is default for classes without overriden __bool__
return True
raise TypeError('It is not allowed to use compare in if-else and while clauses')
def __str__(self):
if self._get_param_names() != self._params_names:
self._evaluate_func(set_fields=True)
return self.op_str
def __repr__(self):
return f"Operation({str(self)})"
@property
def dtype(self):
"""
Returns the type of the column or operation.
See also
--------
:py:meth:`Source.schema <onetick.py.Source.schema>`
Examples
--------
>>> t = otp.Tick(A=1, B=2.3, C='3')
>>> t['TIMESTAMP'].dtype
<class 'onetick.py.types.nsectime'>
>>> t['A'].dtype
<class 'int'>
>>> t['B'].dtype
<class 'float'>
>>> t['C'].dtype
<class 'str'>
"""
dtype = self._dtype
if not dtype:
op_str, dtype = self._evaluate_func(set_fields=True)
return dtype
@property
def op_str(self):
op_str = self._op_str
if not op_str:
op_str, dtype = self._evaluate_func(set_fields=True)
return op_str
@property
def expr(self):
"""
Get expression to use in EP parameters.
See also
--------
:py:class:`~onetick.py.core.column_operations.base.Expr`
"""
return Expr(self)
[docs] def round(self, precision=None):
"""
Rounds input column with specified `precision`.
Parameters
----------
precision: int
Number from -12 to 12.
Positive precision is precision after the floating point.
Negative precision is precision before the floating point.
See also
--------
__round__
Examples
--------
>>> t = otp.Tick(A=1234.5678)
>>> t['B'] = t['A'].round()
>>> t['C'] = t['A'].round(2)
>>> t['D'] = t['A'].round(-2)
>>> t.to_df()
Time A B C D
0 2003-12-01 1234.5678 1235 1234.57 1200.0
Returns
-------
Operation
"""
return round(self, precision)
[docs] def map(self, arg, default=None):
"""
Map values of the column to new values according to the mapping in ``arg``.
If the value is not in the mapping, it is set to the ``default`` value.
If ``default`` value is not set, it is set to default value for the column type.
Parameters
----------
arg: dict
Mapping from old values to new values.
All values must have the same type, compatible with the column type.
default: simple value or Column or Operation
Default value if no mapping is found in ``arg``.
By default, it is set to default value for the column type.
(0 for numbers, empty string for strings, etc.)
Examples
--------
>>> t = otp.Ticks(A=[1, 2, 3, 4, 5])
>>> t['B'] = t['A'].map({1: 10, 2: 20, 3: 30})
>>> t.to_df()
Time A B
0 2003-12-01 00:00:00.000 1 10
1 2003-12-01 00:00:00.001 2 20
2 2003-12-01 00:00:00.002 3 30
3 2003-12-01 00:00:00.003 4 0
4 2003-12-01 00:00:00.004 5 0
Example with ``default`` parameter set:
>>> t = otp.Ticks(A=[1, 2, 3, 4, 5])
>>> t['B'] = t['A'].map({1: 10, 2: 20, 3: 30}, default=-1)
>>> t.to_df()
Time A B
0 2003-12-01 00:00:00.000 1 10
1 2003-12-01 00:00:00.001 2 20
2 2003-12-01 00:00:00.002 3 30
3 2003-12-01 00:00:00.003 4 -1
4 2003-12-01 00:00:00.004 5 -1
Returns
-------
Operation
"""
if not isinstance(arg, dict) or not arg:
raise TypeError("map() argument must be a dict with keys and values to map")
try:
values_type = ott.get_type_by_objects(arg.values())
except TypeError as e:
raise TypeError("map() argument must be a dict with same types for all values") from e
if default is not None:
try:
default_type = ott.get_type_by_objects([default])
ott.get_type_by_objects([default_type, values_type])
except TypeError as e:
raise TypeError(
f"map() default value type {default_type} must be compatible with values type {values_type}"
) from e
try:
keys_type = ott.get_type_by_objects(arg.keys())
except TypeError as e:
raise TypeError("map() argument must be a dict with same types for all keys") from e
try:
ott.get_type_by_objects([keys_type, self.dtype])
except TypeError as e:
raise TypeError(f"map() keys type {keys_type} must be compatible with column type {self.dtype}") from e
return _Operation(_methods._map, [self, arg, values_type, default])
[docs] def apply(self, lambda_f):
"""
Apply function or type to column
Parameters
----------
lambda_f: type or callable
if type - will convert column to requested type
if callable - Will recursively observe all conditions and emulate all
possible values for them to build whole calculation (branching) tree and
understand all final states, ie return values.
Using this information expression will be applied to column.
Examples
--------
Converting type of the column, e.g. string column to integer:
>>> data = otp.Ticks({'A': ['1', '2', '3']})
>>> data['B'] = data['A'].apply(int) + 10 # OTdirective: snippet-name: column operations.type convertation;
>>> data()
Time A B
0 2003-12-01 00:00:00.000 1 11
1 2003-12-01 00:00:00.001 2 12
2 2003-12-01 00:00:00.002 3 13
More complicated logic:
>>> data = otp.Ticks({'A': [-321, 0, 123]})
>>> data['SIGN'] = data['A'].apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0)
>>> data()
Time A SIGN
0 2003-12-01 00:00:00.000 -321 -1
1 2003-12-01 00:00:00.001 0 0
2 2003-12-01 00:00:00.002 123 1
"""
if lambda_f in [int, float, str, ott.msectime, ott.nsectime]:
return self._convert_to(lambda_f)
from onetick.py.core.lambda_object import apply_lambda
return apply_lambda(lambda_f, self)
[docs] def astype(self, to_type):
"""
Alias for the :meth:`apply` method with type.
See also
--------
:meth:`apply`
Examples
--------
>>> data = otp.Tick(A=1, B=2.2, C='3.3')
>>> data['A'] = data['A'].astype(str) + 'A'
>>> data['B'] = data['B'].astype(int) + 1
>>> data['C'] = data['C'].astype(float) + 0.1
>>> data()
Time A B C
0 2003-12-01 1A 3.0 3.4
"""
return self.apply(to_type)
[docs] def isin(self, *items):
"""
Check if column's value is in ``items``.
Parameters
----------
items
possible values
Returns
-------
Operation
See also
--------
:py:meth:`Source.__getitem__`
Examples
--------
>>> data = otp.Ticks(A=['a', 'b', 'c'])
>>> data['B'] = data['A'].isin('a', 'c')
>>> data()
Time A B
0 2003-12-01 00:00:00.000 a 1.0
1 2003-12-01 00:00:00.001 b 0.0
2 2003-12-01 00:00:00.002 c 1.0
Can be used as filter
>>> data = otp.Ticks(A=[1, 2, 3, 0])
>>> yes, no = data[data["A"].isin(0, 1)] # OTdirective: snippet-name: column operations.is in.constant;
>>> otp.run(yes)[["A"]]
A
0 1
1 0
columns and expressions are also supported
>>> # OTdirective: snippet-name: column operations.is in.from fields;
>>> data = otp.Ticks(A=["ab", "cv", "bc", "a", "d"], B=["a", "c", "b", "a", "a"])
>>> yes, no = data[data["A"].isin(data["B"], data["B"] + "b")]
>>> otp.run(yes)[["A", "B"]]
A B
0 ab a
1 a a
"""
return _Operation(_methods.isin, [self, items])
[docs] def fillna(self, value):
"""
Fill :py:class:`~onetick.py.nan` values with ``value``
Parameters
----------
value: float, int
value to use instead :py:class:`~onetick.py.nan`
Examples
--------
>>> data = otp.Ticks({'A': [1, otp.nan, 2]})
>>> data['A'] = data['A'].fillna(100) # OTdirective: snippet-name: column operations.fillna;
>>> data()
Time A
0 2003-12-01 00:00:00.000 1.0
1 2003-12-01 00:00:00.001 100.0
2 2003-12-01 00:00:00.002 2.0
"""
return _Operation(_methods.fillna, [self, value])
@property
def str(self):
"""
Property that provide access to string accessors
"""
if issubclass(self.dtype, str):
from onetick.py.core.column_operations.accessors.str_accessor import _StrAccessor
return _StrAccessor(self)
else:
raise TypeError(".str accessor is available only for string type columns")
@property
def dt(self):
"""
Property that provide access to datetime accessors
"""
if issubclass(self.dtype, ott.nsectime) \
or issubclass(self.dtype, ott.msectime):
from onetick.py.core.column_operations.accessors.dt_accessor import _DtAccessor
return _DtAccessor(self)
else:
raise TypeError(".dt accessor is available only for datetime type columns")
@property
def float(self):
"""
Property that provide access to float accessors
"""
if issubclass(self.dtype, float):
from onetick.py.core.column_operations.accessors.float_accessor import _FloatAccessor
return _FloatAccessor(self)
else:
raise TypeError(".float accessor is available only for float type columns")
[docs] def __abs__(self):
"""
Return the absolute value of float or int column.
Examples
--------
>>> t = otp.Tick(A=-1, B=-2.3)
>>> t['A'] = abs(t['A'])
>>> t['B'] = abs(t['B'])
>>> t()[['A', 'B']]
A B
0 1 2.3
"""
return _Operation(_methods.abs, [self])
[docs] def __round__(self, precision=None):
"""
Rounds value with specified ``precision``.
Parameters
----------
precision: int
Number from -12 to 12.
Positive precision is precision after the floating point.
Negative precision is precision before the floating point.
Examples
--------
>>> t = otp.Tick(A=1234.5678)
>>> t['B'] = round(t['A'])
>>> t['C'] = round(t['A'], 2)
>>> t['D'] = round(t['A'], -2)
>>> t.to_df()
Time A B C D
0 2003-12-01 1234.5678 1235 1234.57 1200.0
Returns
-------
Operation
"""
return _Operation(_methods.round, [self, precision])
def __pos__(self):
# TODO: is it working in OneTick?
return _Operation(_methods.pos, [self])
[docs] def __neg__(self):
"""
Return the negative value of float or int column.
Examples
--------
>>> t = otp.Tick(A=1, B=2.3)
>>> t['A'] = -t['A']
>>> t['B'] = -t['B']
>>> t()[['A', 'B']]
A B
0 -1 -2.3
"""
return _Operation(_methods.neg, [self])
[docs] def __add__(self, other):
"""
Return the sum of column and ``other`` value.
Parameters
----------
other: int, float, str, datetime offsets, Column
Examples
--------
>>> t = otp.Tick(A=1, B=2.3, C='c', D=otp.datetime(2022, 5, 12))
>>> t['A'] = t['A'] + t['B']
>>> t['B'] = t['B'] + 1
>>> t['C'] = t['C'] + '_suffix'
>>> t['D'] = t['D'] + otp.Day(1)
>>> t()[['A', 'B', 'C', 'D']]
A B C D
0 3.3 3.3 c_suffix 2022-05-13
"""
return _Operation(_methods.add, [self, other])
[docs] def __radd__(self, other):
"""
See also
--------
__add__
Examples
--------
>>> t = otp.Tick(A=1, B=2.3, C='c', D=otp.datetime(2022, 5, 12))
>>> t['A'] += t['B']
>>> t['B'] += 1
>>> t['C'] += '_suffix'
>>> t['D'] += otp.Day(1)
>>> t()[['A', 'B', 'C', 'D']]
A B C D
0 3.3 3.3 c_suffix 2022-05-13
"""
return _Operation(_methods.add, [other, self])
[docs] def __sub__(self, other):
"""
Subtract ``other`` value from column.
Parameters
----------
other: int, float, datetime offsets, Column
Examples
--------
>>> t = otp.Tick(A=1, B=2.3, D=otp.datetime(2022, 5, 12))
>>> t['A'] = t['A'] - t['B']
>>> t['B'] = t['B'] - 1
>>> t['D'] = t['D'] - otp.Day(1)
>>> t()[['A', 'B', 'D']]
A B D
0 -1.3 1.3 2022-05-11
"""
return _Operation(_methods.sub, [self, other])
[docs] def __rsub__(self, other):
"""
See also
--------
__sub__
Examples
--------
>>> t = otp.Tick(A=1, B=2.3, D=otp.datetime(2022, 5, 12))
>>> t['A'] -= t['B']
>>> t['B'] -= 1
>>> t['D'] -= otp.Day(1)
>>> t()[['A', 'B', 'D']]
A B D
0 -1.3 1.3 2022-05-11
"""
return _Operation(_methods.sub, [other, self])
[docs] def __mul__(self, other):
"""
Multiply column by ``other`` value.
Parameters
----------
other: int, float, str, Column
Examples
--------
>>> t = otp.Tick(A=1, B=2.3, C='c')
>>> t['A'] = t['A'] * t['B']
>>> t['B'] = t['B'] * 2
>>> t['C'] = t['C'] * 3
>>> t()[['A', 'B', 'C']]
A B C
0 2.3 4.6 ccc
"""
return _Operation(_methods.mul, [self, other])
[docs] def __rmul__(self, other):
"""
See also
--------
__mul__
Examples
--------
>>> t = otp.Tick(A=1, B=2.3, C='c')
>>> t['A'] *= t['B']
>>> t['B'] *= 2
>>> t['C'] *= 3
>>> t()[['A', 'B', 'C']]
A B C
0 2.3 4.6 ccc
"""
return _Operation(_methods.mul, [other, self])
[docs] def __truediv__(self, other):
"""
Divide column by ``other`` value.
Parameters
----------
other: int, float, Column
Examples
--------
>>> t = otp.Tick(A=1, B=2.3)
>>> t['A'] = t['A'] / t['B']
>>> t['B'] = t['B'] / 2
>>> t()[['A', 'B']]
A B
0 0.434783 1.15
"""
return _Operation(_methods.div, [self, other])
[docs] def __rtruediv__(self, other):
"""
See also
--------
__truediv__
Examples
--------
>>> t = otp.Tick(A=1, B=2.3)
>>> t['A'] /= t['B']
>>> t['B'] /= 2
>>> t()[['A', 'B']]
A B
0 0.434783 1.15
"""
return _Operation(_methods.div, [other, self])
[docs] def __mod__(self, other):
"""
Return modulo of division of int column by ``other`` value.
Parameters
----------
other: int, Column
Examples
--------
>>> t = otp.Tick(A=3, B=3)
>>> t['A'] = t['A'] % t['B']
>>> t['B'] = t['B'] % 2
>>> t()[['A', 'B']]
A B
0 0 1
"""
return _Operation(_methods.mod, [self, other])
[docs] def __invert__(self):
"""
Return inversion of filter operation.
Examples
--------
>>> t = otp.Ticks(A=range(4))
>>> t, _ = t[~(t['A'] > 1)]
>>> t()[['A']]
A
0 0
1 1
"""
result = _Operation(_methods.invert, [self])
return result
[docs] def __eq__(self, other):
"""
Return equality in filter operation.
Examples
--------
>>> t = otp.Ticks(A=range(4))
>>> t, _ = t[(t['A'] == 1)]
>>> t()[['A']]
A
0 1
"""
result = _Operation(_methods.eq, [self, other])
return result
[docs] def __ne__(self, other):
"""
Return inequality in filter operation.
Examples
--------
>>> t = otp.Ticks(A=range(4))
>>> t, _ = t[(t['A'] != 1)]
>>> t()[['A']]
A
0 0
1 2
2 3
"""
result = _Operation(_methods.ne, [self, other])
return result
[docs] def __or__(self, other):
"""
Return logical ``or`` in filter operation.
Examples
--------
>>> t = otp.Ticks(A=range(4))
>>> t, _ = t[(t['A'] == 1) | (t['A'] == 2)]
>>> t()[['A']]
A
0 1
1 2
"""
result = _Operation(_methods.or_, [self, other])
return result
[docs] def __and__(self, other):
"""
Return logical ``and`` in filter operation.
Examples
--------
>>> t = otp.Ticks(A=[1, 1], B=[1, 2])
>>> t, _ = t[(t['A'] == 1) & (t['B'] == 1)]
>>> t()[['A', 'B']]
A B
0 1 1
"""
result = _Operation(_methods.and_, [self, other])
return result
[docs] def __le__(self, other):
"""
Return <= in filter operation.
Examples
--------
>>> t = otp.Ticks(A=range(4))
>>> t, _ = t[t['A'] <= 2]
>>> t()[['A']]
A
0 0
1 1
2 2
"""
result = _Operation(_methods.le, [self, other])
return result
[docs] def __lt__(self, other):
"""
Return < in filter operation.
Examples
--------
>>> t = otp.Ticks(A=range(4))
>>> t, _ = t[t['A'] < 2]
>>> t()[['A']]
A
0 0
1 1
"""
result = _Operation(_methods.lt, [self, other])
return result
[docs] def __ge__(self, other):
"""
Return >= in filter operation.
Examples
--------
>>> t = otp.Ticks(A=range(4))
>>> t, _ = t[t['A'] >= 2]
>>> t()[['A']]
A
0 2
1 3
"""
result = _Operation(_methods.ge, [self, other])
return result
[docs] def __gt__(self, other):
"""
Return > in filter operation.
Examples
--------
>>> t = otp.Ticks(A=range(4))
>>> t, _ = t[t['A'] > 2]
>>> t()[['A']]
A
0 3
"""
result = _Operation(_methods.gt, [self, other])
return result
def _invalidate_cache(self):
self._params_names = None
if self._op_params:
for op in self._op_params:
if isinstance(op, _Operation):
op._invalidate_cache()
def _evaluate_func(self, *, set_fields=False):
if self._op_func:
op_str, dtype = self._op_func(*self._op_params) if self._op_params else self._op_func()
if set_fields:
self._params_names = self._get_param_names()
self._op_str = op_str
self._dtype = dtype
return op_str, dtype
def _get_param_names(self):
return [str(param) for param in self._op_params] if self._op_params else []
def _convert_to(self, to_type):
return _Operation(_methods.CONVERSIONS[self.dtype, to_type], [self])
def _make_python_way_bool_expression(self):
dtype = ott.get_object_type(self)
if dtype is bool:
return self
if are_ints_not_time(dtype):
self = _Operation(_methods.ne, (self, 0))
elif are_time(dtype):
self = _Operation(_methods.ne, (self._convert_to(int), 0))
elif are_floats(dtype):
self = _Operation(_methods.ne, (self, 0.0))
elif are_strings(dtype):
self = _Operation(_methods.ne, (self, ""))
else:
raise TypeError("Filter expression should return bool, int, float or string")
return self
_Operation = Operation # alias to support backward compatibility
class Raw(Operation):
    """
    Data type representing raw OneTick expression.

    Examples
    --------
    >>> t = otp.Tick(A=1)
    >>> t['A'] = '_TIMEZONE'
    >>> t['B'] = otp.raw('_TIMEZONE', dtype=str)
    >>> t(timezone='Asia/Yerevan')
    Time A B
    0 2003-12-01 _TIMEZONE Asia/Yerevan
    """

    def __init__(self, raw, dtype):
        """
        Parameters
        ----------
        raw: str
            Expression text, passed to the base class as ``op_str``.
        dtype: type
            Type of the expression result. A warning is issued if it is exactly ``str``.
        """
        if dtype is str:
            # The sentences are separate literals, so each one but the last ends with a space.
            warnings.warn(
                f'Be careful, default string length in OneTick is {ott.string.DEFAULT_LENGTH}. '
                "Length of the result raw expression can't be calculated automatically, "
                "so you'd better use onetick.py.string type."
            )
        super().__init__(op_str=raw, dtype=dtype)