Source code for onetick.py.db.db

# pylama:ignore=W0237
import logging
import os
import warnings

from contextlib import contextmanager
from datetime import datetime, timedelta
from collections import defaultdict
from dateutil.relativedelta import relativedelta
from typing import List, Dict, Union
import subprocess

from onetick import py as otp
from onetick.py.core import db_constants as constants
from onetick.py import utils, sources, session, configuration

import pandas
from uuid import uuid4


def _tick_type_detector(tick_type, obj):
    """
    Return ``tick_type`` as is when it is given, otherwise guess it from the columns of ``obj``.

    Every column returned by ``obj.columns()`` is matched against the characteristic
    fields of the ORDER, QTE, NBBO and TRD tick types. The tick type that is the first
    to reach the highest number of matches wins; "TRD" is returned when nothing matches.
    """
    if tick_type is not None:
        return tick_type

    # order matters: on equal number of matches the earlier tick type is kept
    traits_by_type = (
        ("ORDER", ("ID", "BUY_FLAG", "SIDE", "QTY", "QTY_FILLED", "QTY", "ORDTYPE", "PRICE", "PRICE_FILLED")),
        ("QTE", ("ASK_PRICE", "BID_PRICE", "ASK_SIZE", "BID_SIZE")),
        ("NBBO", ("ASK_PRICE", "BID_PRICE", "ASK_SIZE", "BID_SIZE", "BID_EXCHANGE", "ASK_EXCHANGE")),
        ("TRD", ("PRICE", "SIZE")),
    )

    hits = {name: 0 for name, _ in traits_by_type}
    best_type, best_hits = "TRD", 0

    for column in obj.columns():
        for name, traits in traits_by_type:
            if column not in traits:
                continue
            hits[name] += 1
            # strict comparison: a tie never replaces the current leader
            if hits[name] > best_hits:
                best_type, best_hits = name, hits[name]

    return best_type


def write_to_db(src: 'otp.Source',
                dest: Union[str, 'otp.DB'],
                date: datetime.date,
                symbol: Union[str, 'otp.Column'],
                tick_type: Union[str, 'otp.Column'],
                timezone: str = None,
                execute: bool = True,
                **kwargs):
    """
    Write a source to the database.

    Compared to otp.Source.write() this function additionally detects
    the tick type automatically and is able to run the query immediately.

    Parameters
    ----------
    src: :class:`otp.Source`
        source to write to the database.
    dest: str or :py:class:`otp.DB <onetick.py.DB>`
        name of the database or the database object.
    date: datetime or None
        day the data is saved to.
        It is required when ``execute`` is True: the query is then run
        from the beginning of this day until the same time one day later.
    symbol: str or Column
        symbol name or the column the symbol name is taken from.
    tick_type: str or Column
        tick type or the column the tick type is taken from.
        When it is None the tick type is guessed from the schema of ``src``
        (ORDER, QTE, TRD and NBBO tick types are supported).
    timezone: str
        timezone used to run the query when ``execute`` is set.
        Defaults to `otp.config.tz`.
    execute: bool
        whether to run the query right away.
        If True, the result of the query is returned
        (probably empty, unless 'propagate' parameter is specified).
        Otherwise, the modified copy of ``src`` with the write operation is returned.
    kwargs:
        other arguments passed to :py:meth:`onetick.py.Source.write`.
        'propagate' defaults to False when it is not specified.
    """

    detected_tick_type = _tick_type_detector(tick_type, src)

    if timezone is None:
        timezone = configuration.config.tz

    if 'propagate' not in kwargs:
        kwargs['propagate'] = False

    writer = src.write(dest, symbol, detected_tick_type, date, **kwargs)
    if not execute:
        return writer

    # otp date-like objects expose the underlying timestamp as 'ts', plain datetimes are used as is
    start = getattr(date, 'ts', date)
    end = start + relativedelta(days=1)
    return otp.run(writer, start=start, end=end, timezone=timezone)


class _DB:
    """
    Base database object: keeps the name and the prepared configuration of a database
    and provides helpers that run queries against it within a session.
    """

    _LOCAL = False
    # this flag means that db section should be added to locator and acl (True)
    # or db locates on some ts (False)

    def __init__(
        self,
        name,
        src=None,
        date=None,
        symbol=None,
        tick_type=None,
        db_properties: Dict = None,
        db_locations: List = None,
        db_raw_data: Dict = None,
        db_feed: Dict = None,
        write=True,
    ):
        # db_properties and db_locations are expected to be fully prepared by the caller
        # or to be None (in case of remote db)
        self.name = name
        # for derived databases ("parent//derived") the id is the parent part of the name
        self.id = name.partition("//")[0]
        self._db_properties = db_properties
        self._db_locations = db_locations
        self._db_raw_data = db_raw_data
        self._db_feed = db_feed
        self._write = write
        if src is None:
            return
        self.add(src=src, date=date, symbol=symbol, tick_type=tick_type)

    @staticmethod
    def _format_params(params):
        """
        Return a copy of ``params`` where every value is converted to string.
        Datetime values are formatted as ``%Y%m%d%H%M%S``.
        """
        return {
            key: value.strftime("%Y%m%d%H%M%S") if isinstance(value, datetime) else str(value)
            for key, value in params.items()
        }

    # TODO: move this method to DB
    def add(self, src, date=None, symbol=None, tick_type=None, timezone=None, **kwargs):
        """
        Add data to database.
        If ticks with the same timestamp are already presented in database old values won't be updated.

        Parameters
        ----------
        src: :class:`otp.Source`
            source that will be written to the database.
        date: datetime or None
            date of the day in which the data will be saved.
            The timestamps of the ticks should be between the start and the end of the day.
            By default, it is set to `otp.config.default_start_time`.
        symbol: str or Column
            resulting symbol name string or column to get symbol name from.
            By default, it is set to `otp.config.default_db_symbol`.
        tick_type: str or Column
            resulting tick type string or column to get tick type from.
            If tick type is None then an attempt will be taken to get
            tick type name automatically based on the ``src`` source's schema.
            (ORDER, QTE, TRD and NBBO tick types are supported).
        timezone: str
            This timezone will be used for running the query.
            By default, it is set to `otp.config.tz`.
        kwargs:
            other arguments that will be passed to :py:meth:`onetick.py.Source.write` function.

        Returns
        -------
        Result of the executed write query if 'propagate' is set, None otherwise.

        Examples
        --------

        Data is saved to the specified date, symbol and tick type:
        (note that ``session`` is created before this example)

        >>> db = otp.DB('MYDB2')
        >>> db.add(otp.Ticks(A=[4, 5, 6]), date=otp.dt(2003, 1, 1), symbol='SMB', tick_type='TT')
        >>> session.use(db)

        We can get the same data by specifying the same parameters:

        >>> data = otp.DataSource(db, date=otp.dt(2003, 1, 1), symbols='SMB', tick_type='TT')
        >>> otp.run(data)
                             Time  A
        0 2003-01-01 00:00:00.000  4
        1 2003-01-01 00:00:00.001  5
        2 2003-01-01 00:00:00.002  6
        """
        if timezone is None:
            timezone = configuration.config.tz

        if date is not None:
            target_date = date
        else:
            target_date = configuration.config.default_start_time

        if symbol is not None:
            target_symbol = symbol
        else:
            target_symbol = configuration.config.default_db_symbol

        # 'propagate_ticks' is accepted as a fallback for 'propagate'
        if 'propagate' not in kwargs:
            kwargs['propagate'] = kwargs.get('propagate_ticks', False)

        written = self._session_handler(
            write_to_db, src, self.name, target_date, target_symbol, tick_type, timezone, **kwargs
        )

        # We need to keep backward-compatibility,
        # because before there was no ability to get written ticks
        return written if kwargs.get('propagate') else None

    @property
    def properties(self):
        """
        Get dict of database properties.

        Returns
        -------
        dict

        Examples
        --------
        >>> otp.DB('X').properties
        {'symbology': 'BZX',
         'archive_compression_type': 'NATIVE_PLUS_GZIP',
         'tick_timestamp_type': 'NANOS'}
        """
        return self._db_properties

    @property
    def locations(self):
        """
        Get list of database locations.

        Returns
        -------
        list of dict

        Examples
        --------
        >>> otp.DB('X').locations # doctest:+ELLIPSIS
        [{'access_method': 'file',
          'start_time': '20021230000000',
          'end_time': '21000101000000',
          ...}]
        """
        return self._db_locations

    @property
    def raw_data(self):
        """
        Get configuration of raw databases.

        Returns
        -------
        list of dict

        Examples
        --------
        >>> db = otp.DB('RAW_EXAMPLE',
        ...     db_raw_data=[{
        ...         'id': 'PRIMARY_A',
        ...         'prefix': 'DATA.',
        ...         'locations': [
        ...             {'mount': 'mount1'}
        ...         ]
        ...     }]
        ... )
        >>> db.raw_data # doctest:+ELLIPSIS
        [{'id': 'PRIMARY_A', 'prefix': 'DATA.', 'locations': [{'mount': 'mount1', 'access_method': 'file', ...}]}]
        """
        return self._db_raw_data

    @property
    def feed(self):
        """
        Get dict of database feed configuration.

        Returns
        -------
        dict

        Examples
        --------
        >>> db = otp.DB('RAW_EXAMPLE',
        ...     db_raw_data=[{
        ...         'id': 'PRIMARY_A',
        ...         'prefix': 'DATA.',
        ...         'locations': [
        ...             {'mount': 'mount1'}
        ...         ]
        ...     }],
        ...     db_feed={'type': 'rawdb', 'raw_source': 'PRIMARY_A'},
        ... )
        >>> db.feed
        {'type': 'rawdb', 'raw_source': 'PRIMARY_A', 'format': 'native'}
        """
        return self._db_feed

    @property
    def symbols(self):
        """
        Get list of symbol names found in the database (empty list if there are none).
        """
        return self._session_handler(self._symbols) or []

    def _session_handler(self, func, *args, **kwargs):
        """
        Run ``func(*args, **kwargs)`` making sure that a session exists and,
        for local databases, that the database is present in its locator and acl.

        The current session is reused when there is one; otherwise a new session is created
        and closed afterwards. Entries added to the locator or acl of a reused session
        are removed again after the call.

        :param func: function to run
        :return: the value returned by ``func``
        """
        active_session = session.Session._instance
        owns_session = active_session is None
        if owns_session:
            active_session = session.Session()

        added_to_locator = False
        added_to_acl = False
        outcome = None

        try:
            if self._LOCAL:
                if self.id not in active_session.locator.databases:
                    if self.id != self.name:  # Derived DB
                        raise Exception(
                            "You need include derived DB into the session use the .use method before adding "
                            " data there."
                        )  # TODO: remove restriction with this fix
                        # https://onemarketdata.atlassian.net/browse/PY-134

                    # the flag is raised before the call so that cleanup is attempted even if it fails
                    added_to_locator = True
                    active_session.locator.add(self)
                if self.id not in active_session.acl.databases:
                    added_to_acl = True
                    active_session.acl.add(self)
            outcome = func(*args, **kwargs)
        finally:
            if owns_session:
                active_session.close()
            elif self._LOCAL:
                if added_to_locator:
                    active_session.locator.remove(self)
                if added_to_acl:
                    active_session.acl.remove(self)

        return outcome

    def _symbols(self):
        """
        Query the database for its symbols and return their names
        without everything up to the last ':' separator.
        """
        found = otp.run(sources.Symbols(self))
        if found.empty:
            return []
        return [full_name.split(":")[-1] for full_name in found["SYMBOL_NAME"]]

    def __repr__(self):
        return "DB: " + self.name

    def __str__(self):
        return self.name


class DB(_DB):
    """
    Creates database object.
    Used to configure databases locator and access list properties
    and to write data to the database.
    By default creates temporary database.
    This object can then be :py:meth:`used <onetick.py.Session.use>` in :py:class:`onetick.py.Session`.

    Note: already existing ticks can't be updated.
    Data can only be added during db creation or with :py:meth:`add` method.

    Dictionaries and lists passed as ``db_properties``, ``db_locations``, ``db_raw_data``
    and ``db_feed`` are not modified: defaults are applied to copies of them.

    Parameters
    ----------
    name : str
        Database name.
        In case you want to specify derived db, you should specify in "parent//derived" format.
        Derived database inherits parent database's properties.
        If not specified, a random name is generated.
    src : optional
        Data to add to database.
        A :pandas:`pandas.DataFrame` is first dumped to a temporary CSV file.
    clean_up : bool, optional
        Flag that controls temporary database cleanup
    date : datetime.datetime, optional
        src will be added to this date
    symbol : str, optional
        symbol name to add data
    tick_type : str, optional
        tick type to add data
    kind : str, optional
        If set to 'accelerator', then ``archive_duration=continuous`` and ``growable_archive=true``
        are used as defaults for every location.
    db_properties : :obj:`dict`, optional
        Properties of database to add to locator
    db_locations : :obj:`list` of :obj:`dict`, optional
        Locations of database to add to locator. This parameter is list, because database in locator
        can have several location sections. If not specified, some temporary directory is used as
        database location.
    db_raw_data: :obj:`list` of :obj:`dict`, optional
        Raw databases' configuration.
    db_feed: dict, optional
        Feed configuration.
    write : bool, optional
        Flag that controls access to write to database
    destroy_access : bool, optional
        Flag that controls access to destroy to database

    Examples
    --------

    Database can be initialized along with data:

    >>> data = otp.Ticks(X=['hello', 'world!'])
    >>> db = otp.DB('MYDB', data)

    You can specify derived db by using ``//`` as a separator:

    >>> data = otp.Ticks(X=['parent1', 'parent2'])
    >>> db = otp.DB('DB_A', data)
    >>> db.add(data)
    >>> data_derived = otp.Ticks(X=['derived1', 'derived2'])
    >>> db_derived = otp.DB('DB_A//DB_D')
    >>> session.use(db_derived)
    >>> db_derived.add(data_derived)
    """

    _LOCAL = True

    def __init__(
        self,
        name=None,
        src=None,
        date=None,
        symbol=None,
        tick_type=None,
        kind='archive',
        db_properties=None,
        db_locations=None,
        db_raw_data=None,
        db_feed=None,
        write=True,
        clean_up=True,
        destroy_access=False,
    ):
        if name is not None and not isinstance(name, str):
            message = f"Database name expected to be string got {type(name)}"
            logging.error(message)
            raise TypeError(message)

        self._clean_up = clean_up
        self._destroy_access = destroy_access
        self._path = None
        # temporary directory is created lazily: only when some location needs it
        # or when a DataFrame has to be dumped to disk
        self._tmp_dir = None
        self._db_suffix = ""

        if name:
            self._db_suffix = name
        else:
            # Mostly for temporary databases
            name = uuid4().hex.upper()
            self._db_suffix = "db_" + name

        db_properties = self._prepare_db_properties(db_properties)
        db_day_boundary_tz_set = 'day_boundary_tz' in db_properties
        db_locations = self._prepare_db_locations(db_locations,
                                                  db_day_boundary_tz_set=db_day_boundary_tz_set,
                                                  kind=kind)
        db_raw_data = self._prepare_db_raw_data(db_raw_data, db_properties)
        db_feed = self._prepare_db_feed(db_feed)

        if isinstance(src, pandas.DataFrame):
            if self._tmp_dir is None:
                # every location was specified explicitly, so no temporary directory
                # has been created yet, but one is still needed for the CSV file
                self._tmp_dir = self._new_tmp_dir()
            csv_path = os.path.join(self._tmp_dir.path, uuid4().hex.upper() + ".csv")
            src.to_csv(csv_path, index=False)
            src = sources.CSV(csv_path)

        super().__init__(
            name=name,
            src=src,
            date=date,
            symbol=symbol,
            tick_type=tick_type,
            db_properties=db_properties,
            db_locations=db_locations,
            db_raw_data=db_raw_data,
            db_feed=db_feed,
            write=write,
        )

    def _prepare_db_properties(self, properties):
        """
        Return database properties with the defaults applied and all values converted to strings.
        The passed dictionary is left untouched.
        """
        properties = {} if properties is None else dict(properties)

        # set default properties if they are not specified
        properties.setdefault("symbology", configuration.config.default_symbology)
        properties.setdefault("archive_compression_type", constants.compression_type.NATIVE_PLUS_GZIP)
        properties.setdefault("tick_timestamp_type", "NANOS")

        return self._format_params(properties)

    def _new_tmp_dir(self):
        """
        Create a temporary directory named after the database.
        For derived databases ("parent//derived") the directory is nested: parent/DERIVED/derived.
        """
        dirs_list = self._db_suffix.replace("//", " DERIVED ").split()
        dir_name = os.path.join('', *dirs_list)
        return utils.TmpDir(dir_name, clean_up=self._clean_up)

    def _create_db(self):
        """
        Create temporary directory for the database and use it as the database path
        unless the path is already set.
        """
        logging.debug('Creating temporary directory for db "%s"', self._db_suffix)
        self._tmp_dir = self._new_tmp_dir()
        if not self._path:
            self._path = self._tmp_dir.path

    def _prepare_db_locations(self, locations, db_day_boundary_tz_set=None, kind=None, default_location=None):
        """
        Return list of locations with the defaults applied and all values converted to strings.
        The passed dictionaries are left untouched.

        Parameters
        ----------
        locations: list of dict or None
            locations to prepare, a single default location is used if nothing is passed.
        db_day_boundary_tz_set: bool or None
            False means that 'day_boundary_tz' is not set database-wide and
            a default value should be set for each location.
            None disables this logic.
        kind: str
            'accelerator' adds continuous growable archive defaults to every location.
        default_location: str
            path to use for locations without 'location' parameter.
            If not set, temporary directory of the database is created and used.

        Raises
        ------
        ValueError
            if 'location' is not specified for socket or memory access method.
        """
        if not locations:
            locations = [{}]

        result = []

        # set default properties if they are not specified
        for given_location in locations:
            # work on a copy to not modify dictionaries passed by the caller
            location = dict(given_location)
            location.setdefault("access_method", constants.access_method.FILE)
            location.setdefault("start_time", constants.DEFAULT_START_DATE - timedelta(days=2))
            location.setdefault("end_time", constants.DEFAULT_END_DATE + timedelta(days=1))

            if not db_day_boundary_tz_set and db_day_boundary_tz_set is not None:
                # If the day_boundary_tz is not set database-wide, then we want it to have
                # a default value for each location
                day_boundary_tz = utils.default_day_boundary_tz(self._db_suffix)
                if day_boundary_tz:
                    location.setdefault("day_boundary_tz", day_boundary_tz)

            if 'location' not in location:
                methods = {constants.access_method.SOCKET, constants.access_method.MEMORY}
                if location['access_method'] in methods:
                    raise ValueError("Parameter 'location' must be specified when parameter"
                                     f" 'access_method' is set to {methods}")
                if not default_location:
                    self._create_db()
                    location['location'] = self._path
                else:
                    location['location'] = default_location

            if kind == 'accelerator':
                location.setdefault("archive_duration", "continuous")
                location.setdefault("growable_archive", "true")

            # TODO: think what to do if there will be several locations
            result.append(location)

        return list(map(self._format_params, result))

    def _prepare_db_raw_data(self, raw_data, db_properties):
        """
        Validate configuration of raw databases and return it with the defaults applied.
        The passed dictionaries are left untouched.

        Raises
        ------
        ValueError
            if raw database ids are not unique, 'prefix' is missing
            or 'mount' parameters of locations contradict 'auto_discover_mounts' database property.
        """
        if not raw_data:
            return []

        raw_ids = set()
        auto_discover_mounts = db_properties.get('auto_discover_mounts', '').lower() == 'yes'
        default_location = None
        prepared = []

        for given_raw_db in raw_data:
            # work on a copy to not modify dictionaries passed by the caller
            raw_db = dict(given_raw_db)
            raw_db.setdefault('id', 'PRIMARY')
            if raw_db['id'] in raw_ids:
                raise ValueError("Parameter 'id' must be set and must be unique for raw databases")
            raw_ids.add(raw_db['id'])

            if 'prefix' not in raw_db:
                raise ValueError("Parameter 'prefix' must be specified for raw database")

            # single 'raw' directory inside the database path is shared by all raw databases
            if self._path is not None and default_location is None:
                default_location = utils.TmpDir(rel_path='raw', base_dir=self._path, clean_up=self._clean_up).path

            raw_db['locations'] = self._prepare_db_locations(raw_db.get('locations'),
                                                             db_day_boundary_tz_set=None,
                                                             default_location=default_location)

            if auto_discover_mounts and len(raw_db['locations']) > 1:
                raise ValueError("Only one location must be specified for raw database"
                                 " when parameter 'auto_discover_mounts' is specified for database")

            for location in raw_db['locations']:
                if 'mount' not in location and not auto_discover_mounts:
                    raise ValueError("Parameter 'mount' must be specified for raw database location")
                if 'mount' in location and auto_discover_mounts:
                    raise ValueError("Parameter 'mount' must not be specified for raw database location"
                                     " when parameter 'auto_discover_mounts' is specified for database")

            prepared.append(raw_db)

        return prepared

    def _prepare_db_feed(self, feed):
        """
        Validate feed configuration and return it with the defaults applied
        and all values converted to strings. The passed dictionary is left untouched.

        Raises
        ------
        ValueError
            if 'type' is not specified or 'format' of 'rawdb' feed is not supported.
        """
        if not feed:
            return {}

        # work on a copy to not modify dictionary passed by the caller
        feed = dict(feed)

        if 'type' not in feed:
            raise ValueError("Parameter 'type' must be specified for database feed")

        if feed['type'] == 'rawdb':
            feed.setdefault('format', 'native')
            formats = ('native', 'rt', 'ascii', 'xml')
            if feed['format'] not in formats:
                raise ValueError(f"Parameter 'format' must be one of {formats}")
            feed.setdefault('raw_source', 'PRIMARY')

        return self._format_params(feed)
""" Keep here as example of custom databases that can be defined in client code P_CME = DB( name="P_CME", db_properties={ "symbology": "CTA", "archive_compression_type": constants.compression_type.NATIVE_PLUS_GZIP, "tick_search_max_boundary_offset_sec": 1800, "tick_timestamp_type": "NANOS", }, db_locations=[ { "access_method": constants.access_method.SOCKET, "location": servers.laser, "start_time": datetime(year=2008, month=9, day=1), "end_time": constants.DEFAULT_END_DATE, } ], ) MS127 = DB( name="MS127", db_properties={ "symbology": "MSGR", "ref_data_db": "REF_DATA_MS127", "archive_compression_type": constants.compression_type.NATIVE_PLUS_GZIP, "ignore_previous_day_corrections_on_reload": "yes", }, db_locations=[ { "access_method": constants.access_method.SOCKET, "location": "laser:50025", "start_time": datetime(year=2010, month=12, day=31), "end_time": constants.DEFAULT_END_DATE, } ], ) MS44 = DB( name="MS44", db_properties={ "symbology": "MSGR", "ref_data_db": "REF_DATA_MS44", "archive_compression_type": constants.compression_type.NATIVE_PLUS_GZIP, "ignore_previous_day_corrections_on_reload": "yes", }, db_locations=[ { "access_method": constants.access_method.SOCKET, "location": "laser:50025", "start_time": datetime(year=2010, month=12, day=31), "end_time": constants.DEFAULT_END_DATE, } ], ) TAQ_NBBO = DB( name="TAQ_NBBO", db_properties={ "symbology": "BZX", "price_not_key": True, "memory_data_max_life_hours": 30, "memory_db_dir": "/onetick-tickdata-com/STORAGE_GATEWAY/DEEP_HISTORY/US_TED/NBBO/shmem", "mmap_db_compression_type": constants.compression_type.NATIVE_PLUS_GZIP, }, db_locations=[ { "access_method": constants.access_method.FILE, "location": "/onetick-tickdata-com/STORAGE_GATEWAY/DEEP_HISTORY/US_TED/NBBO/", "start_time": datetime(year=2001, month=1, day=1), "end_time": constants.DEFAULT_END_DATE, } ], ) NYSE_TAQ = DB( name="NYSE_TAQ", db_properties={ "symbology": "BZX", "price_not_key": True, "memory_data_max_life_hours": 30, "memory_db_dir": 
"/onetick-tickdata-com/STORAGE_GATEWAY/DEEP_HISTORY/US_TED/TAQ/shmem", "mmap_db_compression_type": constants.compression_type.NATIVE_PLUS_GZIP, }, db_locations=[ { "access_method": constants.access_method.FILE, "location": "/onetick-tickdata-com/STORAGE_GATEWAY/DEEP_HISTORY/US_TED/TAQ/", "start_time": datetime(year=2003, month=10, day=1), "end_time": constants.DEFAULT_END_DATE, } ], ) """
[docs]class RefDB(DB): """ Creates reference database object. Parameters ---------- name : str Database name clean_up : bool, optional Flag that controls temporary database cleanup db_properties : :obj:`dict`, optional Properties of database to add to locator db_location : :obj:`dict`, optional Location of database to add to locator. Reference database must have a single location, pointing to a continuous archive database. write : bool, optional Flag that controls access to write to database destroy_access : bool, optional Flag that controls access to destroy to database Examples -------- >>> properties = {'symbology': 'TICKER'} >>> location = {'archive_duration': 'continuous'} >>> ref_db = otp.RefDB('REF_DATA_MYDB', db_properties=properties, db_location=location) >>> session.use(ref_db) >>> >>> data = 'A||20100102000000|20100103000000|B||20100103000000|20100104000000|' >>> out, err = ref_db.put([otp.RefDB.SymbolNameHistory(data, 'TICKER')]) >>> b'Total ticks 8' in err and b'Total symbols 6' in err True >>> >>> properties = {'ref_data_db': ref_db.name, 'symbology': 'TICKER'} >>> db = otp.DB('MYDB', db_properties=properties) >>> session.use(db) >>> >>> data = otp.Ticks(X=['hello'], start=otp.dt(2010, 1, 2), end=otp.dt(2010, 1, 3)) >>> data = otp.run(data.write(db.name, 'A', 'MSG', date=otp.dt(2010, 1, 2))) >>> data = otp.Ticks(X=['world!'], start=otp.dt(2010, 1, 3), end=otp.dt(2010, 1, 4)) >>> data = otp.run(data.write(db.name, 'B', 'MSG', date=otp.dt(2010, 1, 3))) >>> >>> data = otp.DataSource(db.name, tick_type='MSG') >>> s_dt, e_dt, symbol_date = otp.dt(2010, 1, 1), otp.dt(2010, 1, 4), otp.dt(2010, 1, 2) >>> otp.run(data, symbols='A', start=s_dt, end=e_dt, symbol_date=symbol_date) Time X 0 2010-01-02 hello 1 2010-01-03 world! 
""" def __init__( self, name=None, kind='archive', db_properties=None, db_location=None, write=True, clean_up=True, destroy_access=False, ): # ref db must have a single location, pointing to a continuous archive database # (its location in the locator file must have archive_duration=continuous set) if db_location is None: db_location = {} db_location.setdefault('archive_duration', 'continuous') super().__init__( name=name, kind=kind, db_properties=db_properties, db_locations=[db_location], write=write, clean_up=clean_up, destroy_access=destroy_access, ) class Section(): """ Specification of a reference database section. Section content can be specified as a string or otq query. The format of string and output columns of otq query must correspond with the section documentation. Parameters ---------- name : str Section name data : str or :class:`otp.Source` Content of the section attrs : :obj:`dict`, optional Attributes of the section Examples -------- Data provided as a string: >>> data = 'SYM1|20100101093000|20100101110000' + os.linesep >>> data += 'SYM2|20100101110000|20100103140000' >>> section = otp.RefDB.Section('SECTION_NAME', data, {'ATTR1': 'VAL1', 'ATTR2': 'VAL2'}) >>> print(section) <SECTION_NAME ATTR1="VAL1" ATTR2="VAL2"> SYM1|20100101093000|20100101110000 SYM2|20100101110000|20100103140000 </SECTION_NAME> Data provided as a :class:`otp.Source`: >>> data = dict() >>> data['SYMBOL_NAME'] = ['SYM1', 'SYM2'] >>> data['START_DATETIME'] = [otp.dt(2010, 1, 1, 9, 30, tz='EST5EDT'), otp.dt(2010, 1, 1, 11, tz='EST5EDT')] >>> data['END_DATETIME'] = [otp.dt(2010, 1, 1, 11, tz='EST5EDT'), otp.dt(2010, 1, 3, 14, tz='EST5EDT')] >>> ticks = otp.Ticks(**data, offset=[0] * 2, db='LOCAL') >>> section = otp.RefDB.Section('SECTION_NAME', ticks, {'ATTR1': 'VAL1', 'ATTR2': 'VAL2'}) >>> print(section) # doctest:+ELLIPSIS <SECTION_NAME ATTR1="VAL1" ATTR2="VAL2" OTQ_QUERY=...> </SECTION_NAME> where OTQ_QUERY is path to :class:`otp.Source`, dumped to disk as temporary otq. 
""" # Read ref db guide for details on input format of sections # http://solutions.pages.soltest.onetick.com/iac/onetick-server/ReferenceDatabaseGuide.html def __init__(self, name: str, data: Union[str, 'otp.Source'], attrs: dict = None): self._name = name self._data = data self._attrs = ' '.join([f'{name}="{value}"' for name, value in attrs.items()]) if attrs else '' def __str__(self): if isinstance(self._data, str): return f'<{self._name} {self._attrs}>{os.linesep}{self._data}{os.linesep}</{self._name}>' otq = self._data.to_otq() return f'<{self._name} {self._attrs} OTQ_QUERY={otq}>{os.linesep}</{self._name}>' class SymbolNameHistory(Section): """ Describes symbol changes for the same security. The continuity can be expressed in terms of any symbol type and can be specified on the security level or the security+exchange level (more explicit). Examples -------- >>> data = 'CORE_A||20100101093000|20100101110000|CORE_B||20100101110000|20100103140000|' >>> section = otp.RefDB.SymbolNameHistory(data, symbology='CORE') >>> print(section) <SYMBOL_NAME_HISTORY SYMBOLOGY="CORE"> CORE_A||20100101093000|20100101110000|CORE_B||20100101110000|20100103140000| </SYMBOL_NAME_HISTORY> Equivalent :class:`otp.Source`: >>> data = dict() >>> data['SYMBOL_NAME'] = ['CORE_A'] * 2 >>> data['SYMBOL_NAME_IN_HISTORY'] = ['CORE_A', 'CORE_B'] >>> data['SYMBOL_START_DATETIME'] = [otp.dt(2010, 1, 2, tz='EST5EDT')] * 2 >>> data['SYMBOL_END_DATETIME'] = [otp.dt(2010, 1, 5, tz='EST5EDT')] * 2 >>> data['START_DATETIME'] = [otp.dt(2010, 1, 2, tz='EST5EDT'), otp.dt(2010, 1, 3, tz='EST5EDT')] >>> data['END_DATETIME'] = [otp.dt(2010, 1, 3, tz='EST5EDT'), otp.dt(2010, 1, 4, tz='EST5EDT')] >>> ticks = otp.Ticks(**data, offset=[0] * 2, db='LOCAL') >>> section = otp.RefDB.SymbolNameHistory(ticks, symbology='CORE') >>> print(section) # doctest:+ELLIPSIS <SYMBOL_NAME_HISTORY SYMBOLOGY="CORE" OTQ_QUERY=...> </SYMBOL_NAME_HISTORY> """ def __init__(self, data: Union[str, 'otp.Source'], symbology: str): 
super().__init__('SYMBOL_NAME_HISTORY', data, {'SYMBOLOGY': symbology}) class SymbologyMapping(Section): """ Describes a history of mapping of symbols of one symbology to the symbols of another symbology. Examples -------- >>> data = 'A||20100101093000|20100101110000|CORE_A|' + os.linesep >>> data += 'B||20100101110000|20100103140000|CORE_B|' >>> section = otp.RefDB.SymbologyMapping(data, source_symbology='TICKER', dest_symbology='CORE') >>> print(section) <SYMBOLOGY_MAPPING SOURCE_SYMBOLOGY="TICKER" DEST_SYMBOLOGY="CORE"> A||20100101093000|20100101110000|CORE_A| B||20100101110000|20100103140000|CORE_B| </SYMBOLOGY_MAPPING> Equivalent :class:`otp.Source`: >>> data = dict() >>> data['SYMBOL_NAME'] = ['A', 'B'] >>> data['MAPPED_SYMBOL_NAME'] = ['CORE_A', 'CORE_B'] >>> data['START_DATETIME'] = [otp.dt(2010, 1, 2, tz='EST5EDT'), otp.dt(2010, 1, 3, tz='EST5EDT')] >>> data['END_DATETIME'] = [otp.dt(2010, 1, 3, tz='EST5EDT'), otp.dt(2010, 1, 4, tz='EST5EDT')] >>> ticks = otp.Ticks(**data, offset=[0] * 2, db='LOCAL') >>> section = otp.RefDB.SymbologyMapping(ticks, source_symbology='TICKER', dest_symbology='CORE') >>> print(section) # doctest:+ELLIPSIS <SYMBOLOGY_MAPPING SOURCE_SYMBOLOGY="TICKER" DEST_SYMBOLOGY="CORE" OTQ_QUERY=...> </SYMBOLOGY_MAPPING> """ def __init__(self, data: Union[str, 'otp.Source'], source_symbology: str, dest_symbology: str): super().__init__('SYMBOLOGY_MAPPING', data, {'SOURCE_SYMBOLOGY': source_symbology, 'DEST_SYMBOLOGY': dest_symbology}) class CorpActions(Section): """ Describes corporate actions. Used by OneTick to compute prices adjusted for various types of corporate actions. Supports both built-in and custom (user-defined) types of corporate actions. 
Examples -------- >>> data = 'CORE_C||20100103180000|0.25|0.0|SPLIT' >>> section = otp.RefDB.CorpActions(data, symbology='CORE') >>> print(section) <CORP_ACTIONS SYMBOLOGY="CORE"> CORE_C||20100103180000|0.25|0.0|SPLIT </CORP_ACTIONS> Equivalent :class:`otp.Source`: >>> data = dict() >>> data['SYMBOL_NAME'] = ['CORE_C'] >>> data['EFFECTIVE_DATETIME'] = [otp.dt(2010, 1, 3, 18, tz='EST5EDT')] >>> data['MULTIPLICATIVE_ADJUSTMENT'] = [0.25] >>> data['ADDITIVE_ADJUSTMENT'] = [0.0] >>> data['ADJUSTMENT_TYPE_NAME'] = ['SPLIT'] >>> ticks = otp.Ticks(**data, offset=[0], db='LOCAL') >>> section = otp.RefDB.CorpActions(ticks, symbology='CORE') >>> print(section) # doctest:+ELLIPSIS <CORP_ACTIONS SYMBOLOGY="CORE" OTQ_QUERY=...> </CORP_ACTIONS> """ def __init__(self, data: Union[str, 'otp.Source'], symbology: str): super().__init__('CORP_ACTIONS', data, {'SYMBOLOGY': symbology}) class ContinuousContracts(Section): """ Describes continuous contracts. Continuity is expressed in terms of stitched history of real contracts and rollover adjustments in between them and can be specified on the continuous contract level or continuous contract+exchange level (more explicit). 
Examples -------- >>> data = 'CC||CORE_A||20100101093000|20100101110000|0.5|0|CORE_B||20100101110000|20100103140000' >>> section = otp.RefDB.ContinuousContracts(data, symbology='CORE') >>> print(section) <CONTINUOUS_CONTRACTS SYMBOLOGY="CORE"> CC||CORE_A||20100101093000|20100101110000|0.5|0|CORE_B||20100101110000|20100103140000 </CONTINUOUS_CONTRACTS> Equivalent :class:`otp.Source`: >>> data = dict() >>> data['CONTINUOUS_CONTRACT_NAME'] = ['CC'] * 2 >>> data['SYMBOL_NAME'] = ['CORE_A', 'CORE_B'] >>> data['START_DATETIME'] = [otp.dt(2010, 1, 2, tz='EST5EDT'), otp.dt(2010, 1, 3, tz='EST5EDT')] >>> data['END_DATETIME'] = [otp.dt(2010, 1, 3, tz='EST5EDT'), otp.dt(2010, 1, 4, tz='EST5EDT')] >>> data['MULTIPLICATIVE_ADJUSTMENT'] = [0.5, None] >>> data['ADDITIVE_ADJUSTMENT'] = [3, None] >>> ticks = otp.Ticks(**data, offset=[0] * 2, db='LOCAL') >>> section = otp.RefDB.ContinuousContracts(ticks, symbology='CORE') >>> print(section) # doctest:+ELLIPSIS <CONTINUOUS_CONTRACTS SYMBOLOGY="CORE" OTQ_QUERY=...> </CONTINUOUS_CONTRACTS> """ def __init__(self, data: Union[str, 'otp.Source'], symbology: str): super().__init__('CONTINUOUS_CONTRACTS', data, {'SYMBOLOGY': symbology}) class SymbolCurrency(Section): """ Specifies symbols' currencies in 3-letter ISO codes for currencies. These are used for currency conversion (e.g., when calculating portfolio price for a list of securities with different currencies). 
Examples -------- >>> data = 'CORE_A||20100101093000|20100101110000|USD|1.0' + os.linesep >>> data += 'CORE_B||20100101110000|20100103140000|RUB|1.8' >>> section = otp.RefDB.SymbolCurrency(data, symbology='CORE') >>> print(section) <SYMBOL_CURRENCY SYMBOLOGY="CORE"> CORE_A||20100101093000|20100101110000|USD|1.0 CORE_B||20100101110000|20100103140000|RUB|1.8 </SYMBOL_CURRENCY> Equivalent :class:`otp.Source`: >>> data = dict() >>> data['SYMBOL_NAME'] = ['CORE_A', 'CORE_B',] >>> data['CURRENCY'] = ['USD', 'RUB'] >>> data['MULTIPLIER'] = [1., 1.8] >>> data['START_DATETIME'] = [otp.dt(2010, 1, 1, 9, 30, tz='EST5EDT'), otp.dt(2010, 1, 1, 11, tz='EST5EDT')] >>> data['END_DATETIME'] = [otp.dt(2010, 1, 1, 11, tz='EST5EDT'), otp.dt(2010, 1, 3, 14, tz='EST5EDT')] >>> ticks = otp.Ticks(**data, offset=[0] * 2, db='LOCAL') >>> section = otp.RefDB.SymbolCurrency(ticks, symbology='CORE') >>> print(section) # doctest:+ELLIPSIS <SYMBOL_CURRENCY SYMBOLOGY="CORE" OTQ_QUERY=...> </SYMBOL_CURRENCY> """ def __init__(self, data: Union[str, 'otp.Source'], symbology: str): super().__init__('SYMBOL_CURRENCY', data, {'SYMBOLOGY': symbology}) class Calendar(Section): """ Specifies a named calendar. Needed to analyze tick data during specific market time intervals (i.e., during normal trading hours). Can either be used directly in queries as described below, or referred to from the SYMBOL_CALENDAR and EXCH_CALENDAR sections. 
Examples -------- >>> data = 'CAL1|20100101093000|20100101110000|Regular|R|0.0.12345|093000|160000|GMT|1|DESCRIPTION1' >>> data += os.linesep >>> data += 'CAL2|20100101110000|20100103140000|Holiday|F|0.0.12345|094000|170000|GMT|0|DESCRIPTION2' >>> section = otp.RefDB.Calendar(data) >>> print(section) <CALENDAR > CAL1|20100101093000|20100101110000|Regular|R|0.0.12345|093000|160000|GMT|1|DESCRIPTION1 CAL2|20100101110000|20100103140000|Holiday|F|0.0.12345|094000|170000|GMT|0|DESCRIPTION2 </CALENDAR> Equivalent :class:`otp.Source`: >>> data = dict() >>> data['CALENDAR_NAME'] = ['CAL1', 'CAL2'] >>> data['START_DATETIME'] = [otp.dt(2010, 1, 1, 9, 30, tz='EST5EDT'), otp.dt(2010, 1, 1, 11, tz='EST5EDT')] >>> data['END_DATETIME'] = [otp.dt(2010, 1, 1, 11, tz='EST5EDT'), otp.dt(2010, 1, 3, 14, tz='EST5EDT')] >>> data['SESSION_NAME'] = ['Regular', 'Holiday'] >>> data['SESSION_FLAGS'] = ['R', 'H'] >>> data['DAY_PATTERN'] = ['0.0.12345', '0.0.12345'] >>> data['START_HHMMSS'] = ['093000', '094000'] >>> data['END_HHMMSS'] = ['160000', '170000'] >>> data['TIMEZONE'] = ['GMT', 'GMT'] >>> data['PRIORITY'] = [1, 0] >>> data['DESCRIPTION'] = ['DESCRIPTION1', 'DESCRIPTION2'] >>> ticks = otp.Ticks(**data, offset=[0] * 2, db='LOCAL') >>> section = otp.RefDB.Calendar(ticks) >>> print(section) # doctest:+ELLIPSIS <CALENDAR OTQ_QUERY=...> </CALENDAR> """ def __init__(self, data: Union[str, 'otp.Source']): super().__init__('CALENDAR', data) class SymbolCalendar(Section): """ Specifies a calendar for a symbol. Needed to analyze tick data during specific market time intervals (i.e., during normal trading hours). Can either be specified directly or refer to a named calendar by its name (see the CALENDAR section). 
Examples -------- Symbol calendar section, referring to named calendar section: >>> data = 'CORE_A|20100101093000|20100101110000|CAL1' + os.linesep >>> data += 'CORE_B|20100101110000|20100103140000|CAL2' >>> section = otp.RefDB.SymbolCalendar(data, symbology='CORE') >>> print(section) <SYMBOL_CALENDAR SYMBOLOGY="CORE"> CORE_A|20100101093000|20100101110000|CAL1 CORE_B|20100101110000|20100103140000|CAL2 </SYMBOL_CALENDAR> Equivalent :class:`otp.Source`: >>> data = dict() >>> data['SYMBOL_NAME'] = ['CORE_A', 'CORE_B'] >>> data['START_DATETIME'] = [otp.dt(2010, 1, 1, 9, 30, tz='EST5EDT'), otp.dt(2010, 1, 1, 11, tz='EST5EDT')] >>> data['END_DATETIME'] = [otp.dt(2010, 1, 1, 11, tz='EST5EDT'), otp.dt(2010, 1, 3, 14, tz='EST5EDT')] >>> data['CALENDAR_NAME'] = ['CAL1', 'CAL2'] >>> ticks = otp.Ticks(**data, offset=[0] * 2, db='LOCAL') >>> section = otp.RefDB.SymbolCalendar(ticks, symbology='CORE') >>> print(section) # doctest:+ELLIPSIS <SYMBOL_CALENDAR SYMBOLOGY="CORE" OTQ_QUERY=...> </SYMBOL_CALENDAR> Symbol calendar section without using named calendar section: >>> data = 'CORE_A|20100101093000|20100101110000|Regular|R|0.0.12345|093000|160000|EST5EDT|1|' + os.linesep >>> data += 'CORE_B|20100101110000|20100103140000|Regular|F|0.0.12345|093000|160000|EST5EDT|1|' >>> section = otp.RefDB.SymbolCalendar(data, symbology='CORE') >>> print(section) <SYMBOL_CALENDAR SYMBOLOGY="CORE"> CORE_A|20100101093000|20100101110000|Regular|R|0.0.12345|093000|160000|EST5EDT|1| CORE_B|20100101110000|20100103140000|Regular|F|0.0.12345|093000|160000|EST5EDT|1| </SYMBOL_CALENDAR> Equivalent :class:`otp.Source`: >>> data = dict() >>> data['SYMBOL_NAME'] = ['CORE_A', 'CORE_B'] >>> data['START_DATETIME'] = [otp.dt(2010, 1, 1, 9, 30, tz='EST5EDT'), otp.dt(2010, 1, 1, 11, tz='EST5EDT')] >>> data['END_DATETIME'] = [otp.dt(2010, 1, 1, 11, tz='EST5EDT'), otp.dt(2010, 1, 3, 14, tz='EST5EDT')] >>> data['SESSION_NAME'] = ['Regular', 'Regular'] >>> data['SESSION_FLAGS'] = ['R', 'F'] >>> data['DAY_PATTERN'] = 
['0.0.12345', '0.0.12345'] >>> data['START_HHMMSS'] = ['093000', '160000'] >>> data['END_HHMMSS'] = ['CAL1', 'CAL2'] >>> data['TIMEZONE'] = ['EST5EDT', 'EST5EDT'] >>> data['PRIORITY'] = [1, 1] >>> data['DESCRIPTION'] = ['', ''] >>> ticks = otp.Ticks(**data, offset=[0] * 2, db='LOCAL') >>> section = otp.RefDB.SymbolCalendar(ticks, symbology='CORE') >>> print(section) # doctest:+ELLIPSIS <SYMBOL_CALENDAR SYMBOLOGY="CORE" OTQ_QUERY=...> </SYMBOL_CALENDAR> """ def __init__(self, data: Union[str, 'otp.Source'], symbology: str): super().__init__('SYMBOL_CALENDAR', data, {'SYMBOLOGY': symbology}) class SectionStr(Section): """ Specification of a reference database section that can be specified only as a string. Section content still can be provided as a :class:`otp.Source`, but the :class:`otp.Source` is executed and result data is used as string in section. It's up to user to provide :class:`otp.Source` with correct number and order of columns. Examples -------- Data provided as a string returns the same result as :class:`otp.RefDB.Section`. Data provided as a :class:`otp.Source`: >>> data = dict() >>> data['SYMBOL_NAME'] = ['SYM1', 'SYM2'] >>> data['START_DATETIME'] = [otp.dt(2010, 1, 1, 9, 30, tz='EST5EDT'), otp.dt(2010, 1, 1, 11, tz='EST5EDT')] >>> data['END_DATETIME'] = [otp.dt(2010, 1, 1, 11, tz='EST5EDT'), otp.dt(2010, 1, 3, 14, tz='EST5EDT')] >>> ticks = otp.Ticks(**data, offset=[0] * 2, db='LOCAL') >>> ticks = ticks.table(SYMBOL_NAME=otp.string[128], START_DATETIME=otp.msectime, END_DATETIME=otp.msectime) >>> section = otp.RefDB.SectionStr('SECTION_NAME', ticks, {'ATTR1': 'VAL1', 'ATTR2': 'VAL2'}) >>> print(section) # doctest:+ELLIPSIS <SECTION_NAME ATTR1="VAL1" ATTR2="VAL2"> SYM1|20100101093000|20100101110000 SYM2|20100101110000|20100103140000 </SECTION_NAME> where OTQ_QUERY is path to :class:`otp.Source`, dumped to disk as temporary otq. 
""" def __init__(self, name: str, data: Union[str, 'otp.Source'], attrs: dict = None): data_str = data if isinstance(data, str) else self._source_to_str(data) super().__init__(name, data_str, attrs) def _source_to_str(self, data: 'otp.Source'): data = otp.run(data) data.drop(columns=['Time'], inplace=True) data = data.to_csv(sep='|', header=False, index=False, date_format='%Y%m%d%H%M%S') return data class PrimaryExchange(SectionStr): """ Specifies symbols' primary exchanges. Used to extract and analyze tick data for a security on its primary exchange, without having to explicitly specify the name of the primary exchange. Examples -------- >>> data = 'A||19991118000000|99999999000000|N|' >>> data += os.linesep >>> data += 'AA||19991118000000|99999999000000|N|AA.N' >>> section = otp.RefDB.PrimaryExchange(data, symbology='TICKER') >>> print(section) <PRIMARY_EXCHANGE SYMBOLOGY="TICKER"> A||19991118000000|99999999000000|N| AA||19991118000000|99999999000000|N|AA.N </PRIMARY_EXCHANGE> Equivalent otq query should return the same data values in the same order. Column names does not matter. """ def __init__(self, data: Union[str, 'otp.Source'], symbology: str): super().__init__('PRIMARY_EXCHANGE', data, {'SYMBOLOGY': symbology}) class ExchCalendar(SectionStr): """ Specifies symbols' primary exchanges. Used to extract and analyze tick data for a security on its primary exchange, without having to explicitly specify the name of the primary exchange. 
Examples -------- >>> data = 'NYSE||19600101000000|20501231235959|Regular|R|0.0.12345|093000|160000|EST5EDT|' >>> data += os.linesep >>> data += 'NYSE||19600101000000|20501231235959|Half-day|RL|12/31|093000|130000|EST5EDT|' >>> data += os.linesep >>> data += 'NYSE||19600101000000|20501231235959|Holiday|H|01/01|000000|240000|EST5EDT|' >>> section = otp.RefDB.ExchCalendar(data, symbology='MIC') >>> print(section) <EXCH_CALENDAR SYMBOLOGY="MIC"> NYSE||19600101000000|20501231235959|Regular|R|0.0.12345|093000|160000|EST5EDT| NYSE||19600101000000|20501231235959|Half-day|RL|12/31|093000|130000|EST5EDT| NYSE||19600101000000|20501231235959|Holiday|H|01/01|000000|240000|EST5EDT| </EXCH_CALENDAR> If a CALENDAR section is used: >>> data = 'LSE||19600101000000|20501231235959|WNY' >>> section = otp.RefDB.ExchCalendar(data, symbology='MIC') >>> print(section) <EXCH_CALENDAR SYMBOLOGY="MIC"> LSE||19600101000000|20501231235959|WNY </EXCH_CALENDAR> Equivalent otq query should return the same data values in the same order. Column names does not matter. """ def __init__(self, data: Union[str, 'otp.Source'], symbology: str): super().__init__('EXCH_CALENDAR', data, {'SYMBOLOGY': symbology}) class SymbolExchange(SectionStr): """ Specifies the exchange where a security is traded. Needs to be provided for the symbologies where the symbol name is unique across all exchanges. Examples -------- >>> data = 'IBM.N|19980825000000|20501231235959|NYSE||' >>> section = otp.RefDB.SymbolExchange(data, symbology='RIC', exchange_symbology='MIC') >>> print(section) <SYMBOL_EXCHANGE SYMBOLOGY="RIC" EXCHANGE_SYMBOLOGY="MIC"> IBM.N|19980825000000|20501231235959|NYSE|| </SYMBOL_EXCHANGE> Equivalent otq query should return the same data values in the same order. Column names does not matter. 
""" def __init__(self, data: Union[str, 'otp.Source'], symbology: str, exchange_symbology: str): super().__init__('SYMBOL_EXCHANGE', data, {'SYMBOLOGY': symbology, 'EXCHANGE_SYMBOLOGY': exchange_symbology}) def put( self, src: Union[str, List[Section]], tickdb_symbology: List[str] = None, delta_mode: bool = False, full_integrity_check: bool = False, load_by_sections: bool = True, ): """ Loads data in database with reference_data_loader.exe. If db properties contain SUPPORT_DELTAS=YES, delta_mode set to True, and proper delta file is used then data is loaded in incremental mode (in other words, replace or modification mode). If the above conditions are not met, reference database content is entirely rewritten with the new data. Parameters ---------- src : str, list of str or otp.RefDB.Section Path to data file, or list of data per section in specified format tickdb_symbology : list of str, optional All symbologies for which the reference data needs to be generated delta_mode : bool, default is False If set to True loader will perform incremental load. 
Cannot be used if tickdb_symbology is specified full_integrity_check : bool, default is False If set to True loader checks all mappings to symbologies with symbol name history section and gives warning if mapped securities do not have symbol name history load_by_sections : bool, default is True If set to True loader will perform input data file splitting by data types and symbologies to load each part separately instead loading the entire file at once """ # More info: # http://solutions.pages.soltest.onetick.com/iac/onetick-server/reference_data_loader.html - loader doc # https://onemarketdata.atlassian.net/browse/KB-286 - details on delta mode return self._session_handler( self._put, src=src, delta_mode=delta_mode, full_integrity_check=full_integrity_check, load_by_sections=load_by_sections, tickdb_symbology=tickdb_symbology, ) def _prepare_data_file(self, src): if isinstance(src, str): return src data = f'<VERSION_INFO VERSION="1">{os.linesep}</VERSION_INFO>' for section in src: data += f'{os.linesep}{os.linesep}{section}' data_file = utils.TmpFile(suffix='.txt') with open(data_file.path, 'w') as f: f.writelines(data) data_file.close() return data_file.path def _prepare_loader_args(self, data_file, tickdb_symbology, delta_mode, full_integrity_check, load_by_sections): loader_args = ['-dbname', self.name] loader_args += ['-data_file', data_file] if tickdb_symbology: for symbology in tickdb_symbology: loader_args += ['-tickdb_symbology', symbology] loader_args += ['-delta_mode', 'yes' if delta_mode else 'no'] loader_args += ['-full_integrity_check', 'yes' if full_integrity_check else 'no'] loader_args += ['-load_by_sections', 'yes' if load_by_sections else 'no'] return loader_args def _put(self, src, tickdb_symbology, delta_mode, full_integrity_check, load_by_sections): data_file = self._prepare_data_file(src) loader_args = self._prepare_loader_args( data_file, tickdb_symbology, delta_mode, full_integrity_check, load_by_sections ) loader_path = 
os.path.join(utils.omd_dist_path(), 'one_tick', 'bin', 'reference_data_loader.exe') p = subprocess.run( [loader_path] + loader_args, env={ 'ONE_TICK_CONFIG': session.Session._instance.config.path, 'TZ': os.environ.get('TZ', otp.config['tz']), }, stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) return p.stdout, p.stderr def add(self, *args, **kwargs): # this method is not implemented because # reference database loader can only rewrite the data, not add new entries raise NotImplementedError("Method is not supported for reference databases. Use put instead.")