Source code for qf_lib.backtesting.fast_alpha_model_tester.scenarios_generator

#     Copyright 2016-present CERN – European Organization for Nuclear Research
#
#     Licensed under the Apache License, Version 2.0 (the "License");
#     you may not use this file except in compliance with the License.
#     You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#     Unless required by applicable law or agreed to in writing, software
#     distributed under the License is distributed on an "AS IS" BASIS,
#     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#     See the License for the specific language governing permissions and
#     limitations under the License.
import warnings
from datetime import datetime
from itertools import zip_longest
from typing import Sequence

import numpy as np
from pandas import date_range

from qf_lib.backtesting.alpha_model.exposure_enum import Exposure
from qf_lib.common.enums.frequency import Frequency
from qf_lib.containers.dataframe.simple_returns_dataframe import SimpleReturnsDataFrame
from qf_lib.containers.series.qf_series import QFSeries


class ScenariosGenerator:
    """
    Class used for generating different scenarios for Trades.
    """

    def make_scenarios(self, trade_rets: Sequence[float], scenarios_length: int = 100,
                       num_of_scenarios: int = 10000) -> SimpleReturnsDataFrame:
        """
        Utility function to generate different trades scenarios, where each scenario is a series of returns for
        a given investment strategy.

        The scenarios of a given length are created by randomly choosing (with replacement) returns from the
        original sequence of a Trade's returns. The result is the SimpleReturnsDataFrame which is indexed by
        the Trade's ordinal number and has a scenario in each column.

        Parameters
        ----------
        trade_rets: Sequence[float]
            sequence of floats which represent the returns on Trades performed by some investment strategy
        scenarios_length: int
            number of Trades which should be simulated for each scenario
        num_of_scenarios: int
            number of scenarios which should be generated

        Returns
        -------
        SimpleReturnsDataFrame
            data frame of size scenarios_length (rows) by num_of_scenarios (columns). It contains float numbers.
        """
        # Draw all the samples in one call (uses numpy's global random state) and then arrange them so that
        # each column holds one scenario.
        values = np.random.choice(trade_rets, scenarios_length * num_of_scenarios)
        values = np.reshape(values, (scenarios_length, num_of_scenarios))

        return SimpleReturnsDataFrame(values)

    def make_exposure_scenarios(self, start_date: datetime, end_date: datetime, number_of_trades: int,
                                time_in_the_market: float, exposure: Exposure = Exposure.LONG,
                                frequency: Frequency = Frequency.DAILY, seed: int = None) -> QFSeries:
        """
        Creates a random series, which contains information about the exposure of a certain asset for
        the given dates range. Based on a.o. the total desired number of trades and average holding time of
        the trades, the function generates random trades and fills the rows for corresponding dates with
        the desired exposure.

        If the number of trades provided is too high to create non-adjacent trades, which will together
        occupy <time_in_the_market>% percentage of time, the time span between some of the consecutive trades
        may be set to 0. In that case it may seem as if the returned number of trades was smaller than
        the expected number of trades.

        Exemplary output for daily trading, 2 trades, time in the market = 60% and desired exposure = LONG:

        2021-10-01    Exposure.OUT
        2021-10-02    Exposure.LONG
        2021-10-03    Exposure.LONG
        2021-10-04    Exposure.LONG
        2021-10-05    Exposure.LONG
        2021-10-06    Exposure.OUT
        2021-10-07    Exposure.OUT
        2021-10-08    Exposure.LONG
        2021-10-09    Exposure.LONG
        2021-10-10    Exposure.OUT

        Parameters
        ----------
        start_date: datetime
            first date considered in the returned series
        end_date: datetime
            last date considered in the returned series
        number_of_trades: int
            total number of trades, which should be generated
        time_in_the_market: float
            total time of the ticker in the market (should be a percentage value, between 0.0 and 1.0)
        exposure: Exposure
            the desired exposure (either short or long)
        frequency: Frequency
            frequency of the trading
        seed: int
            seed used to make the scenarios deterministic

        Returns
        -------
        QFSeries
            Series indexed by dates between start_date and end_date with the given frequency

        Raises
        ------
        AssertionError
            if time_in_the_market does not belong to the [0.0, 1.0] range
        ValueError
            if number_of_trades is negative, or if it is zero while the requested time in the market
            corresponds to at least one bar
        """
        assert 0.0 <= time_in_the_market <= 1.0, "time_in_the_market should belong to the [0.0, 1.0] range"
        if number_of_trades < 0:
            raise ValueError(f"number_of_trades should be a non-negative integer, got {number_of_trades}")

        dates_index = date_range(start_date, end_date, freq=frequency.to_pandas_freq())
        bars_amount = dates_index.size
        bars_in_the_market = round(bars_amount * time_in_the_market)

        # Every trade has to last for at least one bar, so there can not be more trades than bars in the market
        if number_of_trades > bars_in_the_market:
            number_of_trades = bars_in_the_market
            warnings.warn(f"The desired number of trades is bigger than the number of bars in the market, which equals "
                          f"time_in_the_market * number of all bars between start_date and end_date. The returned "
                          f"number of trades will be reduced to {number_of_trades}.", stacklevel=2)

        if bars_in_the_market > 0:
            if number_of_trades == 0:
                # Without this check the failure would surface as a cryptic numpy error about negative dimensions
                raise ValueError(f"number_of_trades equals 0, but time_in_the_market = {time_in_the_market} requires "
                                 f"{bars_in_the_market} bar(s) in the market. Provide at least one trade or set "
                                 f"time_in_the_market to 0.0.")
            trades_lengths = self._get_random_integers(bars_in_the_market, number_of_trades, False, seed)
        else:
            trades_lengths = []

        # Compute the period lengths between the trades. There are number_of_trades + 1 such periods (before
        # the first trade, between the consecutive trades and after the last trade). If there are not enough
        # bars out of the market to give every period at least one bar, periods of length 0 are allowed.
        # NOTE: the same seed is used as for the trades lengths, so for a non-None seed both draws start from
        # an identical generator state.
        out_days = bars_amount - bars_in_the_market
        include_zero = out_days < number_of_trades + 1
        if out_days > 0:
            days_between_trades = self._get_random_integers(out_days, number_of_trades + 1, include_zero, seed)
        else:
            days_between_trades = []

        # Create the timeseries with the randomly generated trading days: alternate the out-of-the-market
        # periods with the trades, padding the shorter list with zero-length periods
        exposures_list = [out * [Exposure.OUT] + long * [exposure]
                          for out, long in zip_longest(days_between_trades, trades_lengths, fillvalue=0)]
        exposures_list = [el for sublist in exposures_list for el in sublist]

        return QFSeries(exposures_list, index=dates_index)

    @staticmethod
    def _get_random_integers(sum_of_values: int, number_of_elements: int, include_zero: bool,
                             seed: int = None) -> list:
        """
        Create a list of random integers with given sum.

        The list is built by drawing number_of_elements - 1 cut points from the range of possible values,
        adding 0 and sum_of_values as the outer boundaries, sorting all of them and returning the differences
        between the consecutive points.

        Parameters
        ----------
        sum_of_values: int
            the value to which all the returned integers should sum up
        number_of_elements: int
            number of integers which should be returned (at least 1)
        include_zero: bool
            if False, the cut points are distinct and are drawn from 1, ..., sum_of_values - 1. If True,
            the cut points are drawn with replacement from 0, ..., sum_of_values, which means that some of
            the returned integers may be equal to 0.
        seed: int
            seed used to initialise the random number generator

        Returns
        -------
        list
            list of number_of_elements integers, which sum up to sum_of_values

        Raises
        ------
        ValueError
            if number_of_elements is smaller than 1
        """
        if number_of_elements < 1:
            raise ValueError(f"number_of_elements should be at least 1, got {number_of_elements}")

        # Initialise a random number generator
        rng = np.random.default_rng(seed)

        if not include_zero:
            cut_points = rng.choice(range(1, sum_of_values), number_of_elements - 1, replace=False).tolist()
        else:
            cut_points = rng.choice(range(0, sum_of_values + 1), number_of_elements - 1, replace=True).tolist()

        boundaries = sorted(cut_points + [0, sum_of_values])
        return [right - left for left, right in zip(boundaries, boundaries[1:])]