# Source code for qf_lib.analysis.trade_analysis.trade_analysis_sheet

#     Copyright 2016-present CERN – European Organization for Nuclear Research
#
#     Licensed under the Apache License, Version 2.0 (the "License");
#     you may not use this file except in compliance with the License.
#     You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#     Unless required by applicable law or agreed to in writing, software
#     distributed under the License is distributed on an "AS IS" BASIS,
#     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#     See the License for the specific language governing permissions and
#     limitations under the License.

from datetime import datetime
from math import sqrt
from typing import Union, Tuple, Sequence, List, Callable, Optional

import matplotlib as plt
import numpy as np
from matplotlib.ticker import MaxNLocator
from pandas import Timedelta

from qf_lib.analysis.common.abstract_document import AbstractDocument
from qf_lib.common.utils.error_handling import ErrorHandling
from qf_lib.backtesting.fast_alpha_model_tester.scenarios_generator import ScenariosGenerator
from qf_lib.backtesting.portfolio.trade import Trade
from qf_lib.common.utils.miscellaneous.constants import DAYS_PER_YEAR_AVG
from qf_lib.common.utils.numberutils.is_finite_number import is_finite_number
from qf_lib.common.utils.returns.max_drawdown import max_drawdown
from qf_lib.common.utils.returns.sqn import sqn, sqn_for100trades, avg_nr_of_trades_per1y
from qf_lib.containers.dataframe.prices_dataframe import PricesDataFrame
from qf_lib.containers.dataframe.qf_dataframe import QFDataFrame
from qf_lib.containers.series.qf_series import QFSeries
from qf_lib.containers.series.simple_returns_series import SimpleReturnsSeries
from qf_lib.documents_utils.document_exporting.element.chart import ChartElement
from qf_lib.documents_utils.document_exporting.element.df_table import DFTable
from qf_lib.documents_utils.document_exporting.element.heading import HeadingElement
from qf_lib.documents_utils.document_exporting.element.new_page import NewPageElement
from qf_lib.documents_utils.document_exporting.pdf_exporter import PDFExporter
from qf_lib.plotting.charts.chart import Chart
from qf_lib.plotting.charts.histogram_chart import HistogramChart
from qf_lib.plotting.charts.line_chart import LineChart
from qf_lib.plotting.decorators.axes_formatter_decorator import AxesFormatterDecorator, PercentageFormatter
from qf_lib.plotting.decorators.axes_label_decorator import AxesLabelDecorator
from qf_lib.plotting.decorators.axes_locator_decorator import AxesLocatorDecorator
from qf_lib.plotting.decorators.axes_position_decorator import AxesPositionDecorator
from qf_lib.plotting.decorators.data_element_decorator import DataElementDecorator
from qf_lib.plotting.decorators.legend_decorator import LegendDecorator
from qf_lib.plotting.decorators.line_decorators import VerticalLineDecorator
from qf_lib.plotting.decorators.title_decorator import TitleDecorator
from qf_lib.settings import Settings


@ErrorHandling.class_error_logging()
class TradeAnalysisSheet(AbstractDocument):
    """
    Creates a PDF containing main statistics of the trades.

    The document consists of a header, a histogram of the trade returns, a table with statistics computed for
    all / long / short trades and a page with Monte Carlo simulation results.

    Parameters
    -------------
    settings: Settings
        settings of the project
    pdf_exporter: PDFExporter
        tool that creates the pdf with the result
    nr_of_assets_traded: int
        number of assets traded
    trades: Sequence[Trade]
        list of trades
    start_date: datetime
        beginning of the analysed period; together with end_date it defines the period length used for all
        "per year" figures
    end_date: datetime
        end of the analysed period
    initial_risk: Optional[float]
        if provided, the returns distribution and the statistics table express trade returns as R multiples
        (percentage pnl / initial risk); if None, the percentage pnl is used directly
    title: str
        title of the document, will be a part of the filename. Do not use special characters
    """

    def __init__(self, settings: Settings, pdf_exporter: PDFExporter, nr_of_assets_traded: int,
                 trades: Sequence[Trade], start_date: datetime, end_date: datetime,
                 initial_risk: Optional[float] = None, title: str = "Trades"):
        super().__init__(settings, pdf_exporter, title)

        self.start_date = start_date
        self.end_date = end_date
        self.initial_risk = initial_risk
        # Trades are kept ordered by their end time (start time breaks the ties)
        self.trades = sorted(trades, key=lambda t: (t.end_time, t.start_time))
        self.nr_of_assets_traded = nr_of_assets_traded

    def build_document(self):
        """ Adds all the elements (header, histogram, statistics table, simulation results) to the document. """
        self._add_header()
        self._add_returns_distribution()
        self._add_stats_table()
        self._add_simulation_results()

    def _add_returns_distribution(self):
        """ Adds a histogram of the trade returns (or of the R multiples, if the initial risk is set). """
        if self.initial_risk is not None:
            returns = SimpleReturnsSeries(data=[t.percentage_pnl / self.initial_risk for t in self.trades])
            title = "Distribution of R multiples, Initial risk = {:.2%}".format(self.initial_risk)
            returns_histogram = self._get_distribution_plot(returns, title)
        else:
            returns = SimpleReturnsSeries(data=[t.percentage_pnl for t in self.trades])
            title = "Distribution of returns [%]"
            returns_histogram = self._get_distribution_plot(returns, title)

            # Format the x-axis so that its labels are shown as a percentage in case of percentage returns
            # (R multiples are plain ratios, so they keep the default number formatting)
            axes_formatter_decorator = AxesFormatterDecorator(x_major=PercentageFormatter(), key="axes_formatter")
            returns_histogram.add_decorator(axes_formatter_decorator)

        self.document.add_element(ChartElement(returns_histogram, figsize=self.full_image_size, dpi=self.dpi))

    def _add_stats_table(self):
        """
        Adds a table with trade statistics. Each measure is computed separately for all trades, long trades
        (direction > 0) and short trades (direction < 0).
        """
        statistics = []  # type: List[Tuple]

        def append_to_statistics(measure_description: str, function: Callable, trades_containers,
                                 percentage_style: bool = False):
            # Applies the function to every container and stores the formatted results as one row of the table.
            # Values which are not finite numbers are shown as 0.
            style_format = "{:.2%}" if percentage_style else "{:.2f}"
            returned_values = (function(tc) for tc in trades_containers)
            returned_values = (value if is_finite_number(value) else 0.0 for value in returned_values)
            statistics.append((measure_description, *(style_format.format(val) for val in returned_values)))

        # Prepare trades data frame, used to generate all statistics
        trades_df = QFDataFrame.from_records(
            data=[(t.start_time, t.end_time, t.percentage_pnl, t.direction) for t in self.trades],
            columns=["start time", "end time", "percentage pnl", "direction"]
        )

        # In case if the initial risk is not set all the return statistic will be computed using the percentage pnl,
        # otherwise the r_multiply = percentage pnl / initial risk is used
        returns_in_percent = self.initial_risk is None
        unit = "%" if returns_in_percent else "R"
        trades_df["returns"] = trades_df["percentage pnl"] if returns_in_percent \
            else trades_df["percentage pnl"] / self.initial_risk

        # Split the trades into long-only and short-only subsets
        long_trades_df = trades_df[trades_df["direction"] > 0]
        short_trades_df = trades_df[trades_df["direction"] < 0]
        all_dfs = [trades_df, long_trades_df, short_trades_df]

        append_to_statistics("Number of trades", len, all_dfs)
        append_to_statistics("% of trades number", lambda df: len(df) / len(trades_df) if len(trades_df) > 0 else 0,
                             all_dfs, percentage_style=True)

        period_length_in_years = self._period_length_in_years()

        def trades_per_year(df: QFDataFrame):
            # A zero-length period has no meaningful yearly rate - report 0 instead of dividing by zero
            return len(df) / period_length_in_years if period_length_in_years != 0 else 0.0

        def trades_per_year_per_asset(df: QFDataFrame):
            # Same convention when no assets were traded
            return trades_per_year(df) / self.nr_of_assets_traded if self.nr_of_assets_traded != 0 else 0.0

        append_to_statistics("Avg number of trades per year", trades_per_year, all_dfs)
        append_to_statistics("Avg number of trades per year per asset", trades_per_year_per_asset, all_dfs)

        def percentage_of_positive_trades(df: QFDataFrame):
            return len(df[df["returns"] > 0]) / len(df) if len(df) > 0 else 0.0
        append_to_statistics("% of positive trades", percentage_of_positive_trades, all_dfs, percentage_style=True)

        def percentage_of_negative_trades(df: QFDataFrame):
            return len(df[df["returns"] < 0]) / len(df) if len(df) > 0 else 0.0
        append_to_statistics("% of negative trades", percentage_of_negative_trades, all_dfs, percentage_style=True)

        def avg_trade_duration(df: QFDataFrame):
            trades_duration = (df["end time"] - df["start time"]) / Timedelta(days=1)
            return trades_duration.mean()
        append_to_statistics("Average trade duration [days]", avg_trade_duration, all_dfs)

        append_to_statistics("Average trade return [{}]".format(unit), lambda df: df["returns"].mean(), all_dfs,
                             percentage_style=returns_in_percent)
        append_to_statistics("Std trade return [{}]".format(unit), lambda df: df["returns"].std(), all_dfs,
                             percentage_style=returns_in_percent)

        def avg_positive_trade_return(df: QFDataFrame):
            positive_trades = df[df["returns"] > 0]
            return positive_trades["returns"].mean()
        append_to_statistics("Average positive return [{}]".format(unit), avg_positive_trade_return, all_dfs,
                             percentage_style=returns_in_percent)

        def avg_negative_trade_return(df: QFDataFrame):
            negative_trades = df[df["returns"] < 0]
            return negative_trades["returns"].mean()
        append_to_statistics("Average negative return [{}]".format(unit), avg_negative_trade_return, all_dfs,
                             percentage_style=returns_in_percent)

        append_to_statistics("Best trade return [{}]".format(unit), lambda df: df["returns"].max(), all_dfs,
                             percentage_style=returns_in_percent)
        append_to_statistics("Worst trade return [{}]".format(unit), lambda df: df["returns"].min(), all_dfs,
                             percentage_style=returns_in_percent)

        append_to_statistics("SQN (per trade) [{}]".format(unit), lambda df: sqn(df["returns"]), all_dfs,
                             percentage_style=returns_in_percent)
        append_to_statistics("SQN (per 100 trades) [{}]".format(unit), lambda df: sqn_for100trades(df["returns"]),
                             all_dfs, percentage_style=returns_in_percent)

        def sqn_per_year(returns: QFSeries):
            # Scale the per-trade SQN by the square root of the average number of trades per year
            sqn_per_year_value = sqn(returns) * sqrt(avg_nr_of_trades_per1y(returns, self.start_date, self.end_date))
            return sqn_per_year_value
        append_to_statistics("SQN (per year) [{}]".format(unit), lambda df: sqn_per_year(df["returns"]), all_dfs,
                             percentage_style=returns_in_percent)

        statistics_df = QFDataFrame.from_records(statistics,
                                                 columns=["Measure", "All trades", "Long trades", "Short trades"])
        table = DFTable(statistics_df, css_classes=['table', 'left-align'])
        table.add_columns_classes(["Measure"], 'wide-column')
        self.document.add_element(table)

    def _add_simulation_results(self):
        """
        Generate a data frame consisting of a certain number of "scenarios" (each scenario denotes one single equity
        curve) and add the charts and tables summarizing them to the document, starting on a new page.
        """
        self.document.add_element(NewPageElement())
        self.document.add_element(HeadingElement(level=1, text="Monte Carlo simulations\n"))
        self.document.add_element(HeadingElement(level=2, text="Average number of trades per year: {}\n".format(
            int(self._average_number_of_trades_per_year()))))
        if self.initial_risk is not None:
            self.document.add_element(HeadingElement(level=2, text="Initial risk: {:.2%}".format(self.initial_risk)))

        scenarios_df, total_returns = self._get_scenarios()

        # Plot all the possible paths on a chart
        all_paths_chart = self._get_simulation_plot(scenarios_df)
        self.document.add_element(ChartElement(all_paths_chart, figsize=self.full_image_size, dpi=self.dpi))

        # Plot the distribution of the total returns of the scenarios
        distribution_plot = self._get_distribution_plot(
            total_returns, title="Monte Carlo Simulations Distribution (one year % return)", bins=200, crop=True)
        # The simulated total returns are percentage returns, so the x-axis labels are shown as a percentage
        axes_formatter_decorator = AxesFormatterDecorator(x_major=PercentageFormatter(), key="axes_formatter")
        distribution_plot.add_decorator(axes_formatter_decorator)

        self.document.add_element(ChartElement(distribution_plot, figsize=self.full_image_size, dpi=self.dpi))

        # Add the table summarizing the simulations (returns, drawdown, probability of a positive return)
        simulations_summary_table = self._get_monte_carlos_simulator_outputs(scenarios_df, total_returns)
        self.document.add_element(simulations_summary_table)

        # Add the table with the quantiles of the scenarios' total returns
        dist_summary_tables = self._get_distribution_summary_table(total_returns)
        self.document.add_element(dist_summary_tables)

        # Add the "Chances of dropping below" table
        ruin_chances_table = self._get_chances_of_dropping_below_table(scenarios_df)
        self.document.add_element(ruin_chances_table)

    def _get_scenarios(self, num_of_scenarios: int = 2500) -> Tuple[PricesDataFrame, SimpleReturnsSeries]:
        """
        Generates the scenarios from the percentage pnl of the trades.

        Returns a tuple of: the scenarios converted to prices (one column per scenario) and the total return of
        each scenario, computed as last value / first value - 1.
        """
        # Generate scenarios, each of which consists of a certain number of trades, equal to the average number
        # of trades per year
        scenarios_generator = ScenariosGenerator()
        trade_returns = [trade.percentage_pnl for trade in self.trades]

        # Generate the scenarios
        scenarios_df = scenarios_generator.make_scenarios(
            trade_returns,
            scenarios_length=int(self._average_number_of_trades_per_year()),
            num_of_scenarios=num_of_scenarios
        )

        scenarios_df = scenarios_df.to_prices()

        return scenarios_df, scenarios_df.iloc[-1] / scenarios_df.iloc[0] - 1.0

    def _period_length_in_years(self):
        """
        Computes the length of the period between start_date and end_date, expressed in years
        (number of days divided by DAYS_PER_YEAR_AVG).
        """
        return Timedelta(self.end_date - self.start_date) / Timedelta(days=1) / DAYS_PER_YEAR_AVG

    def _average_number_of_trades_per_year(self):
        """
        Computes the average number of trades per year.

        No guard is applied here: the division fails if start_date and end_date describe a zero-length period.
        """
        number_of_trades = len(self.trades)
        return number_of_trades / self._period_length_in_years()

    def _get_simulation_plot(self, scenarios_df: PricesDataFrame) -> Chart:
        """
        Creates a log-scale line chart with all scenarios, their ensemble average and the volatility adjusted
        expectation line.
        """
        chart = LineChart(log_scale=True)

        for _, scenario in scenarios_df.items():
            data_element = DataElementDecorator(scenario, linewidth=0.5)
            chart.add_decorator(data_element)

        # Add a legend
        legend = LegendDecorator(key="legend_decorator")

        # Add Ensemble average
        ensemble_avg = scenarios_df.mean(axis=1)
        ensemble_avg_data_element = DataElementDecorator(ensemble_avg, color="#e1e5f4", linewidth=3)
        chart.add_decorator(ensemble_avg_data_element)
        legend.add_entry(ensemble_avg_data_element, "Ensemble average")

        # Add Expectation (vol adjusted): a constant per-trade return of mean - 0.5 * std^2, compounded over the
        # same index as the scenarios
        trade_returns = QFSeries(data=[trade.percentage_pnl for trade in self.trades])
        std = trade_returns.std()
        expectation_adj_series = np.ones(len(ensemble_avg)) * (trade_returns.mean() - 0.5 * std * std)
        expectation_adj_series = SimpleReturnsSeries(data=expectation_adj_series, index=ensemble_avg.index)
        expectation_adj_series = expectation_adj_series.to_prices()

        data_element = DataElementDecorator(expectation_adj_series, color="#46474b", linewidth=2)
        chart.add_decorator(data_element)
        legend.add_entry(data_element, "Expectation (vol adjusted)")

        # Add title
        title_decorator = TitleDecorator("Monte Carlo Simulations (log scale)", key="title")
        chart.add_decorator(title_decorator)

        position_decorator = AxesPositionDecorator(*self.full_image_axis_position)
        chart.add_decorator(position_decorator)

        chart.add_decorator(legend)

        return chart

    def _get_distribution_plot(self, data_series: SimpleReturnsSeries, title: str, bins: Union[int, str] = 50,
                               crop: bool = False):
        """
        Creates a histogram of the given series with a vertical line marking its mean.

        If crop is True, the x-axis is limited to the range between the 1% and the 99% quantile of the data.
        The title is used both as the chart title and as the x-axis label.
        """
        colors = Chart.get_axes_colors()

        if crop:
            start_x = np.quantile(data_series, 0.01)
            end_x = np.quantile(data_series, 0.99)
            chart = HistogramChart(data_series, bins=bins, start_x=start_x, end_x=end_x)
        else:
            chart = HistogramChart(data_series, bins=bins)

        # Only show whole numbers on the y-axis.
        y_axis_locator = MaxNLocator(integer=True)
        axes_locator_decorator = AxesLocatorDecorator(y_major=y_axis_locator, key="axes_locator")
        chart.add_decorator(axes_locator_decorator)

        # Add an average line.
        avg_line = VerticalLineDecorator(data_series.mean(), color=colors[1],
                                         key="average_line_decorator", linestyle="--", alpha=0.8)
        chart.add_decorator(avg_line)

        # Add a legend.
        legend = LegendDecorator(key="legend_decorator")
        legend.add_entry(avg_line, "Mean")
        chart.add_decorator(legend)

        # Add a title.
        title_decorator = TitleDecorator(title, key="title")
        chart.add_decorator(title_decorator)
        chart.add_decorator(AxesLabelDecorator(title, "Occurrences"))

        position_decorator = AxesPositionDecorator(*self.full_image_axis_position)
        chart.add_decorator(position_decorator)

        return chart

    def _get_distribution_summary_table(self, scenarios_results: SimpleReturnsSeries) -> DFTable:
        """
        Creates a table with the quantiles of the scenarios results: the 5%, 10%, 20%, 30% tails, the median and
        the corresponding top quantiles (70%, 80%, 90%, 95%).
        """
        rows = []
        percentage_list = [0.05, 0.1, 0.2, 0.3]
        for percentage in percentage_list:
            rows.append(("{:.0%} Tail".format(percentage),
                         "{:.2%}".format(np.quantile(scenarios_results, percentage))))

        rows.append(("50%", "{:.2%}".format(np.quantile(scenarios_results, 0.5))))

        for percentage in reversed(percentage_list):
            rows.append(("{:.0%} Top".format(percentage),
                         "{:.2%}".format(np.quantile(scenarios_results, (1.0 - percentage)))))

        table = DFTable(data=QFDataFrame.from_records(rows, columns=["Measure", "Value"]),
                        css_classes=['table', 'left-align'])
        table.add_columns_classes(["Measure"], 'wide-column')

        return table

    def _get_chances_of_dropping_below_table(self, scenarios_df: PricesDataFrame) -> DFTable:
        """
        Creates a table with the fraction of scenarios that, at any point, were not above (1 - percentage) for
        percentages from 10% to 90%. The table is cut one row after the first probability lower than 10%.
        """
        _, all_scenarios_number = scenarios_df.shape
        rows = []

        crop_table = False
        for percentage in np.linspace(0.1, 0.9, 9):
            # Count number of scenarios, whose returns at some point of time dropped below the percentage * initial
            # value
            _, scenarios_above_percentage = scenarios_df.where(scenarios_df > (1.0 - percentage)).dropna(axis=1).shape
            probability = (all_scenarios_number - scenarios_above_percentage) / all_scenarios_number

            rows.append(("{:.0%}".format(percentage), "{:.2%}".format(probability)))

            # Once a probability below 10% was found, exactly one more row is added before stopping
            if crop_table is True:
                break
            elif probability < 0.1:
                crop_table = True

        table = DFTable(QFDataFrame.from_records(rows, columns=["Chances of dropping below", "Probability"]),
                        css_classes=['table', 'left-align'])
        table.add_columns_classes(["Chances of dropping below"], 'wide-column')

        return table

    def _get_monte_carlos_simulator_outputs(self, scenarios_df: PricesDataFrame, total_returns: SimpleReturnsSeries) \
            -> DFTable:
        """
        Creates a table summarizing the simulations: median and mean return, mean volatility adjusted return,
        median maximum drawdown, return / drawdown ratio and the probability of a positive return.
        """
        _, all_scenarios_number = scenarios_df.shape
        rows = []

        # Add the Median Return value
        median_return = np.median(total_returns)
        rows.append(("Median Return", "{:.2%}".format(median_return)))

        # Add the Mean Return value
        mean_return = total_returns.mean()
        rows.append(("Mean Return", "{:.2%}".format(mean_return)))

        # Add the Mean Volatility Adjusted Return: a constant per-trade return of mean - 0.5 * std^2, compounded
        # over the average number of trades per year
        trade_returns = QFSeries(data=[trade.percentage_pnl for trade in self.trades])
        sample_len = int(self._average_number_of_trades_per_year())
        std = trade_returns.std()
        expectation_adj_series = np.ones(sample_len) * (trade_returns.mean() - 0.5 * std * std)
        expectation_adj_series = SimpleReturnsSeries(data=expectation_adj_series)
        expectation_adj_series = expectation_adj_series.to_prices(suggested_initial_date=0)
        mean_volatility_adjusted_return = expectation_adj_series.iloc[-1] / expectation_adj_series.iloc[0] - 1.0

        rows.append(("Mean Volatility Adjusted Return", "{:.2%}".format(mean_volatility_adjusted_return)))

        # Add the Median Drawdown
        max_drawdowns = max_drawdown(scenarios_df)
        median_drawdown = np.median(max_drawdowns)
        rows.append(("Median Maximum Drawdown", "{:.2%}".format(median_drawdown)))

        # Add the Median Return / Median Drawdown
        rows.append(("Return / Drawdown", "{:.2f}".format(median_return / median_drawdown)))

        # Probability, that the return will be > 0
        scenarios_with_positive_result = total_returns[total_returns > 0.0].count()
        probability = scenarios_with_positive_result / all_scenarios_number
        rows.append(("Probability of positive return", "{:.2%}".format(probability)))

        table = DFTable(data=QFDataFrame.from_records(rows, columns=["Measure", "Value"]),
                        css_classes=['table', 'left-align'])
        table.add_columns_classes(["Measure"], 'wide-column')

        return table

    def save(self, report_dir: str = "trades_analysis"):
        """
        Exports the document to a PDF file named "<YYYY_MM_DD-HHMM> <title>.pdf" (timestamp taken from the current
        time) using the pdf exporter.

        Parameters
        -----------
        report_dir: str
            directory passed to the pdf exporter as the place for the report
        """
        # Set the style for the report (plt is the alias under which this module imports matplotlib)
        plt.style.use(['tearsheet'])

        # Only the timestamp goes through strftime, so that a '%' character in the title is never interpreted
        # as a format directive
        timestamp = datetime.now().strftime("%Y_%m_%d-%H%M")
        filename = "{} {}.pdf".format(timestamp, self.title)
        self.pdf_exporter.generate([self.document], report_dir, filename)