# Source code for qf_lib.common.utils.factorization.data_presenters.data_presenter

#     Copyright 2016-present CERN – European Organization for Nuclear Research
#
#     Licensed under the Apache License, Version 2.0 (the "License");
#     you may not use this file except in compliance with the License.
#     You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#     Unless required by applicable law or agreed to in writing, software
#     distributed under the License is distributed on an "AS IS" BASIS,
#     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#     See the License for the specific language governing permissions and
#     limitations under the License.

from datetime import datetime
from itertools import cycle
from typing import Mapping, Sequence

import numpy as np
import pandas as pd

from qf_lib.analysis.timeseries_analysis.timeseries_analysis import TimeseriesAnalysis
from qf_lib.common.enums.axis import Axis
from qf_lib.common.enums.matplotlib_location import Location
from qf_lib.common.enums.orientation import Orientation
from qf_lib.common.utils.dateutils.date_format import DateFormat
from qf_lib.common.utils.factorization.data_models.data_model import DataModel
from qf_lib.common.utils.factorization.factors_identification.elastic_net_factors_identifier import \
    ElasticNetFactorsIdentifier
from qf_lib.containers.series.prices_series import PricesSeries
from qf_lib.containers.series.qf_series import QFSeries
from qf_lib.containers.series.simple_returns_series import SimpleReturnsSeries
from qf_lib.plotting.charts.bar_chart import BarChart
from qf_lib.plotting.charts.chart import Chart
from qf_lib.plotting.charts.heatmap.color_bar import ColorBar
from qf_lib.plotting.charts.heatmap.heatmap_chart import HeatMapChart
from qf_lib.plotting.charts.heatmap.values_annotations import ValuesAnnotations
from qf_lib.plotting.charts.line_chart import LineChart
from qf_lib.plotting.decorators.axes_formatter_decorator import AxesFormatterDecorator, PercentageFormatter
from qf_lib.plotting.decorators.axes_label_decorator import AxesLabelDecorator
from qf_lib.plotting.decorators.axis_tick_labels_decorator import AxisTickLabelsDecorator
from qf_lib.plotting.decorators.coordinate import DataCoordinate, AxesCoordinate
from qf_lib.plotting.decorators.data_element_decorator import DataElementDecorator
from qf_lib.plotting.decorators.legend_decorator import LegendDecorator
from qf_lib.plotting.decorators.stem_decorator import StemDecorator
from qf_lib.plotting.decorators.text_decorator import TextDecorator
from qf_lib.plotting.decorators.title_decorator import TitleDecorator
from qf_lib.plotting.decorators.vertical_span_decorator import VerticalSpanDecorator
from qf_lib.plotting.helpers.index_translator import IndexTranslator


class DataPresenter:
    """
    Class used for presenting the data stored in the FactorizationDataModel.

    Parameters
    ----------
    model
        model for the data, which should be presented
    ticker_to_security_name_dict
        dictionary mapping tickers to security names
    enet_factors_identifier
        optional identifier whose ``coeffs_chart`` and ``mse_chart`` attributes are returned by
        ``enet_coeffs_chart`` and ``enet_mse_chart``; when it is None those methods return None
    """

    def __init__(self, model: DataModel, ticker_to_security_name_dict: Mapping[str, str],
                 enet_factors_identifier: ElasticNetFactorsIdentifier = None):
        self.model = model
        self.ticker_to_security_name_dict = ticker_to_security_name_dict
        self._enet_factors_identifier = enet_factors_identifier

        # thickness used for the bars of every bar chart created by this presenter
        self._bars_width = 0.5

    def model_info(self) -> str:
        """
        Builds a textual summary of the model: the name of the analysed series, today's date, one line per
        regressor (coefficient, ticker, security name), the intercept and the range of the analysed returns.
        """
        analysed_tms = self.model.input_data.analysed_tms
        regressors_df = self.model.input_data.regressors_df

        fund_name = self._get_security_name(analysed_tms.name)
        iso_date = DateFormat.ISO.format_string
        today_iso_date = datetime.today().strftime(iso_date)

        model_info = '\n=============================================================\n' \
                     '\t Fund name: {} {} \n' \
                     '=============================================================\n' \
                     '\t Coefficient \tTicker Security name\n' \
                     '-------------------------------------------------------------\n'.format(fund_name, today_iso_date)

        for col_nr, ticker in enumerate(regressors_df.columns):
            security_name = self.ticker_to_security_name_dict.get(ticker, "")
            # col_nr is a position, not a label: use .iloc so that the lookup doesn't depend on
            # the deprecated positional fallback of Series.__getitem__
            model_info += '\t {:+5.3f} {:15s} {:s}\n'.format(
                self.model.coefficients.iloc[col_nr], ticker, security_name)

        model_info += '\nIntercept is equal to {:4.2f} \n\n'.format(self.model.intercept)

        start_date = analysed_tms.index[0].strftime(iso_date)
        end_date = analysed_tms.index[-1].strftime(iso_date)
        num_of_returns = len(analysed_tms)
        frequency = str(self.model.input_data.frequency)

        model_info += 'The model was build based on {:d} {:s} returns \n' \
                      'between {:s} and {:s}. \n'.format(num_of_returns, frequency, start_date, end_date)

        return model_info

    def get_model_and_fund_statistics(self) -> str:
        """
        Returns the R squared of the fit followed by the table produced by
        ``TimeseriesAnalysis.values_in_table`` for ``model.fund_tms_analysis``.
        """
        statistics = "\nFit R Square = {:5.3f} \n\n" \
                     "{:44s} \t Fit \n".format(self.model.r_squared, self.model.input_data.analysed_tms.name)
        statistics += TimeseriesAnalysis.values_in_table(self.model.fund_tms_analysis)

        return statistics

    def historical_performance_chart(self) -> LineChart:
        """
        Creates a line chart with the cumulative returns of the analysed series and of the fitted series.
        """
        frequency = self.model.input_data.frequency
        analysed_tms = self.model.input_data.analysed_tms
        fitted_tms = self.model.fitted_tms

        # prices starting at 1.0, minus 1, give the cumulative return
        cumulative_fund_rets = analysed_tms.to_prices(initial_price=1.0, frequency=frequency) - 1
        cumulative_fit_rets = fitted_tms.to_prices(initial_price=1.0, frequency=frequency) - 1

        hist_performance_chart = LineChart()
        fund_cummulative_rets_data_elem = DataElementDecorator(cumulative_fund_rets)
        fit_cummulative_rets_data_elem = DataElementDecorator(cumulative_fit_rets)

        legend_decorator = LegendDecorator(legend_placement=Location.LOWER_RIGHT)
        legend_decorator.add_entry(fund_cummulative_rets_data_elem, self._get_security_name(analysed_tms.name))
        legend_decorator.add_entry(fit_cummulative_rets_data_elem, 'Fit')

        hist_performance_chart.add_decorator(fund_cummulative_rets_data_elem)
        hist_performance_chart.add_decorator(fit_cummulative_rets_data_elem)

        hist_performance_chart.add_decorator(TitleDecorator("Historical Performance"))
        hist_performance_chart.add_decorator(AxesLabelDecorator(y_label="Cumulative return"))
        hist_performance_chart.add_decorator(legend_decorator)
        hist_performance_chart.add_decorator(AxesFormatterDecorator(y_major=PercentageFormatter()))

        return hist_performance_chart

    def historical_out_of_sample_performance_chart(self) -> LineChart:
        """
        Creates a line chart with the cumulative returns of the analysed and the fitted series, split at
        ``model.oos_start_date`` into an "In Sample" and an "Out Of Sample" part. The span between the
        split date and the last date is marked with a vertical span.
        """
        analysed_tms = self.model.input_data.analysed_tms
        frequency = self.model.input_data.frequency
        fund_cumulative_rets = analysed_tms.to_prices(
            initial_price=1.0, frequency=frequency) - 1  # type: PricesSeries
        fit_cumulative_rets = self.model.fitted_tms.to_prices(
            initial_price=1.0, frequency=frequency) - 1  # type: PricesSeries

        live_start_date = self.model.oos_start_date

        in_sample_fund_tms = fund_cumulative_rets.loc[:live_start_date]
        in_sample_fit_tms = fit_cumulative_rets.loc[:live_start_date]

        out_of_sample_fund_tms = fund_cumulative_rets.loc[live_start_date:]
        out_of_sample_fit_tms = fit_cumulative_rets.loc[live_start_date:]

        # both parts of a series share one color, so each series looks like a single line
        colors = Chart.get_axes_colors()

        in_sample_fund_data_elem = DataElementDecorator(in_sample_fund_tms, color=colors[0])
        out_of_sample_fund_data_elem = DataElementDecorator(out_of_sample_fund_tms, color=colors[0])

        in_sample_fit_data_elem = DataElementDecorator(in_sample_fit_tms, color=colors[1])
        out_of_sample_fit_data_elem = DataElementDecorator(out_of_sample_fit_tms, color=colors[1])

        # only the in-sample elements are added to the legend (one entry per series)
        legend_decorator = LegendDecorator(legend_placement=Location.LOWER_RIGHT)
        legend_decorator.add_entry(in_sample_fund_data_elem, self._get_security_name(analysed_tms.name))
        legend_decorator.add_entry(in_sample_fit_data_elem, 'Fit')

        is_vs_oos_performance_chart = LineChart()
        is_vs_oos_performance_chart.add_decorator(in_sample_fund_data_elem)
        is_vs_oos_performance_chart.add_decorator(out_of_sample_fund_data_elem)

        is_vs_oos_performance_chart.add_decorator(in_sample_fit_data_elem)
        is_vs_oos_performance_chart.add_decorator(out_of_sample_fit_data_elem)

        is_vs_oos_performance_chart.add_decorator(AxesFormatterDecorator(y_major=PercentageFormatter()))
        is_vs_oos_performance_chart.add_decorator(AxesLabelDecorator(y_label="Cumulative return [%]"))
        is_vs_oos_performance_chart.add_decorator(legend_decorator)

        is_vs_oos_performance_chart.add_decorator(TextDecorator(
            "In Sample ", x=DataCoordinate(live_start_date), y=AxesCoordinate(0.99),
            verticalalignment='top', horizontalalignment='right'))

        is_vs_oos_performance_chart.add_decorator(TextDecorator(
            " Out Of Sample", x=DataCoordinate(live_start_date), y=AxesCoordinate(0.99),
            verticalalignment='top', horizontalalignment='left'))

        last_date = fund_cumulative_rets.index[-1]
        is_vs_oos_performance_chart.add_decorator(VerticalSpanDecorator(x_min=live_start_date, x_max=last_date))

        return is_vs_oos_performance_chart

    def _named_coefficients(self, benchmark_coefficients: Sequence[float] = None):
        """
        Prepares the data shown by ``beta_and_alpha_chart``.

        Parameters
        ----------
        benchmark_coefficients
            optional values subtracted element-wise from the model's coefficients

        Returns
        -------
        tuple
            ``(names, values, title)`` - list of labels, numpy array of values and the title of the chart.
            If the model fits an intercept, the first label is "intercept" and the first value is
            ``model.intercept``.

        Raises
        ------
        ValueError
            if benchmark coefficients are given for a model which fits an intercept
        """
        fits_intercept = self.model.input_data.is_fit_intercept
        if fits_intercept and benchmark_coefficients is not None:
            # fail before doing any work
            raise ValueError("Benchmark coefficients aren't used when model contains a bias value (constant)")

        names = [self._get_security_name(ticker) for ticker in self.model.coefficients.index.values]
        values = np.asarray(self.model.coefficients.values)
        title = 'Coefficients of regressors'

        if fits_intercept:
            # plain list concatenation: np.insert would cast "intercept" to the fixed-width string dtype
            # of the existing names and truncate it whenever all of them are shorter than 9 characters
            names = ["intercept"] + names
            values = np.insert(values, 0, self.model.intercept)
        elif benchmark_coefficients is not None:
            # out-of-place subtraction: an in-place "-=" would write into the array owned by the model
            values = values - np.asarray(benchmark_coefficients)
            title = 'Relative coefficients of regressors'

        return names, values, title

    def beta_and_alpha_chart(self, benchmark_coefficients: Sequence[float] = None) -> BarChart:
        """
        Creates a horizontal bar chart with the coefficients of the regressors and, if the model fits
        an intercept, with the intercept (drawn in gold as the first bar).

        Parameters
        ----------
        benchmark_coefficients
            optional values subtracted from the coefficients (the chart then shows relative coefficients);
            can't be used when the model fits an intercept

        Raises
        ------
        ValueError
            if benchmark coefficients are given for a model which fits an intercept
        """
        coeff_names, coeff_values, title = self._named_coefficients(benchmark_coefficients)

        colors_palette = Chart.get_axes_colors()
        bars_colors = [colors_palette[0]] * len(self.model.coefficients)
        if self.model.input_data.is_fit_intercept:
            bars_colors = ['gold'] + bars_colors

        index_translator = self._get_index_translator(coeff_names)
        coefficients = QFSeries(index=pd.Index(coeff_names), data=coeff_values)

        bar_chart = BarChart(orientation=Orientation.Horizontal, index_translator=index_translator,
                             thickness=self._bars_width, align='center')
        bar_chart.add_decorator(DataElementDecorator(coefficients, color=bars_colors))
        bar_chart.add_decorator(TitleDecorator(title))
        bar_chart.add_decorator(AxesLabelDecorator(x_label="sensitivity"))

        labels = ['{:.2f}'.format(value) for value in coeff_values]
        self._add_labels_for_bars(bar_chart, coefficients, labels)

        return bar_chart

    def performance_attribution_chart(self) -> BarChart:
        """
        Creates a horizontal bar chart with the unexplained return, the return attributed to each factor
        and the CAGR of the analysed series (in that order).
        """
        colors_palette = Chart.get_axes_colors()

        unexplained_ret = self.model.unexplained_performance_attribution_ret
        factors_ret = self.model.factors_performance_attribution_ret
        fund_ret = self.model.fund_tms_analysis.cagr

        unexplained_name = "Unexplained"
        factors_names = [self._get_security_name(ticker) for ticker in self.model.coefficients.index.values]
        fund_name = self._get_security_name(self.model.input_data.analysed_tms.name)

        all_values = [unexplained_ret] + list(factors_ret) + [fund_ret]
        all_names = [unexplained_name] + list(factors_names) + [fund_name]
        all_returns = SimpleReturnsSeries(data=all_values, index=pd.Index(all_names))

        # one color for the unexplained part, one for all the factors and one for the fund
        colors = [colors_palette[0]] + [colors_palette[1]] * len(factors_names) + [colors_palette[2]]

        index_translator = self._get_index_translator(labels=all_names)
        bar_chart = BarChart(orientation=Orientation.Horizontal, index_translator=index_translator,
                             thickness=self._bars_width, align='center')
        bar_chart.add_decorator(DataElementDecorator(all_returns, color=colors))
        bar_chart.add_decorator(TitleDecorator("Attribution of Fund Annualised Return"))
        bar_chart.add_decorator(AxesLabelDecorator(x_label="annualised return [%]"))
        bar_chart.add_decorator(AxesFormatterDecorator(x_major=PercentageFormatter()))

        labels = ['{:.2f}'.format(value * 100) for value in all_returns]
        self._add_labels_for_bars(bar_chart, all_returns, labels)

        return bar_chart

    def risk_contribution_chart(self) -> BarChart:
        """
        Creates a horizontal bar chart with the values of ``model.risk_contribution``, labelled with
        the security names of the regressors.
        """
        colors_palette = Chart.get_axes_colors()

        tickers = self.model.input_data.regressors_df.columns.values
        names = [self._get_security_name(ticker) for ticker in tickers]
        risk_contributions = QFSeries(data=self.model.risk_contribution.values, index=pd.Index(names))

        index_translator = self._get_index_translator(labels=names)
        bar_chart = BarChart(orientation=Orientation.Horizontal, index_translator=index_translator,
                             thickness=self._bars_width, align='center')
        bar_chart.add_decorator(DataElementDecorator(risk_contributions, color=colors_palette[1]))
        bar_chart.add_decorator(TitleDecorator("Risk contribution"))
        bar_chart.add_decorator(AxesLabelDecorator(x_label="risk contribution [%]"))
        bar_chart.add_decorator(AxesFormatterDecorator(x_major=PercentageFormatter()))

        labels = ['{:.2f}'.format(value * 100) for value in risk_contributions]
        self._add_labels_for_bars(bar_chart, risk_contributions, labels, margin=0.001)

        return bar_chart

    def correlation_matrix_chart(self) -> HeatMapChart:
        """
        Creates a heat map (values limited to [-1, 1]) of ``model.correlation_matrix`` with annotated values,
        a color bar and the security names used as tick labels.
        """
        data = self.model.correlation_matrix
        names = [self._get_security_name(ticker) for ticker in self.model.correlation_matrix.columns.values]

        heatmap_chart = HeatMapChart(data, min_value=-1, max_value=1)
        heatmap_chart.add_decorator(AxisTickLabelsDecorator(axis=Axis.X, labels=names))
        # a real list instead of a one-shot reversed() iterator: the labels stay available
        # even if the decorator reads them more than once
        heatmap_chart.add_decorator(AxisTickLabelsDecorator(axis=Axis.Y, labels=list(reversed(names))))
        heatmap_chart.add_decorator(ValuesAnnotations())
        heatmap_chart.add_decorator(ColorBar())
        heatmap_chart.add_decorator(TitleDecorator("Correlation matrix"))

        return heatmap_chart

    def get_r_squared_info(self) -> str:
        """
        Returns a table with the R squared of the fit followed by the R squared of each predictor.
        """
        r_squared_fit = self.model.r_squared

        # names are padded / truncated to exactly 35 characters and left aligned
        header = ' R-Square Name'
        equals_separator = '============================================================='
        fit_info = ' {r_square:<8.6f} {fit_name:<35.35s}'.format(r_square=r_squared_fit, fit_name='Fit')
        dash_separator = '-------------------------------------------------------------'

        r_squared_of_predictors = self.model.r_squared_of_each_predictor
        predictors_info = []

        for ticker, r_squared in r_squared_of_predictors.items():
            info = ' {r_squared:<8.6f} {predictor_name:<35.35s}'.format(
                r_squared=r_squared, predictor_name=self._get_security_name(ticker))
            predictors_info.append(info)

        return '\n'.join([header, equals_separator, fit_info, dash_separator] + predictors_info) + '\n'

    def get_durbin_watson_test_info(self) -> str:
        """
        Returns the value of ``model.durbin_watson_test`` formatted as text.
        """
        return 'Durbin-Watson test: \n d = {:1.3f}'.format(self.model.durbin_watson_test)

    def get_autocorrelation_info(self) -> str:
        """
        Returns one line per element of ``model.autocorrelation``; lags are numbered starting from 1.
        """
        auto_correlations = self.model.autocorrelation
        infos = ['autocorrelations:']

        for i, auto_corr in enumerate(auto_correlations, start=1):
            infos.append(' Autocorrelation (lag {:d}): {:s}'.format(i, str(auto_corr)))

        return '\n'.join(infos)

    def get_t_statistics_info(self) -> str:
        """
        Returns one line with the t-statistic and the security name for each regressor.
        """
        t_values = self.model.t_values

        if self.model.input_data.is_fit_intercept:
            t_values = t_values[:-1]  # don't take the last t-value which corresponds to the "constant" factor

        infos = ["t-statistics:"]

        for ticker, t_val in t_values.items():
            infos.append(' {:< 9.3f} {:s}'.format(t_val, self._get_security_name(ticker)))

        return '\n'.join(infos)

    def get_p_values_info(self) -> str:
        """
        Returns one line with the p-value and the security name for each regressor.
        """
        p_values = self.model.p_values

        if self.model.input_data.is_fit_intercept:
            p_values = p_values[:-1]  # don't take the last p-value which corresponds to the "constant" factor

        infos = ["p-values"]

        for ticker, p_val in p_values.items():
            infos.append(' {:< 9.3f} {:s}'.format(p_val, self._get_security_name(ticker)))

        return '\n'.join(infos)

    def get_condition_number_info(self) -> str:
        """
        Returns the value of ``model.condition_number`` formatted as text.
        """
        cond_number = self.model.condition_number
        return ' ContidionIndex = {:5.2f}'.format(cond_number)

    def cooks_distance_chart(self) -> LineChart:
        """
        Creates a stem chart of ``model.cooks_distance_tms``.
        """
        cooks_dist = self.model.cooks_distance_tms

        chart = LineChart()
        colors = cycle(Chart.get_axes_colors())
        color = next(colors)

        marker_props = {'alpha': 0.7}
        stemline_props = {'linestyle': '-.', 'linewidth': 0.2}
        baseline_props = {'visible': False}
        marker_props['markeredgecolor'] = color
        marker_props['markerfacecolor'] = color
        stemline_props['color'] = color

        data_elem = StemDecorator(cooks_dist, marker_props=marker_props, stemline_props=stemline_props,
                                  baseline_props=baseline_props)

        chart.add_decorator(data_elem)
        chart.add_decorator(TitleDecorator("Cook's Distance"))
        chart.add_decorator(AxesLabelDecorator(y_label="max change of coefficients"))

        return chart

    def regressors_and_explained_variable_chart(self) -> LineChart:
        """
        Creates a stem chart with the returns of all the regressors and of the analysed series,
        each of them drawn with its own color and listed in the legend.
        """
        regressors_df = self.model.input_data.regressors_df
        fund_tms = self.model.input_data.analysed_tms

        chart = LineChart()
        legend = LegendDecorator()

        # add data to the chart and the legend
        marker_props_template = {'alpha': 0.5}
        stemline_props_template = {'linestyle': '-.', 'linewidth': 0.2}
        baseline_props = {'visible': False}

        regressors_and_fund_df = pd.concat([regressors_df, fund_tms], axis=1)
        colors = cycle(Chart.get_axes_colors())

        for ticker, series in regressors_and_fund_df.items():
            # copy the templates, so that each series gets its own, independent properties
            marker_props = marker_props_template.copy()
            stemline_props = stemline_props_template.copy()

            color = next(colors)
            marker_props['markeredgecolor'] = color
            marker_props['markerfacecolor'] = color
            stemline_props['color'] = color
            data_elem = StemDecorator(series, marker_props=marker_props, stemline_props=stemline_props,
                                      baseline_props=baseline_props)
            chart.add_decorator(data_elem)
            legend.add_entry(data_elem, self._get_security_name(ticker))

        # add decorators to the chart
        chart.add_decorator(TitleDecorator("Returns"))
        chart.add_decorator(AxesLabelDecorator(y_label="return [%]"))
        chart.add_decorator(legend)
        chart.add_decorator(AxesFormatterDecorator(y_major=PercentageFormatter()))

        return chart

    def enet_coeffs_chart(self):
        """
        Returns ``coeffs_chart`` of the elastic net factors identifier or None if no identifier was given.
        """
        if self._enet_factors_identifier is None:
            return None

        return self._enet_factors_identifier.coeffs_chart

    def enet_mse_chart(self):
        """
        Returns ``mse_chart`` of the elastic net factors identifier or None if no identifier was given.
        """
        if self._enet_factors_identifier is None:
            return None

        return self._enet_factors_identifier.mse_chart

    def _get_index_translator(self, labels):
        """
        Creates an IndexTranslator which maps each label to its position (0, 1, 2, ...) in ``labels``.
        """
        tick_locations = range(len(labels))
        labels_to_locations_dict = dict(zip(labels, tick_locations))
        return IndexTranslator(labels_to_locations_dict)

    def _add_labels_for_bars(self, bar_chart, values_series, labels, margin=0.002):
        """
        Adds one text label per bar. A label is placed ``margin`` to the right of the end of the bar
        for non-negative values and ``margin`` to the right of zero for negative values.
        """
        index_translator = bar_chart.index_translator

        x_positions = values_series.apply(lambda x: max(0, x)).values + margin
        y_positions = index_translator.translate(values_series.index.values)

        for x_pos, y_pos, label in zip(x_positions, y_positions, labels):
            text_decorator = TextDecorator(label, y=DataCoordinate(y_pos), x=DataCoordinate(x_pos), clip_on=False,
                                           verticalalignment='center', horizontalalignment='left')
            bar_chart.add_decorator(text_decorator)

    def _get_security_name(self, ticker):
        """
        Returns the security name mapped to the ticker or the ticker itself (as a string) if there is no mapping.
        Tickers which aren't strings are converted with their ``as_string()`` method first.
        """
        if not isinstance(ticker, str):
            ticker = ticker.as_string()

        return self.ticker_to_security_name_dict.get(ticker, ticker)