Source code for qf_lib.data_providers.yfinance.yfinance_data_provider

#     Copyright 2016-present CERN – European Organization for Nuclear Research
#
#     Licensed under the Apache License, Version 2.0 (the "License");
#     you may not use this file except in compliance with the License.
#     You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#     Unless required by applicable law or agreed to in writing, software
#     distributed under the License is distributed on an "AS IS" BASIS,
#     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#     See the License for the specific language governing permissions and
#     limitations under the License.
import warnings
from datetime import datetime
from typing import Set, Type, Union, Sequence, Dict, Optional

from pandas import MultiIndex

from qf_lib.common.enums.frequency import Frequency
from qf_lib.common.enums.price_field import PriceField
from qf_lib.common.tickers.tickers import Ticker
from qf_lib.common.utils.dateutils.relative_delta import RelativeDelta
from qf_lib.common.utils.dateutils.timer import Timer
from qf_lib.common.utils.miscellaneous.to_list_conversion import convert_to_list
from qf_lib.containers.dataframe.qf_dataframe import QFDataFrame
from qf_lib.containers.qf_data_array import QFDataArray
from qf_lib.containers.series.qf_series import QFSeries
from qf_lib.data_providers.abstract_price_data_provider import AbstractPriceDataProvider
from qf_lib.data_providers.helpers import normalize_data_array
from qf_lib.data_providers.yfinance.yfinance_ticker import YFinanceTicker

try:
    import yfinance as yf

    is_yfinance_intalled = True
except ImportError:
    is_yfinance_intalled = False


[docs]class YFinanceDataProvider(AbstractPriceDataProvider): """ Data Provider using the yfinance library to provide historical data of various frequencies. Parameters ----------- timer: Timer Might be either SettableTimer or RealTimer depending on the use case. If no parameter is passed, a default RealTimer object will be used. """ def __init__(self, timer: Optional[Timer] = None): super().__init__(timer) if not is_yfinance_intalled: warnings.warn("yfinance ist not installed. If you would like to use YFinanceDataProvider first install the" " yfinance library.") exit(1)
[docs] def price_field_to_str_map(self, *args) -> Dict[PriceField, str]: return { PriceField.Open: 'Open', PriceField.High: 'High', PriceField.Low: 'Low', PriceField.Close: 'Close', PriceField.Volume: 'Volume' }
[docs] def get_history(self, tickers: Union[YFinanceTicker, Sequence[YFinanceTicker]], fields: Union[None, str, Sequence[str]], start_date: datetime, end_date: datetime = None, frequency: Frequency = None, look_ahead_bias: bool = False, **kwargs) -> Union[QFSeries, QFDataFrame, QFDataArray]: """ Gets historical attributes (fields) of different securities (tickers). Parameters ---------- tickers: YFinanceTicker, Sequence[YFinanceTicker] tickers for securities which should be retrieved fields: None, str, Sequence[str] fields of securities which should be retrieved. start_date: datetime date representing the beginning of historical period from which data should be retrieved end_date: datetime date representing the end of historical period from which data should be retrieved; if no end_date was provided, by default the current date will be used frequency: Frequency frequency of the data. This data provider supports Monthly, Weekly, Daily frequencies along with intraday frequencies at the following intervals: 60, 30, 15, 5 and 1 minute. look_ahead_bias: bool if set to False, the look-ahead bias will be taken care of to make sure no future data is returned Returns ------- QFSeries, QFDataFrame, QFDataArray, float, str If possible the result will be squeezed, so that instead of returning a QFDataArray, data of lower dimensionality will be returned. The results will be either a QFDataArray (with 3 dimensions: date, ticker, field), a QFDataFrame (with 2 dimensions: date, ticker or field; it is also possible to get 2 dimensions ticker and field if single date was provided), a QFSeries (with 1 dimensions: date) or a float / str (in case if a single ticker, field and date were provided). If no data is available in the database or a non existing ticker was provided an empty structure (nan, QFSeries, QFDataFrame or QFDataArray) will be returned. """ frequency = frequency or self.frequency or Frequency.DAILY original_end_date = (end_date or self.timer.now()) + RelativeDelta(second=0, microsecond=0) # In case of low frequency (daily or lower) shift the original end date to point to 23:59 if frequency <= Frequency.DAILY: original_end_date += RelativeDelta(hour=23, minute=59) end_date = original_end_date if look_ahead_bias else ( self.get_end_date_without_look_ahead(original_end_date, frequency)) start_date = self._adjust_start_date(start_date, frequency) got_single_date = self._got_single_date(start_date, original_end_date, frequency) tickers, got_single_ticker = convert_to_list(tickers, YFinanceTicker) fields, got_single_field = convert_to_list(fields, (PriceField, str)) tickers_str = [t.as_string() for t in tickers] df = yf.download(list(set(tickers_str)), start_date, end_date, keepna=True, interval=self._frequency_to_period(frequency), progress=False) df = df.reindex(columns=MultiIndex.from_product([fields, tickers_str])) values = df.values.reshape(len(df), len(tickers), len(fields)) qf_data_array = QFDataArray.create(df.index.rename("dates"), tickers, fields, values) return normalize_data_array( qf_data_array, tickers, fields, got_single_date, got_single_ticker, got_single_field, use_prices_types=False )
[docs] def supported_ticker_types(self) -> Set[Type[Ticker]]: return {YFinanceTicker}
@staticmethod def _frequency_to_period(freq: Frequency): frequencies_mapping = { Frequency.MIN_1: '1m', Frequency.MIN_5: '5m', Frequency.MIN_15: '15m', Frequency.MIN_30: '30m', Frequency.MIN_60: '60m', Frequency.DAILY: '1d', Frequency.WEEKLY: '1wk', Frequency.MONTHLY: '1mo', Frequency.QUARTERLY: '3mo', } try: return frequencies_mapping[freq] except KeyError: raise ValueError(f"Frequency must be one of the supported frequencies: {frequencies_mapping.keys()}.") \ from None