# Source code for qf_lib.common.utils.miscellaneous.consecutive_duplicates

#     Copyright 2016-present CERN – European Organization for Nuclear Research
#
#     Licensed under the Apache License, Version 2.0 (the "License");
#     you may not use this file except in compliance with the License.
#     You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#     Unless required by applicable law or agreed to in writing, software
#     distributed under the License is distributed on an "AS IS" BASIS,
#     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#     See the License for the specific language governing permissions and
#     limitations under the License.

from enum import Enum

from qf_lib.containers.series.qf_series import QFSeries


class Method(Enum):
    """Which element of a run of equal consecutive values is retained."""

    #: Keep the first occurrence of the value
    KEEP_FIRST = 1

    #: Keep the last occurrence of the value
    KEEP_LAST = 2
def drop_consecutive_duplicates(series: QFSeries, method) -> QFSeries:
    """
    Collapse every run of equal consecutive values into a single element.

    Each element is compared with its direct neighbour (the previous one for
    ``Method.KEEP_FIRST``, the next one for ``Method.KEEP_LAST``) and is kept
    only when the two differ. Kept elements retain their original index
    labels, so the result carries the date of the first/last occurrence of
    each run.

    Parameters
    ----------
    series: QFSeries
        series from which the consecutive duplicates should be removed
    method
        Method.KEEP_FIRST: keep the first occurrence of each run
        Method.KEEP_LAST: keep the last occurrence of each run

    Returns
    -------
    QFSeries
        the elements of ``series`` that differ from the neighbour selected by
        ``method``. For example:

        original series:
         - 2010-01-01 True
         - 2010-01-02 False
         - 2010-01-03 False
         - 2010-01-04 False
         - 2010-01-05 True
         - 2010-01-06 True
         - 2010-01-07 False
         - 2010-01-08 False

        after removing duplicates with Method.KEEP_FIRST:
         - 2010-01-01 True
         - 2010-01-02 False
         - 2010-01-05 True
         - 2010-01-07 False

        after removing duplicates with Method.KEEP_LAST:
         - 2010-01-01 True
         - 2010-01-04 False
         - 2010-01-06 True
         - 2010-01-08 False

    Raises
    ------
    ValueError
        if ``method`` is neither Method.KEEP_FIRST nor Method.KEEP_LAST
    """
    # Offset passed to ``shift``: +1 lines every element up with its
    # predecessor, -1 with its successor.
    neighbour_offset = None
    if method == Method.KEEP_FIRST:
        neighbour_offset = 1
    elif method == Method.KEEP_LAST:
        neighbour_offset = -1

    if neighbour_offset is None:
        raise ValueError("Incorrect method: {:s}".format(str(method)))

    # NOTE(review): missing values presumably compare as unequal under ``!=``,
    # which would mean consecutive NaN entries are never merged - confirm
    # whether that is the intended behaviour.
    differs_from_neighbour = series.shift(neighbour_offset) != series
    return series.loc[differs_from_neighbour]