Source code for secScraper.display

import matplotlib
# matplotlib.use('Qt5Agg')  # Use another backend
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import os


[docs]def run_from_ipython():
    """
    Check if the script is run from command line or from a Jupyter Notebook.

    :return: bool that is True if run from Jupyter Notebook
    """
    try:
        __IPYTHON__
        return True
    except NameError:
        return False


[docs]def histogram_width(qtr_metric_result, metrics, s):
    center = []
    
    for qtr in s['list_qtr'][s['lag']:]:
        # print("Values", qtr_metric_result)
        try:
            center.append(matplotlib.dates.date2num(qtr_metric_result[qtr]['0']['published']))
        except:
            pass
    center = np.array(center)
    diff_center = np.diff(center)  # Distance between two groups of histograms
    r = s['histogram_date_span_ratio']
    return int(min(diff_center)*r/len(metrics))  # Based on the min of that distance


[docs]def diff_vs_stock(qtr_metric_result, ticker_data, ticker, s, method='diff'):
    """
    Display the calculated data for a given ticker across the time_range that was specified.

    :param qtr_metric_result: Dictionary containing the data to plot
    :param ticker_data: Daily stock value for the ticker considered
    :param ticker: Company ticker on the US stock exchange
    :param s: Settings dictionary
    :param method: Specify if a difference between two reports or an analysis of each report.
    :return: void
    """
    # 0. Select the type of plot
    if method == 'diff':
        metrics = s['diff_metrics']
    elif method == 'sentiment':
        metrics = s['sing_metrics']
    else:
        raise ValueError('[ERROR] Method unknown')

    if not metrics:
        raise ValueError("[ERROR] No metrics were computed for method {}.".format(method))

    # 1. Display the stock data
    lists = sorted(ticker_data.items())  # sorted by key, return a list of tuples
    benchmark_x, data_y = zip(*lists)  # unzips the dates & financial data
    #benchmark_x = matplotlib.dates.date2num(benchmark_x)
    # At this point, y is a list of lists. We need to extract the price from it.
    benchmark_y, market_cap = zip(*data_y)  # Will crash if len(y) > 2? Or ignore the rest?
    benchmark = zip(benchmark_x, benchmark_y)
    
    # 2. Display the histogram
    width = histogram_width(qtr_metric_result, metrics, s)

    # Now we plot all the histograms, one metric at a time
    
    metric_data = list()
    for ii, m in enumerate(metrics):
        # Get the data for each quarter on that metric
        x = list()
        y = list()
        for idx, qtr in enumerate(qtr_metric_result):
            if len(qtr_metric_result[qtr]) == 0:
                print("[ERROR] No data for qtr {}?".format(qtr))
                continue
            else:
                #print("qtr", qtr)
                #print("qtr_metric", qtr_metric_result[qtr])
                #print(qtr_metric_result)
                center = matplotlib.dates.date2num(qtr_metric_result[qtr]['0']['published'])
                if method == 'diff':
                    position = center - width*(len(metrics))/2 + width*ii
                elif method == 'sentiment':
                    position = center
                x.append(matplotlib.dates.num2date(position).date())
                y.append(qtr_metric_result[qtr]['total'][m])

        metric_data.append(zip(x, y))
    
    return benchmark, metric_data
    


[docs]def plot_diff_vs_stock(benchmark, metric_data, ticker, s, method='diff'):
    # 0. Select the type of plot
    if method == 'diff':
        metrics = s['diff_metrics']
    elif method == 'sentiment':
        metrics = s['sing_metrics']
    else:
        raise ValueError('[ERROR] Method unknown')
    
    
    fig, ax1 = plt.subplots(figsize=(15, 5))
    #benchmark_x, benchmark_y = zip(*benchmark)
    
    ax1.plot_date(*zip(*benchmark), ms=1)
    ax1.set_ylabel('Stock price [$]', fontsize=16)
    ax1.set_xlabel('Historical data', fontsize=16)
    ax1.set_title('Similarity scores vs daily {} price for ticker {}'
                  .format(s['type_daily_price'], ticker), fontsize=20)
    
    # Make second axis plot
    ax2 = ax1.twinx()
    for idx, data in enumerate(metric_data):
        x, y = zip(*data)
        plt.bar(x, y, label=metrics[idx], width=6, linestyle='-')
    
    if method == 'diff':
        ax2.set_ylim([0, 1])
        ax2.set_ylabel('Metric similarity [0-1]', fontsize=16)
    elif method == 'sentiment':
        abs_max = max(abs(max(y)), abs(min(y)))
        ax2.set_ylim([-abs_max, abs_max])
        ax2.set_ylabel('Composite sentiment [0-1]', fontsize=16)

    ax2.get_xaxis().set_visible(False)
    
    plt.legend()
    plt.savefig(os.path.join(s['path_output_folder'], '{}_{}_View_{}.png'.format(x[0].strftime('%Y%m%d'), x[-1].strftime('%Y%m%d'), ticker)))
    if run_from_ipython():
        plt.show()
    else:
        plt.close(fig)


[docs]def diff_vs_benchmark(pf_values, index_name, index_data, diff_method, s, norm_by_index=False):
    """
    Plot a portfolio vs an index.

    :param pf_values: Value of the portfolio over time.
    :param index_name: Name of the index.
    :param index_data: Daily value of the index.
    :param s: Settings dictionary.
    :return: void
    """
    # fig = plt.figure(figsize=(10, 5))

    """Display an index"""
    benchmark_x = []
    benchmark_y = []
    for qtr in s['list_qtr'][s['lag']:]:
        qtr_start_date = "{}{}{}".format(str(qtr[0]), str((qtr[1]-1)*3+1).zfill(2), '01')
        qtr_start_date = datetime.strptime(qtr_start_date, '%Y%m%d').date()
        # days, _ = zip(*index_data[index_name])
        days, prices = zip(*index_data[index_name].items())
        for _ in range(7):
            try:
                idx = days.index(qtr_start_date)
                break
            except ValueError:  # The stock exchange was closed that day. Move to the next one.
                qtr_start_date = qtr_start_date.strftime('%Y%m%d')
                day = str(int(qtr_start_date[7]) + 1)
                qtr_start_date = qtr_start_date[:7] + day
                qtr_start_date = datetime.strptime(qtr_start_date, '%Y%m%d').date()
        try:
            benchmark_x.append(qtr_start_date)
            benchmark_y.append(prices[idx][0])  # Only one entry per timestamp
            
        except KeyError:
            raise KeyError('[ERROR] The stock exchange should not have been shut down for more than 7 days.')
    benchmark_y = [value*s['pf_init_value']/benchmark_y[0] for value in benchmark_y]
    
    """Norm by index or not?"""
    if norm_by_index:
        norm = benchmark_y
        benchmark_y = [-s['pf_init_value']]*len(norm)  # Nullify the index data
    else:
        norm = [1]*len(benchmark_y)
        # plt.plot_date(benchmark_x, benchmark_y, label=index_name, linestyle='-.', linewidth=2, ms=10, marker=',')
    benchmark = zip(benchmark_x, benchmark_y)  # Zip for plotting
    
    """Display all the quintiles/deciles"""
    # bin_data = list()
    bin_data = dict()
    for l in s['bin_labels']:
        x = list()
        y = list()
        for qtr in s['list_qtr'][s['lag']:]:
            start = "{}{}{}".format(str(qtr[0]), str(((qtr[1])-1)*3+1).zfill(2), '01')
            x.append(datetime.strptime(start, '%Y%m%d').date())
            y.append(pf_values[diff_method][l][qtr][0])
        y = [qx_value/benchmark_value for qx_value, benchmark_value in zip(y, norm)]
        # plt.plot_date(x, y, label=l, linestyle='-')
        #single_bin_data = zip(x, y)
        #bin_data.append(single_bin_data)
        bin_data[l] = zip(x, y)
    
    # Actually plot now that all the data is available
    return benchmark, bin_data


[docs]def diff_vs_benchmark_ns(pf_values, index_name, index_data, diff_method, s, norm_by_index=False):
    """
    Plot a portfolio vs an index.

    :param pf_values: Value of the portfolio over time.
    :param index_name: Name of the index.
    :param index_data: Daily value of the index.
    :param s: Settings dictionary.
    :return: void
    """

    """Display an index"""
    benchmark_x = []
    benchmark_y = []
    for qtr in s['list_qtr'][s['lag']:]:
        qtr_start_date = "{}{}{}".format(str(qtr[0]), str((qtr[1]-1)*3+1).zfill(2), '01')
        qtr_start_date = datetime.strptime(qtr_start_date, '%Y%m%d').date()
        # days, _ = zip(*index_data[index_name])
        days, prices = zip(*index_data[index_name].items())
        
        # Find the nearest daily price
        for _ in range(7):
            try:
                idx = days.index(qtr_start_date)
                break
            except ValueError:  # The stock exchange was closed that day. Move to the next one.
                qtr_start_date = qtr_start_date.strftime('%Y%m%d')
                day = str(int(qtr_start_date[7]) + 1)
                qtr_start_date = qtr_start_date[:7] + day
                qtr_start_date = datetime.strptime(qtr_start_date, '%Y%m%d').date()
        try:
            benchmark_x.append(qtr_start_date)
            benchmark_y.append(prices[idx][0])  # Only one entry per timestamp
            
        except KeyError:
            raise KeyError('[ERROR] The stock exchange should not have been shut down for more than 7 days.')
    benchmark_y = [value*s['pf_init_value']/benchmark_y[0] for value in benchmark_y]
    
    """Norm by index or not?"""
    if norm_by_index:
        norm = benchmark_y
        benchmark_y = [-s['pf_init_value']]*len(norm)  # Nullify the index data
    else:
        norm = [1]*len(benchmark_y)
        # plt.plot_date(benchmark_x, benchmark_y, label=index_name, linestyle='-.', linewidth=2, ms=10, marker=',')
    benchmark = zip(benchmark_x, benchmark_y)  # Zip for plotting
    
    """Display all the quintiles/deciles"""
    bin_data = dict()
    for l in s['bin_labels']:
        x = list()
        y = list()
        for qtr in s['list_qtr'][s['lag']:]:
            # Assign the quarterly value to tghe first day of the quarter
            start = "{}{}{}".format(str(qtr[0]), str(((qtr[1])-1)*3+1).zfill(2), '01')
            x.append(datetime.strptime(start, '%Y%m%d').date())
            y.append(pf_values[diff_method][qtr]['incoming_value'][l])  # Use value before taxes
        # Divide each value by the norm. 
        # It is 1 if not norm_by_index, or the value of the index otherwise
        y = [qx_value/benchmark_value for qx_value, benchmark_value in zip(y, norm)]
        # plt.plot_date(x, y, label=l, linestyle='-')
        
        # Zip the resulting plot data 
        bin_data[l] = zip(x, y)
    
    # Actually plot now that all the data is available
    return benchmark, bin_data


[docs]def plot_diff_vs_benchmark(benchmark, bin_data, index_name, s):
    # bin_data is a list
    
    nb_bins = len(bin_data)
    if nb_bins == 5:
        prefix = 'Q'
    elif nb_bins == 10:
        prefix = 'D'
    else:
        raise ValueError('[ERROR] Found {} bins. This is not supported yet'.format(nb_bins))
    
    fig = plt.figure(figsize=(10, 5))
    benchmark_x, benchmark_y = zip(*benchmark)

    if benchmark_y[0] != -s['pf_init_value']:  # No benchmark displayed
        plt.plot_date(benchmark_x, benchmark_y, label=index_name, linestyle='-.', linewidth=2, ms=10, marker=',')

    for idx, l in enumerate(bin_data):
        x, y = zip(*bin_data[l])
        # bin_name = prefix + str(idx+1)
        plt.plot_date(x, y, label=l, linestyle='-')  # Label is given by the key
    
    plt.legend()
    plt.title('Portfolio benchmark against {} for different bins'.format(index_name), fontsize=20)
    plt.xlabel('Historical data', fontsize=16)
    plt.ylabel('Portfolio value', fontsize=16)

    plt.savefig(os.path.join(s['path_output_folder'], '{}_{}_Benchmark_{}.png'.format(x[0].strftime('%Y%m%d'), x[-1].strftime('%Y%m%d'), index_name)))
    if run_from_ipython():
        plt.show()
    else:
        plt.close(fig)

[docs]def update_ax_diff_vs_benchmark(ax, benchmark, bin_data, index_name, s, ylim, m):
    # bin_data is a list
    
    nb_bins = len(bin_data)
    if nb_bins == 5:
        prefix = 'Q'
    elif nb_bins == 10:
        prefix = 'D'
    else:
        raise ValueError('[ERROR] Found {} bins. This is not supported yet'.format(nb_bins))
    
    # fig = plt.figure(figsize=(10, 5))
    benchmark_x, benchmark_y = zip(*benchmark)

    if benchmark_y[0] != -s['pf_init_value']:  # No benchmark displayed
        ax.plot_date(benchmark_x, benchmark_y, label=index_name, linestyle='-.', linewidth=2, ms=10, marker=',')

    for idx, l in enumerate(bin_data):
        x, y = zip(*bin_data[l])
        # bin_name = prefix + str(idx+1)
        ax.plot_date(x, y, label=l, linestyle='-')  # Label is given by the key
    
    ax.legend()
    ax.set_title('{} against {}'.format(m, index_name))
    #ax.set_xlabel('Historical data', fontsize=16)
    #ax.set_ylabel('Portfolio value', fontsize=16)
    if ylim:
        ax.set_ylim(ylim)

    #plt.savefig(os.path.join(s['path_output_folder'], '{}_{}_Benchmark_{}.png'.format(x[0].strftime('%Y%m%d'), x[-1].strftime('%Y%m%d'), index_name)))
    #if run_from_ipython():
        #plt.show()
    #else:
        #plt.close(fig)