Performance testing results

We provide benchmarks for the prediction and recommendation endpoints for different algorithms using our reference machine of four workers sending 1000 requests to the recommendation server. The results for each algorithm are detailed below.

Setup

[2]:
import json
import pickle
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
[3]:
class ConfigReader:
    def get_value(self, key):
        with open('config.json') as json_data_file:
            data = json.load(json_data_file)
        return data[key]
[20]:
results_folder = "results/"

def print_stats_from_file(file_name):
    obj = pickle.load(open(results_folder + file_name, "rb"))
    times = obj['times']
    time_taken_all = obj['time_taken_all']
    num_requests = obj['num_requests']
    print(f'Number of requests: {num_requests}')
    print(f'Total response time: {round(time_taken_all, 6)}')
    print(f'Throughput (requests per second): {round(num_requests / time_taken_all, 6)}')
    print(f'Peak response time: {round(max(times), 6)}')
    print(f'Mean response time: {round(np.mean(times), 6)}')
    print(f'Median response time: {round(np.median(times), 6)}')
    print(f'99 percentile: {round(np.quantile(times, 0.99), 6)}')

def plot_numbers(file_name):
    obj = pickle.load(open(results_folder + file_name, "rb"))
    resp_time_per_request = obj['times']
    plt.plot(resp_time_per_request)
    plt.show()

def hist_numbers(file_name):
    obj = pickle.load(open(results_folder + file_name, "rb"))
    resp_time_per_request = obj['times']
    plt.hist(resp_time_per_request, bins='auto')
    plt.title('Number of requests over time')
    plt.xlabel('Time')
    plt.ylabel('Requests')
    plt.show()

def create_df(file_name, algo, num_workers):
    obj = pickle.load(open(results_folder + file_name, "rb"))
    df = pd.DataFrame(obj['times'])
    df['Workers'] = num_workers
    df['Algorithm'] = algo
    df.rename(columns={0:'Time'}, inplace=True)
    return df

Get config values

[6]:
reader = ConfigReader()
n_rand_users = num_requests = reader.get_value("num_requests")
base_url = reader.get_value("rec_server_base_url")
n_recs = reader.get_value("n_recs")
items = reader.get_value("items")
pred_algos = reader.get_value("pred_algos")
rec_algos = reader.get_value("rec_algos")
lk_recserver_algos = reader.get_value('lk_recserver_algos')
linear_speedup_algos = reader.get_value("linear_speedup_algos")
workers_config = reader.get_value("workers_config")

Show results

Predict and recommend endpoints from server for canonical config

Predictions for different algorithms

[5]:
for algo in pred_algos:
    print(f'Algorithm: {algo}')
    file_name = f'preds_{algo}_workers_4_num_req_{num_requests}.pickle'
    print_stats_from_file(file_name)
    hist_numbers(file_name)
    print('---------------------')
    print('')
Algorithm: bias
Number of requests: 1000
Total response time: 10.96
Throughput (requests per second): 91.242
Peak response time: 0.101
Mean response time: 0.087
99 percentile: 0.095
_images/performance_10_1.png
---------------------

Algorithm: itemitem
Number of requests: 1000
Total response time: 51.418
Throughput (requests per second): 19.448
Peak response time: 3.846
Mean response time: 0.407
99 percentile: 1.342
_images/performance_10_3.png
---------------------

Algorithm: useruser
Number of requests: 1000
Total response time: 21.567
Throughput (requests per second): 46.367
Peak response time: 0.204
Mean response time: 0.171
99 percentile: 0.192
_images/performance_10_5.png
---------------------

Algorithm: biasedmf
Number of requests: 1000
Total response time: 10.873
Throughput (requests per second): 91.972
Peak response time: 0.103
Mean response time: 0.086
99 percentile: 0.094
_images/performance_10_7.png
---------------------

Algorithm: implicitmf
Number of requests: 1000
Total response time: 10.826
Throughput (requests per second): 92.373
Peak response time: 0.105
Mean response time: 0.086
99 percentile: 0.095
_images/performance_10_9.png
---------------------

Algorithm: funksvd
Number of requests: 1000
Total response time: 10.826
Throughput (requests per second): 92.368
Peak response time: 0.101
Mean response time: 0.085
99 percentile: 0.094
_images/performance_10_11.png
---------------------

Algorithm: tf_bpr
Number of requests: 1000
Total response time: 19.537
Throughput (requests per second): 51.186
Peak response time: 0.33
Mean response time: 0.155
99 percentile: 0.199
_images/performance_10_13.png
---------------------

Recommendations

[6]:
for algo_rec in rec_algos:
    print(f'Algorithm: {algo_rec}')
    file_name = f'recs_{algo_rec}_workers_4_num_req_{num_requests}.pickle'
    print_stats_from_file(file_name)
    hist_numbers(file_name)
    print('---------------------')
    print('')
Algorithm: popular
Number of requests: 1000
Total response time: 0.682
Throughput (requests per second): 1467.233
Peak response time: 0.01
Mean response time: 0.005
99 percentile: 0.008
_images/performance_12_1.png
---------------------

Lenskit

[9]:
def create_df_lk(file_name, algo, type_result):
    obj = pickle.load(open(results_folder + file_name, "rb"))
    df = pd.DataFrame(obj['times'])
    df['Type'] = type_result
    df['Algorithm'] = algo
    df.rename(columns={0:'Time'}, inplace=True)
    return df
[21]:
for lk_recserver_algo in lk_recserver_algos:
    print(f'Algo: {lk_recserver_algo}')
    print('------------------')
    print('Lenskit performance:')
    file_name = f'lkpy_{lk_recserver_algo}_num_req_{num_requests}.pickle'
    print_stats_from_file(file_name)
#    hist_numbers(file_name)
    print('------------------')
    print('Recommendation server performance:')
    file_name = f'preds_{lk_recserver_algo}_against_lkpy_workers_4_num_req_{num_requests}.pickle'
    print_stats_from_file(file_name)
#    hist_numbers(file_name)

    # boxplot
    df = pd.DataFrame({'Times': [], 'Type': [], 'Algorithm': []})
    file_name = f'lkpy_{lk_recserver_algo}_num_req_{num_requests}.pickle'
    df = df.append(create_df_lk(file_name, lk_recserver_algo, 'Lenskit'), ignore_index=True)
    file_name = f'preds_{lk_recserver_algo}_against_lkpy_workers_4_num_req_{num_requests}.pickle'
    df = df.append(create_df_lk(file_name, lk_recserver_algo, 'Recommendation Server'), ignore_index=True)
    plt.title('Requests time')
    ax = sns.boxplot(x="Type", y="Time", data=df)
    plt.show()

    print('*******************************************************')
Algo: bias
------------------
Lenskit performance:
Number of requests: 1000
Total response time: 0.494293
Throughput (requests per second): 2023.090986
Peak response time: 0.006024
Mean response time: 0.000424
Median response time: 0.000417
99 percentile: 0.000445
------------------
Recommendation server performance:
Number of requests: 1000
Total response time: 10.940877
Throughput (requests per second): 91.400348
Peak response time: 0.10275
Mean response time: 0.086518
Median response time: 0.086157
99 percentile: 0.096198
_images/performance_15_1.png
*******************************************************
Algo: biasedmf
------------------
Lenskit performance:
Number of requests: 1000
Total response time: 0.341478
Throughput (requests per second): 2928.449627
Peak response time: 0.005458
Mean response time: 0.000272
Median response time: 0.000265
99 percentile: 0.000295
------------------
Recommendation server performance:
Number of requests: 1000
Total response time: 10.87211
Throughput (requests per second): 91.978464
Peak response time: 0.101846
Mean response time: 0.085913
Median response time: 0.085471
99 percentile: 0.095898
_images/performance_15_3.png
*******************************************************
Algo: itemitem
------------------
Lenskit performance:
Number of requests: 1000
Total response time: 101.676548
Throughput (requests per second): 9.83511
Peak response time: 2.22426
Mean response time: 0.101508
Median response time: 0.050559
99 percentile: 0.679383
------------------
Recommendation server performance:
Number of requests: 1000
Total response time: 52.110227
Throughput (requests per second): 19.190091
Peak response time: 3.86636
Mean response time: 0.412121
Median response time: 0.342963
99 percentile: 1.32436
_images/performance_15_5.png
*******************************************************
Algo: tf_bpr
------------------
Lenskit performance:
Number of requests: 1000
Total response time: 26.68567
Throughput (requests per second): 37.473296
Peak response time: 0.200347
Mean response time: 0.02661
Median response time: 0.025739
99 percentile: 0.029865
------------------
Recommendation server performance:
Number of requests: 1000
Total response time: 20.59358
Throughput (requests per second): 48.558822
Peak response time: 0.199767
Mean response time: 0.163299
Median response time: 0.162994
99 percentile: 0.184859
_images/performance_15_7.png
*******************************************************

Let’s see in more detail the ItemItem response time.

[17]:
file_name = f'lkpy_itemitem_num_req_{num_requests}.pickle'
hist_numbers(file_name)
_images/performance_17_0.png
[16]:
file_name = f'preds_itemitem_against_lkpy_workers_4_num_req_{num_requests}.pickle'
hist_numbers(file_name)
_images/performance_18_0.png

Speedup Tests

Throughput by number of workers

[8]:
for algo in linear_speedup_algos:
    print(f'Algo: {algo}')
    throughput_file_name_workers = f'{results_folder}throughput_single_multiple_workers_algo_{algo}.csv'
    throughputs_workers_from_file = np.genfromtxt(throughput_file_name_workers, delimiter=',')
    y_pos = np.arange(len(throughputs_workers_from_file))
#    plt.bar(y_pos, throughputs_workers_from_file, align='center', alpha=0.5)
    plt.plot(workers_config, throughputs_workers_from_file, 'b--')
#    plt.xticks(y_pos, workers_config)
    plt.ylabel('Throughput')
    plt.xlabel('Number of processes')
    plt.title('Throughput by workers')

    plt.show()
    print('*******************************************************')
Algo: biasedmf
_images/performance_21_1.png
*******************************************************
Algo: itemitem
_images/performance_21_3.png
*******************************************************
Algo: tf_bpr
_images/performance_21_5.png
*******************************************************

Plots by number of workers

[7]:
df = pd.read_csv(f'{results_folder}throughput_single_multiple_workers_algo_tf_bpr.csv', header=None)
df['Workers'] = workers_config
df.rename(columns={0:'Throughput'}, inplace=True)
df
[7]:
Throughput Workers
0 11.880536 1
1 25.505847 2
2 50.071827 4
3 92.440922 8
4 137.375141 12
5 174.451148 16
6 225.043659 24
7 298.307484 32
8 355.671852 48
9 422.071596 80
10 434.162142 96
[8]:
sns.set(rc={'figure.figsize':(15, 10)})
for algo in linear_speedup_algos:
    df = pd.DataFrame({'Times': [], 'Workers': [], 'Algorithm': []})
    print(f'Algorithm: {algo}')
    for w in workers_config:
        file_name = f'linear_speedup_preds_{algo}_workers_{w}_num_req_1000.pickle'
        df_temp = create_df(file_name, algo, w)
        df = df.append(df_temp, ignore_index=True)
    plt.title('Requests time')
    ax = sns.boxplot(x="Workers", y="Time", data=df)
    plt.show()
    print('---------------------')
    print('')
#    break
Algorithm: biasedmf
_images/performance_24_1.png
---------------------

Algorithm: itemitem
_images/performance_24_3.png
---------------------

Algorithm: tf_bpr
_images/performance_24_5.png
---------------------

[13]:
for w in workers_config:
    print(f"Worker: {w}, Mean Time: {df[df['Workers'] == w]['Time'].mean()}")
Worker: 1, Mean Time: 0.16933080368420633
Worker: 2, Mean Time: 0.1678498732119333
Worker: 4, Mean Time: 0.1769776633818401
Worker: 8, Mean Time: 0.19603982038088724
Worker: 12, Mean Time: 0.19488702520968218
Worker: 16, Mean Time: 0.20030749574633955
Worker: 24, Mean Time: 0.18364940203385777
Worker: 32, Mean Time: 0.21081128295401869
Worker: 48, Mean Time: 0.2691085608160647
Worker: 80, Mean Time: 0.3322938989320246
Worker: 96, Mean Time: 0.3337401035259827
[ ]: