# blob: 0fd02c75c883654c49e2c7ff71ea0836e94d2ca5 [file] [log] [blame]
"""Plot measurement data.
Run instructions:
python3 -m venv env
source env/bin/activate
pip install -r experimental/requirements.lock
python experimental/cache_analysis.py cache_size_results.csv
"""
from matplotlib.pyplot import figure
import matplotlib.pyplot as plt
import io
import numpy as np
import pandas as pd
from statistics import mean, median
import collections
import csv
import seaborn as sns
import os
import sys
def draw_single_run_plot(data, name):
    """Draw and save a line plot and a scatter plot of one y value per x value.

    Used here to plot the statistics (i.e. mean and median over all runs).
    It can also be used for drawing each run individually.

    Args:
        data:
            A dictionary from x values to their corresponding y value
        name:
            The file name that is used for saving the plot
    """
    fig = figure(num=None, figsize=(10, 8), dpi=80, facecolor='w', edgecolor='k')
    x = list(data.keys())
    y = list(data.values())

    # Top panel: line plot; bottom panel: the same points as a scatter plot.
    line_ax = fig.add_subplot(211)
    line_ax.plot(x, y)
    scatter_ax = fig.add_subplot(212)
    scatter_ax.scatter(x, y)

    fig.savefig(name)
    # Close the figure once it is saved: clearing only the current axes
    # would keep the figure (and its memory) alive across calls.
    plt.close(fig)
def draw_boxplot(data, sizes, name):
    """Draw and save a box plot of the timing values grouped by size.

    Args:
        data:
            A list of lists, one inner list per run. The i-th value of every
            inner list is the timing value that belongs to sizes[i].
        sizes:
            The list of sizes under analysis that is used as the values of x axis
        name:
            The file name that is used for saving the plot
    """
    fig = plt.figure(figsize=(30, 10))
    ax = fig.add_subplot(111)

    # Flatten the runs into (size, time) rows so seaborn can group by size.
    rows = [
        [sizes[i], timing]
        for run in data
        for i, timing in enumerate(run)
    ]
    frame = pd.DataFrame(rows, columns=["size (bytes)", "time (cycles)"])

    sns.set_theme(style="whitegrid")
    # Draw on the axes created above explicitly rather than relying on
    # matplotlib's notion of the "current" axes.
    sns.boxplot(data=frame, showfliers=False,
                x='size (bytes)', y='time (cycles)', ax=ax)
    ax.grid(False)

    fig.savefig(name)
    # Release the figure; otherwise it stays registered with pyplot.
    plt.close(fig)
def draw_all_datapoints(data, sizes, name):
    """Draw and save all data points as scatter plots in a single log-scale figure.

    Args:
        data:
            A list of lists, one inner list per run. The i-th value of every
            inner list is the timing value that belongs to sizes[i].
        sizes:
            The list of sizes under analysis that is used as the values of x axis
        name:
            The file name that is used for saving the plot
    """
    fig = plt.figure(figsize=(30, 10))
    ax = fig.add_subplot(111)

    # One scatter series per run, all sharing the same x values.
    for run in data:
        ax.scatter(sizes, run)

    ax.set_yscale('log')
    ax.set_xscale('log', base=2)
    ax.set_xlabel("size (bytes)")
    ax.set_ylabel("time (cycles)")
    ax.set_title(" all data points")
    ax.grid(False)

    fig.savefig(name)
    # Close the figure once it is saved: clearing only the axes would keep
    # the figure registered with pyplot.
    plt.close(fig)
def main():
    """Read the measurement CSV named on the command line and draw all plots.

    The CSV is read without a header row; column 0 holds the size and column 1
    the timing value. Writes medians.pdf, means.pdf, boxplot.pdf and
    all_runs.pdf to the current directory.

    NOTE(review): the per-run split below assumes the rows are stored run after
    run, every run listing all sizes in the same order -- confirm against the
    tool that produces the CSV.

    Raises:
        SystemExit: if the script is not given exactly one argument.
    """
    if len(sys.argv) != 2:
        sys.exit(f"Expected 1 argument, got {len(sys.argv) - 1}")

    # read csv file to extract data
    df = pd.read_csv(sys.argv[1], header=None)
    count_row = df.shape[0]  # gives number of row count
    sizes = df[0].unique()  # sizes used for the analysis
    run_length = len(sizes)
    iterations = count_row // run_length  # total experiments' repetitions
    print(f"In total the experiment was repeated {iterations} times")
    print(f"In total {len(sizes)} different values were tested")
    if count_row % run_length != 0:
        # Rows of an incomplete trailing run belong to no run below.
        print(
            f"Warning: {count_row % run_length} trailing row(s) do not form "
            "a complete run and are left out of the per-run plots",
            file=sys.stderr,
        )

    # Extract the timing values of each run separately: consecutive blocks of
    # run_length rows. Stepping from row 0 also handles a single distinct
    # size, where every row is a run of its own.
    all_data = [
        list(df.iloc[start:start + run_length][1])
        for start in range(0, iterations * run_length, run_length)
    ]
    print("Extracted the results for each size under analysis")

    # computes the median and mean of iterations
    means = {}
    medians = {}
    grouped = df.groupby(df[0])
    for cache_size in sizes:
        times = grouped.get_group(cache_size)[1]
        means[cache_size] = mean(times)
        medians[cache_size] = median(times)

    draw_single_run_plot(medians, "medians.pdf")
    draw_single_run_plot(means, "means.pdf")
    draw_boxplot(all_data, sizes, "boxplot.pdf")
    draw_all_datapoints(all_data, sizes, "all_runs.pdf")
# Run the analysis only when this file is executed as a script.
if __name__ == '__main__':
    main()