# sisap25-deglib/plot.py at main · Visual-Computing/sisap25-deglib · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# This is based on https://github.com/matsui528/annbench/blob/main/plot.py
import argparse
import csv
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import sys
from itertools import cycle

from datasets import get_query_count


# Endless style generators consumed by draw(): each plotted series takes the
# next marker and the next line style, wrapping around once a list is exhausted.
# The two lists have different lengths, so the pairings shift on every wrap.
marker = cycle(['p', '^', 'h', 'x', 'o', 's', '*', '+', 'D', '1', 'X'])
linestyle = cycle([':', '-', '--'])

def draw(lines, xlabel, ylabel, title, filename, with_ctrl, width, height):
    """
    Visualize search results and save them as an image.

    Each entry of ``lines`` becomes one curve. Curves take their marker and
    line style from the module-level ``marker`` and ``linestyle`` cycles, the
    y-axis is logarithmic, and the legend is placed outside the axes on the
    right. The figure is closed again once the image has been written.

    Args:
        lines (list): search results. list of dict; every dict must provide
            the keys "xs", "ys", "label" and "ctrls".
        xlabel (str): label of x-axis, usually "recall"
        ylabel (str): label of y-axis, usually "query per sec"
        title (str): title of the result_img
        filename (str): output file name of image
        with_ctrl (bool): show control parameters or not
        width (int): width of the figure (passed to matplotlib's ``figsize``)
        height (int): height of the figure (passed to matplotlib's ``figsize``)

    Raises:
        AssertionError: if an entry of ``lines`` lacks one of the required keys.
    """
    # Validate first so that malformed input never allocates a figure.
    for line in lines:
        for key in ("xs", "ys", "label", "ctrls"):
            assert key in line, f"line is missing required key {key!r}"

    fig = plt.figure(figsize=(width, height))
    try:
        for line in lines:
            plt.plot(line["xs"], line["ys"], label=line["label"],
                     marker=next(marker), linestyle=next(linestyle))
            if with_ctrl:
                # Label every point with its control parameter, shifted
                # upwards by 50 on the y-axis so it does not cover the marker.
                for x, y, ctrl in zip(line["xs"], line["ys"], line["ctrls"]):
                    plt.annotate(text=str(ctrl), xy=(x, y),
                                 xytext=(x, y+50))

        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.grid(which="both")
        plt.yscale("log")
        # Anchor the legend just outside the right edge of the axes.
        plt.legend(bbox_to_anchor=(1.05, 1.0), loc="upper left")
        plt.title(title)
        # 'tight' keeps the outside legend inside the saved image.
        plt.savefig(filename, bbox_inches='tight')
    finally:
        # pyplot keeps every figure registered until it is closed; merely
        # clearing the axes would leak one figure per call.
        plt.close(fig)

def get_pareto_frontier(line):
    """
    Reduce a line to its Pareto frontier, in place.

    The points are visited from the largest y downwards (ties are ordered by
    x, then by ctrl, all descending). A point is kept only if its x is
    strictly greater than every x kept so far, starting from a threshold of
    0, so points with x <= 0 are never kept.

    Args:
        line (dict): must contain the parallel lists "xs", "ys" and "ctrls".
            These three entries are replaced by new, filtered lists; any
            other keys are left untouched.

    Returns:
        dict: the same ``line`` object that was passed in.
    """
    candidates = list(zip(line["ys"], line["xs"], line["ctrls"]))
    candidates.sort(reverse=True)

    # Install the (still empty) result lists on the dict right away and fill
    # them through local aliases.
    kept_xs = line["xs"] = []
    kept_ys = line["ys"] = []
    kept_ctrls = line["ctrls"] = []

    best_x = 0
    for y_val, x_val, ctrl in candidates:
        if x_val > best_x:
            best_x = x_val
            kept_xs.append(x_val)
            kept_ys.append(y_val)
            kept_ctrls.append(ctrl)

    return line

if __name__ == "__main__":
    # Usage: plot.py [--task {task1,task2}] csvfile
    # Reads benchmark rows from the CSV file, keeps the rows of the selected
    # task and writes the recall/QPS plot to result_<task>.png.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--task",
        choices=['task1', 'task2'],
        default='task2'
    )
    parser.add_argument("csvfile")
    args = parser.parse_args()

    # newline="" is the mode the csv module asks for when opening files.
    with open(args.csvfile, newline="") as csvfile:
        data = list(csv.DictReader(csvfile))

    # One series per dataset/algorithm pair, keyed by its legend label.
    lines = {}
    for res in data:
        if res["task"] != args.task:
            continue

        querytime = float(res["querytime"])
        if querytime <= 0:
            # A zero time would raise ZeroDivisionError and a negative one
            # cannot be drawn on the logarithmic y-axis; drop the row rather
            # than abort the whole plot.
            print(f"skipping row with non-positive querytime: {res}",
                  file=sys.stderr)
            continue

        dataset = res["dataset"]
        label = dataset + res["algo"]
        line = lines.setdefault(label, {
            "xs": [],
            "ys": [],
            "ctrls": [],
            "label": label,
        })
        line["xs"].append(float(res["recall"]))
        # Queries per second: the query count is looked up per dataset and
        # task (presumably querytime is the total time for all queries, in
        # seconds -- confirm against the code that writes the CSV).
        line["ys"].append(get_query_count(dataset, args.task) / querytime)

        # Annotate points with the part of params following "query=" when it
        # is present; otherwise fall back to the raw params value.
        params = res["params"]
        try:
            run_identifier = params.split("query=")[1]
        except (AttributeError, IndexError):
            # IndexError: no "query=" marker. AttributeError: params is None,
            # which DictReader yields for rows shorter than the header.
            run_identifier = params
        line["ctrls"].append(run_identifier)

    draw([get_pareto_frontier(line) for line in lines.values()],
            "Recall", "QPS (1/s)", "Result", f"result_{args.task}.png", True, 10, 8)