from k1lib.imports import *
import requests
Tests how much load a SQL server (specifically, MySQL) can handle under different conditions. A bit like 9-request-throughput, but on SQL servers.
Write perf seems to be around 1k requests/s, while lookup and find-values-in-range queries on indexed fields hover at around 2.5k requests/s. For fields without an index, it gets progressively slower as you put more entries into the database. Surprisingly, finding 1% of values in range vs 10% of values has roughly the same performance.
Again, exact configurations are at https://github.com/157239n/lbbench, under the mysql folder.
# Address of the benchmark web server that fronts the MySQL instance
# (see the lbbench repo's mysql folder for the exact setup)
ip = "172.28.0.2"
url = f"http://{ip}:8000"
def test(f):
    """Runs ``f()`` 10 times, timing each run, and folds the duration
    samples into a single ``k1.UValue`` (mean ± uncertainty)."""
    samples = []
    for _ in range(10):
        with k1.timer() as timer:
            f()
        samples.append(timer())
    return samples | aS(k1.UValue.fromSeries)
#notest
# Benchmark insert throughput under 4 strategies: sync, async,
# multiprocess-sync and bulk insert; results are saved to inserts.json
with k1.timer() as t:
    requests.get(f"{url}/drop")  # reset the table before benchmarking
    insertSync = test(lambda: requests.get(f"{url}/insert/sync/1000"))
    insertAsync = test(lambda: requests.get(f"{url}/insert/async/1000"))
    # 10 processes, each inserting 100 rows synchronously
    insertSyncMp = test(lambda: range(10) | applyMp(lambda x: requests.get(f"{url}/insert/sync/100")) | ignore())
    insertBulk = test(lambda: requests.get(f"{url}/insert/bulk/100000"))
    # persist the 4 UValue series as a single JSON line
    [insertSync, insertAsync, insertSyncMp, insertBulk] | toList().all() | deref() | aS(json.dumps) | wrapList() | file("inserts.json")
t()  # total wall-clock time of the whole benchmark (seconds)
46.426697969436646
# Reload the saved benchmark results; ~aS(k1.UValue) reconstructs the
# UValue objects from their serialized form
[insertSync, insertAsync, insertSyncMp, insertBulk] = cat("inserts.json") | join() | aS(json.loads) | apply(~aS(k1.UValue)) | deref()
# [timing stats, label, #rows inserted per run]
summary = [
    [insertSync, "insertSync", 1000],
    [insertAsync, "insertAsync", 1000],
    [insertSyncMp, "insertSyncMp", 1000],
    [insertBulk, "insertBulk", 100000],
]
Duration:
# Boxplot of per-run durations (sampled from each UValue) per insert strategy
summary | op()[0].sample().all() | deref() | aS(plt.boxplot, labels=summary | cut(1) | deref());
plt.ylabel("Seconds"); plt.ylim(bottom=0)
(0.0, 4.411142122745514)
Throughput:
# Boxplot of throughput: rows-per-run (z) divided by duration (x), per strategy
summary | apply(~aS(lambda x, y, z: (z/x).sample())) | deref() | aS(plt.boxplot, labels=summary | cut(1) | deref());
plt.ylabel("Queries/s"); plt.yscale("log")
# Parses ApacheBench (ab) output: throughput comes from the
# "Requests per second:    X [#/sec]" line
getThroughput = grep("Requests per second") | item() | op().split(":")[1].strip().split(" ")[0].ab_float()
# ...and 99th-percentile latency (ms) from the last "  99%    N" row of
# ab's percentile table
getLatency = grep("99%") | tail(1) | item() | op().split("%")[1].strip().split(" ")[0].ab_int()
#notest
# Benchmark point lookups and range queries (1% and 10% of the value range),
# with and without an index, across log-spaced table sizes 1e3..1e6
with k1.timer() as t, k1.captureStdout() as out:
    data = []
    for n in torch.loglinspace(1e3, 1e6, 10) | head(-1) | op().item().all() | apply(round):
        requests.get(f"{url}/drop")              # fresh table
        requests.get(f"{url}/insert/bulk/{n}")   # populate n rows
        # each measurement: ab with concurrency 10, parsed into [throughput, p99 latency]
        lookup_index = None | cmd(f"ab -n 1000 -c 10 {url}/query/index") | (getThroughput & getLatency) | deref()
        lookup_noindex = None | cmd(f"ab -n 1000 -c 10 {url}/query/noindex") | (getThroughput & getLatency) | deref()
        inRange1_index = None | cmd(f"ab -n 50 -c 10 {url}/query/inrange/index/0/10") | (getThroughput & getLatency) | deref()
        inRange1_noindex = None | cmd(f"ab -n 50 -c 10 {url}/query/inrange/noindex/0/10") | (getThroughput & getLatency) | deref()
        inRange2_index = None | cmd(f"ab -n 50 -c 10 {url}/query/inrange/index/0/100") | (getThroughput & getLatency) | deref()
        inRange2_noindex = None | cmd(f"ab -n 50 -c 10 {url}/query/inrange/noindex/0/100") | (getThroughput & getLatency) | deref()
        data.append([n, lookup_index, lookup_noindex, inRange1_index, inRange1_noindex, inRange2_index, inRange2_noindex])
    data | aS(json.dumps) | wrapList() | file("query.json")
out() | tail() | stdout(); t()
Completed 300 requests Completed 400 requests Completed 500 requests Completed 600 requests Completed 700 requests Completed 800 requests Completed 900 requests Completed 1000 requests Finished 1000 requests
93.96197557449341
# Reload saved results and peek at the first 2 rows
data = cat("query.json") | join("") | aS(json.loads); data | head(2)
[[1000, [2415.55, 6], [2476.43, 5], [2359.71, 5], [2223.21, 5], [2503.38, 5], [2417.56, 6]], [2154, [2784.77, 5], [2512.71, 6], [2538.72, 5], [2295.05, 6], [2564.89, 6], [2298.43, 6]]]
Throughput:
# thumbnail
# One line per query type: x = table size, y = throughput (cut(0) of each pair)
data | apply(~aS(lambda x, *y: y | cut(0) | apply(lambda z: [x, z]))) | transpose() | (transpose() | ~aS(plt.plot)).all() | ignore();
plt.xscale("log"); plt.yscale("log"); plt.xlabel("#Entries"); plt.ylabel("Requests/s"); plt.grid(True)
plt.legend(["lookup index", "lookup noindex", "inrange 1% index", "inrange 1% noindex", "inrange 10% index", "inrange 10% noindex"]);
Latency:
# Same as above but plotting p99 latency (cut(1) of each [throughput, latency] pair)
data | apply(~aS(lambda x, *y: y | cut(1) | apply(lambda z: [x, z]))) | transpose() | (transpose() | ~aS(plt.plot)).all() | ignore();
plt.xscale("log"); plt.yscale("log"); plt.xlabel("#Entries"); plt.ylabel("Latency (ms)"); plt.grid(True)
plt.legend(["lookup index", "lookup noindex", "inrange 1% index", "inrange 1% noindex", "inrange 10% index", "inrange 10% noindex"]);
#notest
# Benchmark string-matching queries (prefix "Adri%", suffix "%Adri",
# substring "%Adri%"), with and without an index, across table sizes.
# Bug fix: tailstring_index/tailstring_noindex previously queried the
# `headstring` endpoint (copy-paste error), so the "tailstring" series
# silently duplicated the headstring measurements. They now hit
# /query/tailstring/... as the variable names and plot legends intend.
with k1.timer() as t, k1.captureStdout() as out:
    data = []; beginTime = time.time()
    for n in torch.loglinspace(1e3, 1e6, 10) | head(-1) | op().item().all() | apply(round):
        # progress line, overwritten in place via \r
        out.print(f"\r{n}, time elapsed: {int(time.time() - beginTime)}s ", end="")
        requests.get(f"{url}/drop")              # fresh table
        requests.get(f"{url}/insert/bulk/{n}")   # populate n rows
        headstring_index = None | cmd(f"ab -n 1000 -c 10 {url}/query/headstring/index/Adri") | (getThroughput & getLatency) | deref()
        headstring_noindex = None | cmd(f"ab -n 1000 -c 10 {url}/query/headstring/noindex/Adri") | (getThroughput & getLatency) | deref()
        tailstring_index = None | cmd(f"ab -n 1000 -c 10 {url}/query/tailstring/index/Adri") | (getThroughput & getLatency) | deref()
        tailstring_noindex = None | cmd(f"ab -n 1000 -c 10 {url}/query/tailstring/noindex/Adri") | (getThroughput & getLatency) | deref()
        midstring_index = None | cmd(f"ab -n 1000 -c 10 {url}/query/midstring/index/Adri") | (getThroughput & getLatency) | deref()
        midstring_noindex = None | cmd(f"ab -n 1000 -c 10 {url}/query/midstring/noindex/Adri") | (getThroughput & getLatency) | deref()
        data.append([n, headstring_index, headstring_noindex, tailstring_index, tailstring_noindex, midstring_index, midstring_noindex])
    data | aS(json.dumps) | wrapList() | file("query_string.json")
out() | tail() | stdout(); t()
464159, time elapsed: 163s Completed 300 requests Completed 400 requests Completed 500 requests Completed 600 requests Completed 700 requests Completed 800 requests Completed 900 requests Completed 1000 requests Finished 1000 requests
329.80144715309143
# Reload saved string-query results and peek at the first 2 rows
data = cat("query_string.json") | join("") | aS(json.loads); data | head(2)
[[1000, [2541.61, 6], [2586.4, 5], [2597.47, 6], [2585.39, 6], [2454.05, 6], [2697.69, 5]], [2154, [2686.97, 5], [2490.24, 6], [2735.31, 5], [2455.18, 6], [2413.02, 6], [2495.66, 7]]]
# Throughput vs table size, one line per string-query type
data | apply(~aS(lambda x, *y: y | cut(0) | apply(lambda z: [x, z]))) | transpose() | (transpose() | ~aS(plt.plot)).all() | ignore();
plt.xscale("log"); plt.yscale("log"); plt.xlabel("#Entries"); plt.ylabel("Requests/s"); plt.grid(True)
plt.legend(["headstring index", "headstring noindex", "tailstring index", "tailstring noindex", "midstring index", "midstring noindex"]);
# p99 latency vs table size, one line per string-query type
data | apply(~aS(lambda x, *y: y | cut(1) | apply(lambda z: [x, z]))) | transpose() | (transpose() | ~aS(plt.plot)).all() | ignore();
plt.xscale("log"); plt.yscale("log"); plt.xlabel("#Entries"); plt.ylabel("Latency (ms)"); plt.grid(True)
plt.legend(["headstring index", "headstring noindex", "tailstring index", "tailstring noindex", "midstring index", "midstring noindex"]);
Quite interesting. If you're searching for the beginning (matches Adri%) or ending (%Adri) of the field, then it's very fast, but if you were to search for the middle (matches %Adri%), then it's super slow, and indexing wouldn't help.
Just as a quick reminder, here's the table setup and its indexes
# Fetch the table schema + index list (MySQL DESCRIBE + SHOW INDEX) from the server
requests.get(f"{url}/describe").content | aS(json.loads)
[[{'Field': 'id',
'Type': 'int',
'Null': 'NO',
'Key': 'PRI',
'Default': None,
'Extra': 'auto_increment'},
{'Field': 'name',
'Type': 'varchar(255)',
'Null': 'YES',
'Key': 'MUL',
'Default': None,
'Extra': ''},
{'Field': 'nameNoIndex',
'Type': 'varchar(255)',
'Null': 'YES',
'Key': '',
'Default': None,
'Extra': ''},
{'Field': 'phoneNumber',
'Type': 'varchar(255)',
'Null': 'YES',
'Key': 'MUL',
'Default': None,
'Extra': ''},
{'Field': 'passwordHash',
'Type': 'varchar(255)',
'Null': 'YES',
'Key': '',
'Default': None,
'Extra': ''},
{'Field': 'role',
'Type': 'int',
'Null': 'YES',
'Key': '',
'Default': None,
'Extra': ''},
{'Field': 'tier',
'Type': 'int',
'Null': 'YES',
'Key': 'MUL',
'Default': None,
'Extra': ''},
{'Field': 'tierExpirationTime',
'Type': 'bigint',
'Null': 'YES',
'Key': '',
'Default': None,
'Extra': ''},
{'Field': 'propertyShareFields',
'Type': 'varchar(255)',
'Null': 'YES',
'Key': '',
'Default': None,
'Extra': ''},
{'Field': 'PGShareFields',
'Type': 'varchar(255)',
'Null': 'YES',
'Key': '',
'Default': None,
'Extra': ''},
{'Field': 'lastUpdated',
'Type': 'bigint',
'Null': 'YES',
'Key': 'MUL',
'Default': None,
'Extra': ''}],
[{'Table': 'users',
'Non_unique': 0,
'Key_name': 'PRIMARY',
'Seq_in_index': 1,
'Column_name': 'id',
'Collation': 'A',
'Cardinality': 0,
'Sub_part': None,
'Packed': None,
'Null': '',
'Index_type': 'BTREE',
'Comment': '',
'Index_comment': '',
'Visible': 'YES',
'Expression': None},
{'Table': 'users',
'Non_unique': 1,
'Key_name': 'users_last_updated',
'Seq_in_index': 1,
'Column_name': 'lastUpdated',
'Collation': 'A',
'Cardinality': 1,
'Sub_part': None,
'Packed': None,
'Null': 'YES',
'Index_type': 'BTREE',
'Comment': '',
'Index_comment': '',
'Visible': 'YES',
'Expression': None},
{'Table': 'users',
'Non_unique': 1,
'Key_name': 'users_tier',
'Seq_in_index': 1,
'Column_name': 'tier',
'Collation': 'A',
'Cardinality': 1,
'Sub_part': None,
'Packed': None,
'Null': 'YES',
'Index_type': 'BTREE',
'Comment': '',
'Index_comment': '',
'Visible': 'YES',
'Expression': None},
{'Table': 'users',
'Non_unique': 1,
'Key_name': 'users_phone_number',
'Seq_in_index': 1,
'Column_name': 'phoneNumber',
'Collation': 'A',
'Cardinality': 1,
'Sub_part': None,
'Packed': None,
'Null': 'YES',
'Index_type': 'BTREE',
'Comment': '',
'Index_comment': '',
'Visible': 'YES',
'Expression': None},
{'Table': 'users',
'Non_unique': 1,
'Key_name': 'users_name',
'Seq_in_index': 1,
'Column_name': 'name',
'Collation': 'A',
'Cardinality': 1,
'Sub_part': None,
'Packed': None,
'Null': 'YES',
'Index_type': 'BTREE',
'Comment': '',
'Index_comment': '',
'Visible': 'YES',
'Expression': None}]]