Files
supercomputers/benchmark.py

116 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Запускает make run <number_of_runs> раз и считает статистику по времени выполнения.
Тупо парсит out.txt и берём значение из строки "Total execution time: <time> sec".
python benchmark.py <number_of_runs>
"""
import os
import re
import sys
import time
import subprocess
import statistics
N = int(sys.argv[1]) if len(sys.argv) > 1 else 10
OUT = "out.txt"
TIME_RE = re.compile(r"Total execution time:\s*([0-9]*\.?[0-9]+)\s*sec")
JOB_RE = re.compile(r"Submitted batch job\s+(\d+)")
APPEAR_TIMEOUT = 300.0 # ждать появления out.txt
FINISH_TIMEOUT = 3600.0 # ждать появления Total execution time (сек)
POLL = 0.2 # частота проверки файла
def wait_for_exists(path: str, timeout: float):
t0 = time.time()
while not os.path.exists(path):
if time.time() - t0 > timeout:
raise TimeoutError(f"{path} did not appear within {timeout} seconds")
time.sleep(POLL)
def try_read(path: str) -> str:
try:
with open(path, "r", encoding="utf-8", errors="replace") as f:
return f.read()
except FileNotFoundError:
return ""
except OSError:
# бывает, что файл на NFS в момент записи недоступен на чтение
return ""
def wait_for_time_line(path: str, timeout: float) -> float:
t0 = time.time()
last_report = 0.0
while True:
txt = try_read(path)
matches = TIME_RE.findall(txt)
if matches:
return float(matches[-1]) # последняя встреченная строка
now = time.time()
if now - t0 > timeout:
tail = txt[-800:] if txt else "<empty>"
raise TimeoutError("Timed out waiting for 'Total execution time' line.\n"
f"Last 800 chars of out.txt:\n{tail}")
# иногда полезно печатать прогресс раз в ~5 сек
if now - last_report > 5.0:
last_report = now
if txt:
# показать последнюю непустую строку
lines = [l for l in txt.splitlines() if l.strip()]
if lines:
print(f" waiting... last line: {lines[-1][:120]}", flush=True)
else:
print(" waiting... (out.txt empty)", flush=True)
else:
print(" waiting... (out.txt not readable yet)", flush=True)
time.sleep(POLL)
times = []
for i in range(N):
print(f"Run {i+1}/{N} ...", flush=True)
# удаляем out.txt перед запуском
try:
os.remove(OUT)
except FileNotFoundError:
pass
# запускаем make run и забираем stdout (там будет Submitted batch job XXX)
res = subprocess.run(["make", "run"], capture_output=True, text=True)
out = (res.stdout or "") + "\n" + (res.stderr or "")
job_id = None
m = JOB_RE.search(out)
if m:
job_id = m.group(1)
print(f" submitted job {job_id}", flush=True)
else:
print(" (job id not detected; will only watch out.txt)", flush=True)
# ждём появления out.txt и появления строки с Total execution time
wait_for_exists(OUT, APPEAR_TIMEOUT)
t = wait_for_time_line(OUT, FINISH_TIMEOUT)
times.append(t)
print(f" time = {t:.3f} sec", flush=True)
# опционально удалить out.txt после парсинга
try:
os.remove(OUT)
except FileNotFoundError:
pass
print("\n=== RESULTS ===")
print(f"Runs: {len(times)}")
print(f"Mean: {statistics.mean(times):.3f} sec")
print(f"Median: {statistics.median(times):.3f} sec")
print(f"Min: {min(times):.3f} sec")
print(f"Max: {max(times):.3f} sec")
if len(times) > 1:
print(f"Stddev: {statistics.stdev(times):.3f} sec")