Перешёл к произвольным периодам агрегации

upsampling данных
Неактуальная инфа
2025-12-16 13:53:36 +00:00 · 2025-12-16 13:25:38 +00:00 · 2025-12-16 12:45:11 +00:00 · 2025-12-15 14:46:27 +00:00 · 2025-12-15 13:04:26 +00:00 · 2025-12-15 12:57:56 +00:00
16 changed files with 796 additions and 308 deletions
--- a/README.md
+++ b/README.md
@@ -2,6 +2,16 @@

 [Kaggle Bitcoin Historical Data](https://www.kaggle.com/datasets/mczielinski/bitcoin-historical-data)

+Исходные данные содержат одну запись на каждую минуту. Чтобы увеличить объём данных
+и нагляднее продемонстрировать эффективность параллельных вычислений
+и вычислений на GPU, минутные данные были преобразованы с помощью линейной
+интерполяции в данные с шагом 10 секунд, то есть объём данных увеличился
+в 6 раз.
+
+```
+python3 upsample.py -i ./data/data.csv -o ./data/data_10s.csv -s 10
+```
+
 ## Задание

 Группируем данные по дням (Timestamp), за каждый день вычисляем среднюю цену 
@@ -10,26 +20,6 @@
 не менее чем на 10% от даты начала интервала, вместе с минимальными и максимальными 
 значениями Open и Close за все дни внутри интервала.

-## Параллельное чтение данных
-
-Нет смысла параллельно читать данные из NFS, так как в реальности файлы с данными
-будут лежать только на NFS сервере. То есть другие узлы лишь отправляют сетевые запросы
-на NFS сервер, который уже читает реальные данные с диска и лишь затем отправляет
-их другим узлам.
-
-Чтобы этого избежать, нужно на всех машинах скопировать файлы с данными в их реальные
-файловые системы. Например в папку `/data`.
-
-```sh
-# На каждом узле создаем директорию /data
-sudo mkdir /data
-sudo chown $USER /data
-
-# Копируем данные
-cd /mnt/shared/supercomputers/data
-cp data.csv /data/
-```
-
 ## Сборка

 Проект обязательно должен быть расположен в общей директории для всех узлов,
--- a/benchmark.py
+++ b/benchmark.py
@@ -0,0 +1,115 @@
+"""
+Runs `make run` <number_of_runs> times and reports statistics on the execution time.
+The time of each run is parsed from out.txt, from the line "Total execution time: <time> sec".
+
+python benchmark.py <number_of_runs>
+"""
+
+import os
+import re
+import sys
+import time
+import subprocess
+import statistics
+
+N = int(sys.argv[1]) if len(sys.argv) > 1 else 10  # number of runs; defaults to 10
+OUT = "out.txt"  # output file that is polled and parsed for every run
+
+TIME_RE = re.compile(r"Total execution time:\s*([0-9]*\.?[0-9]+)\s*sec")
+JOB_RE  = re.compile(r"Submitted batch job\s+(\d+)")
+
+APPEAR_TIMEOUT = 300.0     # seconds to wait for out.txt to appear
+FINISH_TIMEOUT = 3600.0    # seconds to wait for the "Total execution time" line
+POLL = 0.2                 # seconds between file checks
+
+def wait_for_exists(path: str, timeout: float) -> None:
+    """Block until *path* exists, checking every ``POLL`` seconds.
+
+    Args:
+        path: File system path to wait for.
+        timeout: Maximum time to wait, in seconds.
+
+    Raises:
+        TimeoutError: If *path* has not appeared within *timeout* seconds.
+    """
+    # A monotonic clock keeps the timeout correct even if the wall clock is
+    # adjusted (NTP step, manual change) while we are waiting.
+    t0 = time.monotonic()
+    while not os.path.exists(path):
+        if time.monotonic() - t0 > timeout:
+            raise TimeoutError(f"{path} did not appear within {timeout} seconds")
+        time.sleep(POLL)
+
+def try_read(path: str) -> str:
+    """Return the text of *path*, or ``""`` if it cannot be read right now.
+
+    The file is decoded as UTF-8 with undecodable bytes replaced, so a
+    decoding problem never raises.
+    """
+    try:
+        with open(path, "r", encoding="utf-8", errors="replace") as stream:
+            content = stream.read()
+    except OSError:
+        # Covers a missing file (FileNotFoundError is an OSError) as well as a
+        # file on NFS that is temporarily unreadable while it is being written.
+        return ""
+    return content
+
+def wait_for_time_line(path: str, timeout: float) -> float:
+    """Poll *path* until it contains a "Total execution time" line.
+
+    While waiting, a progress message is printed right away and then about
+    once every 5 seconds.
+
+    Args:
+        path: File to poll (re-read in full on every check).
+        timeout: Maximum time to wait, in seconds.
+
+    Returns:
+        The time in seconds parsed from the last matching line in the file.
+
+    Raises:
+        TimeoutError: If no matching line appeared within *timeout* seconds;
+            the message includes the last 800 characters that were read.
+    """
+    # A monotonic clock keeps the timeout and the progress interval correct
+    # even if the wall clock is adjusted while we are waiting.
+    t0 = time.monotonic()
+    last_report = None  # time of the last progress message; None = none yet
+    while True:
+        txt = try_read(path)
+        matches = TIME_RE.findall(txt)
+        if matches:
+            return float(matches[-1])  # the last occurrence wins
+
+        now = time.monotonic()
+        if now - t0 > timeout:
+            tail = txt[-800:] if txt else "<empty>"
+            raise TimeoutError("Timed out waiting for 'Total execution time' line.\n"
+                               f"Last 800 chars of out.txt:\n{tail}")
+
+        # Report progress immediately, then roughly every 5 seconds.
+        if last_report is None or now - last_report > 5.0:
+            last_report = now
+            if txt:
+                # Show the last non-empty line of the output.
+                lines = [line for line in txt.splitlines() if line.strip()]
+                if lines:
+                    print(f"  waiting... last line: {lines[-1][:120]}", flush=True)
+                else:
+                    print("  waiting... (out.txt empty)", flush=True)
+            else:
+                print("  waiting... (out.txt not readable yet)", flush=True)
+
+        time.sleep(POLL)
+
+times = []
+
+for i in range(N):
+    print(f"Run {i+1}/{N} ...", flush=True)
+
+    # Remove any stale out.txt so a previous run's result is never parsed.
+    try:
+        os.remove(OUT)
+    except FileNotFoundError:
+        pass
+
+    # Run `make run` and capture its output (it contains "Submitted batch job XXX").
+    res = subprocess.run(["make", "run"], capture_output=True, text=True)
+    out = (res.stdout or "") + "\n" + (res.stderr or "")
+
+    # Fail fast when the submission itself failed, instead of waiting up to
+    # APPEAR_TIMEOUT seconds for out.txt and reporting an unrelated timeout.
+    if res.returncode != 0:
+        sys.exit(f"make run failed with exit code {res.returncode}:\n{out.strip()}")
+
+    m = JOB_RE.search(out)
+    if m:
+        print(f"  submitted job {m.group(1)}", flush=True)
+    else:
+        print("  (job id not detected; will only watch out.txt)", flush=True)
+
+    # Wait for out.txt to appear, then for the "Total execution time" line.
+    wait_for_exists(OUT, APPEAR_TIMEOUT)
+    t = wait_for_time_line(OUT, FINISH_TIMEOUT)
+
+    times.append(t)
+    print(f"  time = {t:.3f} sec", flush=True)
+
+    # Clean up so the next iteration starts without an out.txt.
+    try:
+        os.remove(OUT)
+    except FileNotFoundError:
+        pass
+
+print("\n=== RESULTS ===")
+print(f"Runs:   {len(times)}")
+# mean/median/min/max all raise on an empty sequence (N <= 0), so guard them.
+if times:
+    print(f"Mean:   {statistics.mean(times):.3f} sec")
+    print(f"Median: {statistics.median(times):.3f} sec")
+    print(f"Min:    {min(times):.3f} sec")
+    print(f"Max:    {max(times):.3f} sec")
+# The sample standard deviation needs at least two data points.
+if len(times) > 1:
+    print(f"Stddev: {statistics.stdev(times):.3f} sec")
--- a/run.slurm
+++ b/run.slurm
@@ -6,13 +6,16 @@
 #SBATCH --output=out.txt

 # Путь к файлу данных (должен существовать на всех узлах)
-export DATA_PATH="/mnt/shared/supercomputers/data/data.csv"
+export DATA_PATH="/mnt/shared/supercomputers/data/data_10s.csv"

 # Доли данных для каждого ранка (сумма определяет пропорции)
-export DATA_READ_SHARES="10,12,13,13"
+export DATA_READ_SHARES="10,11,13,14"

 # Размер перекрытия в байтах для обработки границ строк
 export READ_OVERLAP_BYTES=131072

+# Интервал агрегации в секундах (60 = минуты, 600 = 10 минут, 86400 = дни)
+export AGGREGATION_INTERVAL=60
+
 cd /mnt/shared/supercomputers/build
 mpirun -np $SLURM_NTASKS ./bitcoin_app
--- a/src/aggregation.cpp
+++ b/src/aggregation.cpp
@@ -1,87 +1,50 @@
 #include "aggregation.hpp"
+#include "utils.hpp"
+#include <map>
 #include <algorithm>
 #include <limits>
-#include <cmath>

-std::vector<DayStats> aggregate_days(const std::vector<Record>& records) {
-    // Группируем записи по дням
-    std::map<DayIndex, std::vector<const Record*>> day_records;
+std::vector<PeriodStats> aggregate_periods(const std::vector<Record>& records) {  // one PeriodStats per period, in ascending period order (std::map iteration)
+    int64_t interval = get_aggregation_interval();  // NOTE(review): used as a divisor below; confirm get_aggregation_interval() never returns 0
+    
+    struct PeriodAccumulator {
+        double avg_sum = 0.0;  // running sum of per-record (low + high) / 2
+        double open_min = std::numeric_limits<double>::max();
+        double open_max = std::numeric_limits<double>::lowest();
+        double close_min = std::numeric_limits<double>::max();
+        double close_max = std::numeric_limits<double>::lowest();
+        int64_t count = 0;  // number of records folded into this period
+    };
+    
+    std::map<PeriodIndex, PeriodAccumulator> periods;
    
    for (const auto& r : records) {
-        DayIndex day = static_cast<DayIndex>(r.timestamp) / 86400;
-        day_records[day].push_back(&r);
+        PeriodIndex period = static_cast<PeriodIndex>(r.timestamp) / interval;  // period this record falls into
+        auto& acc = periods[period];  // creates a default accumulator on first use
+        
+        double avg = (r.low + r.high) / 2.0;
+        acc.avg_sum += avg;
+        acc.open_min = std::min(acc.open_min, r.open);
+        acc.open_max = std::max(acc.open_max, r.open);
+        acc.close_min = std::min(acc.close_min, r.close);
+        acc.close_max = std::max(acc.close_max, r.close);
+        acc.count++;
    }
    
-    std::vector<DayStats> result;
-    result.reserve(day_records.size());
+    std::vector<PeriodStats> result;
+    result.reserve(periods.size());
    
-    for (auto& [day, recs] : day_records) {
-        // Сортируем по timestamp для определения first/last
-        std::sort(recs.begin(), recs.end(), 
-            [](const Record* a, const Record* b) {
-                return a->timestamp < b->timestamp;
-            });
-        
-        DayStats stats;
-        stats.day = day;
-        stats.low = std::numeric_limits<double>::max();
-        stats.high = std::numeric_limits<double>::lowest();
-        stats.open = recs.front()->open;
-        stats.close = recs.back()->close;
-        stats.first_ts = recs.front()->timestamp;
-        stats.last_ts = recs.back()->timestamp;
-        
-        for (const auto* r : recs) {
-            stats.low = std::min(stats.low, r->low);
-            stats.high = std::max(stats.high, r->high);
-        }
-        
-        stats.avg = (stats.low + stats.high) / 2.0;
-        
+    for (const auto& [period, acc] : periods) {
+        PeriodStats stats;
+        stats.period = period;
+        stats.avg = acc.avg_sum / static_cast<double>(acc.count);  // mean of the per-record values; count >= 1 because each entry was created by a record
+        stats.open_min = acc.open_min;
+        stats.open_max = acc.open_max;
+        stats.close_min = acc.close_min;
+        stats.close_max = acc.close_max;
+        stats.count = acc.count;
        result.push_back(stats);
    }
    
    return result;
 }
-
-std::vector<DayStats> merge_day_stats(const std::vector<DayStats>& all_stats) {
-    // Объединяем статистику по одинаковым дням (если такие есть)
-    std::map<DayIndex, DayStats> merged;
-    
-    for (const auto& s : all_stats) {
-        auto it = merged.find(s.day);
-        if (it == merged.end()) {
-            merged[s.day] = s;
-        } else {
-            // Объединяем данные за один день
-            auto& m = it->second;
-            m.low = std::min(m.low, s.low);
-            m.high = std::max(m.high, s.high);
-            
-            // open берём от записи с меньшим timestamp
-            if (s.first_ts < m.first_ts) {
-                m.open = s.open;
-                m.first_ts = s.first_ts;
-            }
-            
-            // close берём от записи с большим timestamp
-            if (s.last_ts > m.last_ts) {
-                m.close = s.close;
-                m.last_ts = s.last_ts;
-            }
-            
-            m.avg = (m.low + m.high) / 2.0;
-        }
-    }
-    
-    // Преобразуем в отсортированный вектор
-    std::vector<DayStats> result;
-    result.reserve(merged.size());
-    
-    for (auto& [day, stats] : merged) {
-        result.push_back(stats);
-    }
-    
-    return result;
-}
-
--- a/src/aggregation.hpp
+++ b/src/aggregation.hpp
@@ -1,14 +1,8 @@
 #pragma once

 #include "record.hpp"
-#include "day_stats.hpp"
+#include "period_stats.hpp"
 #include <vector>
-#include <map>
-
-// Агрегация записей по дням на одном узле
-std::vector<DayStats> aggregate_days(const std::vector<Record>& records);
-
-// Объединение агрегированных данных с разных узлов
-// (на случай если один день попал на разные узлы - но в нашей схеме это не должно случиться)
-std::vector<DayStats> merge_day_stats(const std::vector<DayStats>& all_stats);

+// Агрегация записей по периодам на одном узле
+std::vector<PeriodStats> aggregate_periods(const std::vector<Record>& records);
--- a/src/day_stats.hpp
+++ b/src/day_stats.hpp
@@ -1,28 +0,0 @@
-#pragma once
-#include <cstdint>
-
-using DayIndex = long long;
-
-// Агрегированные данные за один день
-struct DayStats {
-    DayIndex day;       // индекс дня (timestamp / 86400)
-    double low;         // минимальный Low за день
-    double high;        // максимальный High за день
-    double open;        // первый Open за день
-    double close;       // последний Close за день
-    double avg;         // среднее = (low + high) / 2
-    double first_ts;    // timestamp первой записи (для определения порядка open)
-    double last_ts;     // timestamp последней записи (для определения close)
-};
-
-// Интервал с изменением >= 10%
-struct Interval {
-    DayIndex start_day;
-    DayIndex end_day;
-    double min_open;
-    double max_close;
-    double start_avg;
-    double end_avg;
-    double change;
-};
-
--- a/src/gpu_loader.cpp
+++ b/src/gpu_loader.cpp
@@ -1,4 +1,5 @@
 #include "gpu_loader.hpp"
+#include "utils.hpp"
 #include <dlfcn.h>
 #include <map>
 #include <algorithm>
@@ -29,58 +30,54 @@ bool gpu_is_available() {
    return false;
 }

-gpu_aggregate_days_fn load_gpu_aggregate_days() {
+gpu_aggregate_periods_fn load_gpu_aggregate_periods() {
    void* h = get_gpu_lib_handle();
    if (!h) return nullptr;

-    auto fn = (gpu_aggregate_days_fn)dlsym(h, "gpu_aggregate_days");
+    auto fn = (gpu_aggregate_periods_fn)dlsym(h, "gpu_aggregate_periods");
    return fn;
 }

-bool aggregate_days_gpu(
+bool aggregate_periods_gpu(
    const std::vector<Record>& records,
-    std::vector<DayStats>& out_stats,
-    gpu_aggregate_days_fn gpu_fn)
+    std::vector<PeriodStats>& out_stats,
+    gpu_aggregate_periods_fn gpu_fn)
 {
    if (!gpu_fn || records.empty()) {
        return false;
    }

-    // Общий таймер всей функции
+    int64_t interval = get_aggregation_interval();
+
    double t_total_start = omp_get_wtime();
-    
-    // Таймер CPU preprocessing
    double t_preprocess_start = omp_get_wtime();

-    // Группируем записи по дням и подготавливаем данные для GPU
-    std::map<DayIndex, std::vector<size_t>> day_record_indices;
+    std::map<PeriodIndex, std::vector<size_t>> period_record_indices;
    
    for (size_t i = 0; i < records.size(); i++) {
-        DayIndex day = static_cast<DayIndex>(records[i].timestamp) / 86400;
-        day_record_indices[day].push_back(i);
+        PeriodIndex period = static_cast<PeriodIndex>(records[i].timestamp) / interval;
+        period_record_indices[period].push_back(i);
    }

-    int num_days = static_cast<int>(day_record_indices.size());
+    int num_periods = static_cast<int>(period_record_indices.size());
    
-    // Подготавливаем массивы для GPU
    std::vector<GpuRecord> gpu_records;
-    std::vector<int> day_offsets;
-    std::vector<int> day_counts;
-    std::vector<long long> day_indices;
+    std::vector<int> period_offsets;
+    std::vector<int> period_counts;
+    std::vector<long long> period_indices;
    
    gpu_records.reserve(records.size());
-    day_offsets.reserve(num_days);
-    day_counts.reserve(num_days);
-    day_indices.reserve(num_days);
+    period_offsets.reserve(num_periods);
+    period_counts.reserve(num_periods);
+    period_indices.reserve(num_periods);
    
    int current_offset = 0;
    
-    for (auto& [day, indices] : day_record_indices) {
-        day_indices.push_back(day);
-        day_offsets.push_back(current_offset);
-        day_counts.push_back(static_cast<int>(indices.size()));
+    for (auto& [period, indices] : period_record_indices) {
+        period_indices.push_back(period);
+        period_offsets.push_back(current_offset);
+        period_counts.push_back(static_cast<int>(indices.size()));
        
-        // Добавляем записи этого дня
        for (size_t idx : indices) {
            const auto& r = records[idx];
            GpuRecord gr;
@@ -96,22 +93,19 @@ bool aggregate_days_gpu(
        current_offset += static_cast<int>(indices.size());
    }
    
-    // Выделяем память для результата
-    std::vector<GpuDayStats> gpu_stats(num_days);
+    std::vector<GpuPeriodStats> gpu_stats(num_periods);
    
    double t_preprocess_ms = (omp_get_wtime() - t_preprocess_start) * 1000.0;
    std::cout << "  GPU CPU preprocessing:  " << std::fixed << std::setprecision(3) 
              << std::setw(7) << t_preprocess_ms << " ms" << std::endl << std::flush;
    
-    // Вызываем GPU функцию (включает: malloc, memcpy H->D, kernel, memcpy D->H, free)
-    // Детальные тайминги выводятся внутри GPU функции
    int result = gpu_fn(
        gpu_records.data(),
        static_cast<int>(gpu_records.size()),
-        day_offsets.data(),
-        day_counts.data(),
-        day_indices.data(),
-        num_days,
+        period_offsets.data(),
+        period_counts.data(),
+        period_indices.data(),
+        num_periods,
        gpu_stats.data()
    );
    
@@ -120,24 +114,21 @@ bool aggregate_days_gpu(
        return false;
    }
    
-    // Конвертируем результат в DayStats
    out_stats.clear();
-    out_stats.reserve(num_days);
+    out_stats.reserve(num_periods);
    
    for (const auto& gs : gpu_stats) {
-        DayStats ds;
-        ds.day = gs.day;
-        ds.low = gs.low;
-        ds.high = gs.high;
-        ds.open = gs.open;
-        ds.close = gs.close;
-        ds.avg = gs.avg;
-        ds.first_ts = gs.first_ts;
-        ds.last_ts = gs.last_ts;
-        out_stats.push_back(ds);
+        PeriodStats ps;
+        ps.period = gs.period;
+        ps.avg = gs.avg;
+        ps.open_min = gs.open_min;
+        ps.open_max = gs.open_max;
+        ps.close_min = gs.close_min;
+        ps.close_max = gs.close_max;
+        ps.count = gs.count;
+        out_stats.push_back(ps);
    }
    
-    // Общее время всей GPU функции (включая preprocessing)
    double t_total_ms = (omp_get_wtime() - t_total_start) * 1000.0;
    std::cout << "  GPU TOTAL (with prep):  " << std::fixed << std::setprecision(3) 
              << std::setw(7) << t_total_ms << " ms" << std::endl << std::flush;
--- a/src/gpu_loader.hpp
+++ b/src/gpu_loader.hpp
@@ -1,5 +1,5 @@
 #pragma once
-#include "day_stats.hpp"
+#include "period_stats.hpp"
 #include "record.hpp"
 #include <vector>

@@ -18,34 +18,33 @@ struct GpuRecord {
    double volume;
 };

-struct GpuDayStats {
-    long long day;
-    double low;
-    double high;
-    double open;
-    double close;
+struct GpuPeriodStats {
+    long long period;
    double avg;
-    double first_ts;
-    double last_ts;
+    double open_min;
+    double open_max;
+    double close_min;
+    double close_max;
+    long long count;
 };

-using gpu_aggregate_days_fn = int (*)(
+using gpu_aggregate_periods_fn = int (*)(
    const GpuRecord* h_records,
    int num_records,
-    const int* h_day_offsets,
-    const int* h_day_counts,
-    const long long* h_day_indices,
-    int num_days,
-    GpuDayStats* h_out_stats
+    const int* h_period_offsets,
+    const int* h_period_counts,
+    const long long* h_period_indices,
+    int num_periods,
+    GpuPeriodStats* h_out_stats
 );

 // Загрузка функций из плагина
 gpu_is_available_fn load_gpu_is_available();
-gpu_aggregate_days_fn load_gpu_aggregate_days();
+gpu_aggregate_periods_fn load_gpu_aggregate_periods();

 // Обёртка для агрегации на GPU (возвращает true если успешно)
-bool aggregate_days_gpu(
+bool aggregate_periods_gpu(
    const std::vector<Record>& records,
-    std::vector<DayStats>& out_stats,
-    gpu_aggregate_days_fn gpu_fn
+    std::vector<PeriodStats>& out_stats,
+    gpu_aggregate_periods_fn gpu_fn
 );
--- a/src/gpu_plugin.cu
+++ b/src/gpu_plugin.cu
@@ -23,13 +23,12 @@ struct GpuRecord {

 struct GpuDayStats {
    long long day;
-    double low;
-    double high;
-    double open;
-    double close;
    double avg;
-    double first_ts;
-    double last_ts;
+    double open_min;
+    double open_max;
+    double close_min;
+    double close_max;
+    long long count;
 };

 extern "C" int gpu_is_available() {
@@ -63,32 +62,30 @@ __global__ void aggregate_kernel(
    
    GpuDayStats stats;
    stats.day = day_indices[d];
-    stats.low = DBL_MAX;
-    stats.high = -DBL_MAX;
-    stats.first_ts = DBL_MAX;
-    stats.last_ts = -DBL_MAX;
-    stats.open = 0;
-    stats.close = 0;
+    stats.open_min = DBL_MAX;
+    stats.open_max = -DBL_MAX;
+    stats.close_min = DBL_MAX;
+    stats.close_max = -DBL_MAX;
+    stats.count = count;
+    
+    double avg_sum = 0.0;
    
    for (int i = 0; i < count; i++) {
        const GpuRecord& r = records[offset + i];
        
-        // min/max
-        if (r.low < stats.low) stats.low = r.low;
-        if (r.high > stats.high) stats.high = r.high;
+        // Accumulate avg = (low + high) / 2
+        avg_sum += (r.low + r.high) / 2.0;
        
-        // first/last по timestamp
-        if (r.timestamp < stats.first_ts) {
-            stats.first_ts = r.timestamp;
-            stats.open = r.open;
-        }
-        if (r.timestamp > stats.last_ts) {
-            stats.last_ts = r.timestamp;
-            stats.close = r.close;
-        }
+        // min/max Open
+        if (r.open < stats.open_min) stats.open_min = r.open;
+        if (r.open > stats.open_max) stats.open_max = r.open;
+        
+        // min/max Close
+        if (r.close < stats.close_min) stats.close_min = r.close;
+        if (r.close > stats.close_max) stats.close_max = r.close;
    }
    
-    stats.avg = (stats.low + stats.high) / 2.0;
+    stats.avg = avg_sum / static_cast<double>(count);
    out_stats[d] = stats;
 }

--- a/src/intervals.cpp
+++ b/src/intervals.cpp
@@ -1,65 +1,301 @@
 #include "intervals.hpp"
+#include "utils.hpp"
+#include <mpi.h>
 #include <algorithm>
 #include <cmath>
 #include <fstream>
 #include <iomanip>
 #include <sstream>
 #include <ctime>
+#include <limits>

-std::vector<Interval> find_intervals(const std::vector<DayStats>& days, double threshold) {
-    if (days.empty()) {
-        return {};
+// Helper holding the interval currently being built: its start period/avg and the Open/Close min/max seen so far
+struct IntervalAccumulator {
+    PeriodIndex start_period;
+    double start_avg;
+    double open_min;
+    double open_max;
+    double close_min;
+    double close_max;
+    
+    void init(const PeriodStats& p) {  // start a new interval at period p
+        start_period = p.period;
+        start_avg = p.avg;
+        open_min = p.open_min;
+        open_max = p.open_max;
+        close_min = p.close_min;
+        close_max = p.close_max;
    }
    
-    std::vector<Interval> intervals;
+    void update(const PeriodStats& p) {  // widen the min/max with period p; the start period/avg are left unchanged
+        open_min = std::min(open_min, p.open_min);
+        open_max = std::max(open_max, p.open_max);
+        close_min = std::min(close_min, p.close_min);
+        close_max = std::max(close_max, p.close_max);
+    }
    
+    Interval finalize(const PeriodStats& end_period, double change) const {  // build the finished Interval ending at end_period; does not modify the accumulator
+        Interval iv;
+        iv.start_period = start_period;
+        iv.end_period = end_period.period;
+        iv.start_avg = start_avg;
+        iv.end_avg = end_period.avg;
+        iv.change = change;
+        iv.open_min = std::min(open_min, end_period.open_min);  // end_period is folded in here too, so callers need not update() first
+        iv.open_max = std::max(open_max, end_period.open_max);
+        iv.close_min = std::min(close_min, end_period.close_min);
+        iv.close_max = std::max(close_max, end_period.close_max);
+        return iv;
+    }
+};
+
+// PeriodStats flattened into 8 doubles for MPI transfer; all fields are zero-initialized so an invalid marker never sends indeterminate values
+struct PackedPeriodStats {
+    double period = 0.0;     // PeriodIndex stored as double
+    double avg = 0.0;
+    double open_min = 0.0;
+    double open_max = 0.0;
+    double close_min = 0.0;
+    double close_max = 0.0;
+    double count = 0.0;      // int64_t stored as double
+    double valid = 0.0;      // validity flag (1.0 = valid, 0.0 = invalid); invalid until pack() is called
+    
+    void pack(const PeriodStats& ps) {  // copy ps into the flat layout and mark it valid
+        period = static_cast<double>(ps.period);
+        avg = ps.avg;
+        open_min = ps.open_min;
+        open_max = ps.open_max;
+        close_min = ps.close_min;
+        close_max = ps.close_max;
+        count = static_cast<double>(ps.count);
+        valid = 1.0;
+    }
+    
+    PeriodStats unpack() const {  // inverse of pack(); the validity flag is not carried over
+        PeriodStats ps;
+        ps.period = static_cast<PeriodIndex>(period);
+        ps.avg = avg;
+        ps.open_min = open_min;
+        ps.open_max = open_max;
+        ps.close_min = close_min;
+        ps.close_max = close_max;
+        ps.count = static_cast<int64_t>(count);
+        return ps;
+    }
+    
+    bool is_valid() const { return valid > 0.5; }
+    void set_invalid() { valid = 0.0; }
+};
+
+IntervalResult find_intervals_parallel(  // finds intervals whose avg changed by >= threshold in this rank's periods, passing the open interval from rank to rank
+    const std::vector<PeriodStats>& periods,
+    int rank, int size,
+    double threshold)
+{
+    IntervalResult result;
+    result.compute_time = 0.0;
+    result.wait_time = 0.0;
+    
+    if (periods.empty()) {  // NOTE(review): for rank > 0 this path returns before the MPI_Recv below, so the predecessor's message is never received and its pending interval is not forwarded
+        if (rank < size - 1) {
+            PackedPeriodStats invalid;
+            invalid.set_invalid();  // only 'valid' is assigned here; the other seven doubles are sent as they are
+            MPI_Send(&invalid, 8, MPI_DOUBLE, rank + 1, 0, MPI_COMM_WORLD);
+        }
+        return result;
+    }
+    
+    double compute_start = MPI_Wtime();
+    
+    size_t process_until = (rank == size - 1) ? periods.size() : periods.size() - 1;  // every rank except the last leaves its final period unprocessed
+    
+    IntervalAccumulator acc;
     size_t start_idx = 0;
-    double price_base = days[start_idx].avg;
+    bool have_pending_interval = false;  // true while acc holds an interval that has not been closed yet
     
-    for (size_t i = 1; i < days.size(); i++) {
-        double price_now = days[i].avg;
-        double change = std::abs(price_now - price_base) / price_base;
+    if (rank > 0) {
+        double wait_start = MPI_Wtime();
         
-        if (change >= threshold) {
-            Interval interval;
-            interval.start_day = days[start_idx].day;
-            interval.end_day = days[i].day;
-            interval.start_avg = price_base;
-            interval.end_avg = price_now;
-            interval.change = change;
+        PackedPeriodStats received;
+        MPI_Recv(&received, 8, MPI_DOUBLE, rank - 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);  // blocks until the previous rank sends its chain state
+        
+        result.wait_time = MPI_Wtime() - wait_start;
+        compute_start = MPI_Wtime();  // restart the compute timer so the wait is not counted as compute
+        
+        if (received.is_valid()) {
+            PeriodStats prev_period = received.unpack();
             
-            // Находим min(Open) и max(Close) в интервале
-            interval.min_open = days[start_idx].open;
-            interval.max_close = days[start_idx].close;
-            
-            for (size_t j = start_idx; j <= i; j++) {
-                interval.min_open = std::min(interval.min_open, days[j].open);
-                interval.max_close = std::max(interval.max_close, days[j].close);
+            for (start_idx = 0; start_idx < periods.size(); start_idx++) {  // skip local periods that are not newer than the received one
+                if (periods[start_idx].period > prev_period.period) {
+                    break;
+                }
             }
             
-            intervals.push_back(interval);
-            
-            // Начинаем новый интервал
-            start_idx = i + 1;
-            if (start_idx >= days.size()) {
-                break;
+            if (start_idx < process_until) {  // NOTE(review): when this is false the received interval state is dropped (have_pending_interval stays false)
+                acc.init(prev_period);  // continue the interval handed over by the previous rank
+                have_pending_interval = true;
+                
+                for (size_t i = start_idx; i < process_until; i++) {
+                    acc.update(periods[i]);
+                    
+                    double change = std::abs(periods[i].avg - acc.start_avg) / acc.start_avg;  // relative change against the interval's start avg
+                    
+                    if (change >= threshold) {
+                        result.intervals.push_back(acc.finalize(periods[i], change));
+                        have_pending_interval = false;
+                        
+                        start_idx = i + 1;
+                        if (start_idx < process_until) {
+                            acc.init(periods[start_idx]);  // the next interval starts at the following period
+                            have_pending_interval = true;
+                        }
+                    }
+                }
             }
-            price_base = days[start_idx].avg;
+        } else {  // NOTE(review): this branch only initializes acc; the scan loop further down runs for rank 0 only, so this rank's periods are never compared against the threshold
+            if (process_until > 0) {
+                acc.init(periods[0]);
+                have_pending_interval = true;
+                start_idx = 0;
+            }
+        }
+    } else {
+        if (process_until > 0) {
+            acc.init(periods[0]);
+            have_pending_interval = true;
+            start_idx = 0;
         }
     }
     
-    return intervals;
+    if (rank == 0 && have_pending_interval) {  // rank 0 scan; same loop body as in the rank > 0 branch above
+        for (size_t i = 1; i < process_until; i++) {
+            acc.update(periods[i]);
+            
+            double change = std::abs(periods[i].avg - acc.start_avg) / acc.start_avg;
+            
+            if (change >= threshold) {
+                result.intervals.push_back(acc.finalize(periods[i], change));
+                have_pending_interval = false;
+                
+                start_idx = i + 1;
+                if (start_idx < process_until) {
+                    acc.init(periods[start_idx]);
+                    have_pending_interval = true;
+                }
+            }
+        }
+    }
+    
+    if (rank == size - 1 && have_pending_interval && !periods.empty()) {  // the last rank closes the still-open interval at its final period, without a threshold check
+        const auto& last_period = periods.back();
+        double change = std::abs(last_period.avg - acc.start_avg) / acc.start_avg;
+        result.intervals.push_back(acc.finalize(last_period, change));
+    }
+    
+    result.compute_time = MPI_Wtime() - compute_start;
+    
+    if (rank < size - 1) {  // hand the chain state over to the next rank
+        PackedPeriodStats to_send;
+        
+        if (have_pending_interval) {  // forward the open interval: its start period/avg plus the min/max accumulated so far
+            PeriodStats start_period;
+            start_period.period = acc.start_period;
+            start_period.avg = acc.start_avg;
+            start_period.open_min = acc.open_min;
+            start_period.open_max = acc.open_max;
+            start_period.close_min = acc.close_min;
+            start_period.close_max = acc.close_max;
+            start_period.count = 0;
+            to_send.pack(start_period);
+        } else if (periods.size() >= 2) {  // no open interval: send the last processed period (index size - 2)
+            to_send.pack(periods[periods.size() - 2]);
+        } else {
+            to_send.set_invalid();
+        }
+        
+        MPI_Send(&to_send, 8, MPI_DOUBLE, rank + 1, 0, MPI_COMM_WORLD);
+    }
+    
+    return result;
 }

-std::string day_index_to_date(DayIndex day) {
-    time_t ts = static_cast<time_t>(day) * 86400;
+double collect_intervals(  // gathers all ranks' intervals on rank 0, sorted by start_period; returns the time rank 0 spent receiving (0.0 on other ranks)
+    std::vector<Interval>& local_intervals,  // in/out: on rank 0 the received intervals are appended to it
+    int rank, int size)
+{
+    double wait_time = 0.0;
+    
+    if (rank == 0) {
+        for (int r = 1; r < size; r++) {  // receive from ranks 1..size-1 in rank order
+            double wait_start = MPI_Wtime();
+            
+            int count;
+            MPI_Recv(&count, 1, MPI_INT, r, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);  // tag 1: number of intervals
+            
+            if (count > 0) {
+                std::vector<double> buffer(count * 9);  // 9 doubles per interval, in the same field order the sender packs below
+                MPI_Recv(buffer.data(), count * 9, MPI_DOUBLE, r, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);  // tag 2: packed intervals
+                
+                for (int i = 0; i < count; i++) {
+                    Interval iv;
+                    iv.start_period = static_cast<PeriodIndex>(buffer[i * 9 + 0]);
+                    iv.end_period = static_cast<PeriodIndex>(buffer[i * 9 + 1]);
+                    iv.open_min = buffer[i * 9 + 2];
+                    iv.open_max = buffer[i * 9 + 3];
+                    iv.close_min = buffer[i * 9 + 4];
+                    iv.close_max = buffer[i * 9 + 5];
+                    iv.start_avg = buffer[i * 9 + 6];
+                    iv.end_avg = buffer[i * 9 + 7];
+                    iv.change = buffer[i * 9 + 8];
+                    local_intervals.push_back(iv);
+                }
+            }
+            
+            wait_time += MPI_Wtime() - wait_start;  // includes the unpacking time of this rank's message
+        }
+        
+        std::sort(local_intervals.begin(), local_intervals.end(),
+            [](const Interval& a, const Interval& b) {
+                return a.start_period < b.start_period;
+            });
+    } else {  // non-root ranks: send the count first, then the packed intervals if there are any
+        int count = static_cast<int>(local_intervals.size());
+        MPI_Send(&count, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
+        
+        if (count > 0) {
+            std::vector<double> buffer(count * 9);
+            for (int i = 0; i < count; i++) {
+                const auto& iv = local_intervals[i];
+                buffer[i * 9 + 0] = static_cast<double>(iv.start_period);
+                buffer[i * 9 + 1] = static_cast<double>(iv.end_period);
+                buffer[i * 9 + 2] = iv.open_min;
+                buffer[i * 9 + 3] = iv.open_max;
+                buffer[i * 9 + 4] = iv.close_min;
+                buffer[i * 9 + 5] = iv.close_max;
+                buffer[i * 9 + 6] = iv.start_avg;
+                buffer[i * 9 + 7] = iv.end_avg;
+                buffer[i * 9 + 8] = iv.change;
+            }
+            MPI_Send(buffer.data(), count * 9, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD);
+        }
+    }
+    
+    return wait_time;
+}
+
+std::string period_index_to_datetime(PeriodIndex period) {  // formats the start of a period as "YYYY-MM-DD HH:MM:SS" (UTC, via gmtime)
+    int64_t interval = get_aggregation_interval();
+    time_t ts = static_cast<time_t>(period) * interval;  // period index * period length; NOTE(review): the gmtime() result below is dereferenced without a null check
     struct tm* tm_info = gmtime(&ts);
     
     std::ostringstream oss;
     oss << std::setfill('0') 
         << (tm_info->tm_year + 1900) << "-"
         << std::setw(2) << (tm_info->tm_mon + 1) << "-"
-        << std::setw(2) << tm_info->tm_mday;
+        << std::setw(2) << tm_info->tm_mday << " "
+        << std::setw(2) << tm_info->tm_hour << ":"
+        << std::setw(2) << tm_info->tm_min << ":"
+        << std::setw(2) << tm_info->tm_sec;
     
     return oss.str();
 }
@@ -68,16 +304,20 @@ void write_intervals(const std::string& filename, const std::vector<Interval>& i
    std::ofstream out(filename);
    
    out << std::fixed << std::setprecision(2);
-    out << "start_date,end_date,min_open,max_close,start_avg,end_avg,change\n";
+    out << "start_datetime,end_datetime,open_min,open_max,close_min,close_max,start_avg,end_avg,change\n";
    
    for (const auto& iv : intervals) {
-        out << day_index_to_date(iv.start_day) << ","
-            << day_index_to_date(iv.end_day) << ","
-            << iv.min_open << ","
-            << iv.max_close << ","
+        // std::setprecision is sticky: the previous row left the stream at
+        // precision 6 (for "change"), so restore 2 decimals for the prices.
+        out << std::setprecision(2)
+            << period_index_to_datetime(iv.start_period) << ","
+            << period_index_to_datetime(iv.end_period) << ","
+            << iv.open_min << ","
+            << iv.open_max << ","
+            << iv.close_min << ","
+            << iv.close_max << ","
            << iv.start_avg << ","
            << iv.end_avg << ","
            << std::setprecision(6) << iv.change << "\n";
    }
 }
-
--- a/src/intervals.hpp
+++ b/src/intervals.hpp
@@ -1,15 +1,44 @@
 #pragma once

-#include "day_stats.hpp"
+#include "period_stats.hpp"
 #include <vector>
 #include <string>

-// Вычисление интервалов с изменением >= threshold (по умолчанию 10%)
-std::vector<Interval> find_intervals(const std::vector<DayStats>& days, double threshold = 0.10);
+// An interval whose change is >= threshold.
+// NOTE(review): the field descriptions below are inferred from the field names
+// and from the CSV columns produced by write_intervals — confirm against the
+// code that fills the struct.
+struct Interval {
+    PeriodIndex start_period;  // period index at which the interval starts
+    PeriodIndex end_period;    // period index at which the interval ends
+    double open_min;           // presumably the minimum Open within the interval
+    double open_max;           // presumably the maximum Open within the interval
+    double close_min;          // presumably the minimum Close within the interval
+    double close_max;          // presumably the maximum Close within the interval
+    double start_avg;          // presumably the average price of the start period
+    double end_avg;            // presumably the average price of the end period
+    double change;             // presumably the relative change from start_avg to end_avg
+};
+
+// Result of the parallel interval construction
+struct IntervalResult {
+    std::vector<Interval> intervals;
+    double compute_time;  // time spent computing
+    double wait_time;     // time spent waiting for data from the previous rank
+};
+
+// Builds the intervals in parallel using MPI.
+// `periods` are this rank's aggregated periods, `rank`/`size` identify the
+// rank within the communicator, `threshold` is the minimum change of an
+// interval (0.10 by default).
+IntervalResult find_intervals_parallel(
+    const std::vector<PeriodStats>& periods,
+    int rank, int size,
+    double threshold = 0.10
+);
+
+// Gathers the intervals from all ranks onto rank 0.
+// Returns a wait time measured with MPI_Wtime.
+double collect_intervals(
+    std::vector<Interval>& local_intervals,
+    int rank, int size
+);

 // Вывод интервалов в файл
 void write_intervals(const std::string& filename, const std::vector<Interval>& intervals);

-// Преобразование DayIndex в строку даты (YYYY-MM-DD)
-std::string day_index_to_date(DayIndex day);
-
+// Converts a PeriodIndex into a date/time string
+std::string period_index_to_datetime(PeriodIndex period);
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -4,29 +4,89 @@
 #include <iomanip>

 #include "csv_loader.hpp"
-#include "utils.hpp"
 #include "record.hpp"
+#include "period_stats.hpp"
+#include "aggregation.hpp"
+#include "intervals.hpp"
+#include "utils.hpp"

+// Entry point: every MPI rank reads its share of the CSV, aggregates it into
+// periods, trims the edge periods and searches for intervals; rank 0 then
+// gathers all intervals, writes result.csv and prints the total run time.
 int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);
+    double total_start = MPI_Wtime();

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // Параллельное чтение данных
-    double start_time = MPI_Wtime();
-    
+    double read_start = MPI_Wtime();
    std::vector<Record> records = load_csv_parallel(rank, size);
-    
-    double end_time = MPI_Wtime();
-    double read_time = end_time - start_time;
+    double read_time = MPI_Wtime() - read_start;

    std::cout << "Rank " << rank 
              << ": read " << records.size() << " records"
              << " in " << std::fixed << std::setprecision(3) << read_time << " sec"
              << std::endl;

+    // Aggregate the records into periods
+    double agg_start = MPI_Wtime();
+    std::vector<PeriodStats> periods = aggregate_periods(records);
+    double agg_time = MPI_Wtime() - agg_start;
+
+    std::cout << "Rank " << rank 
+              << ": aggregated " << periods.size() << " periods"
+              << " [" << (periods.empty() ? 0 : periods.front().period) 
+              << ".." << (periods.empty() ? 0 : periods.back().period) << "]"
+              << " in " << std::fixed << std::setprecision(3) << agg_time << " sec"
+              << std::endl;
+
+    // Drop the edge periods (they may be incomplete because of the parallel read)
+    trim_edge_periods(periods, rank, size);
+
+    std::cout << "Rank " << rank 
+              << ": after trim " << periods.size() << " periods"
+              << " [" << (periods.empty() ? 0 : periods.front().period) 
+              << ".." << (periods.empty() ? 0 : periods.back().period) << "]"
+              << std::endl;
+
+    // Build the intervals in parallel
+    IntervalResult iv_result = find_intervals_parallel(periods, rank, size);
+
+    std::cout << "Rank " << rank 
+              << ": found " << iv_result.intervals.size() << " intervals"
+              << ", compute " << std::fixed << std::setprecision(6) << iv_result.compute_time << " sec"
+              << ", wait " << iv_result.wait_time << " sec"
+              << std::endl;
+
+    // Gather the intervals on rank 0
+    double collect_wait = collect_intervals(iv_result.intervals, rank, size);
+
+    if (rank == 0) {
+        std::cout << "Rank 0: collected " << iv_result.intervals.size() << " total intervals"
+                  << ", wait " << std::fixed << std::setprecision(3) << collect_wait << " sec"
+                  << std::endl;
+    }
+
+    // Write the results to a file (rank 0 only)
+    if (rank == 0) {
+        double write_start = MPI_Wtime();
+        write_intervals("result.csv", iv_result.intervals);
+        double write_time = MPI_Wtime() - write_start;
+
+        std::cout << "Rank 0: wrote result.csv"
+                  << " in " << std::fixed << std::setprecision(3) << write_time << " sec"
+                  << std::endl;
+    }
+
+    // Report the total execution time; the barrier makes every rank finish
+    // before rank 0 takes the final timestamp.
+    MPI_Barrier(MPI_COMM_WORLD);
+    double total_time = MPI_Wtime() - total_start;
+    if (rank == 0) {
+        std::cout << "Total execution time: "
+                  << std::fixed << std::setprecision(3)
+                  << total_time << " sec" << std::endl;
+    }
+
    MPI_Finalize();
    return 0;
 }
--- a/src/period_stats.hpp
+++ b/src/period_stats.hpp
@@ -0,0 +1,15 @@
+#pragma once
+#include <cstdint>
+
+using PeriodIndex = int64_t;
+
+// Aggregated data for a single period
+struct PeriodStats {
+    PeriodIndex period;   // period index (timestamp / AGGREGATION_INTERVAL)
+    double avg;           // mean of (Low + High) / 2 over all records
+    double open_min;      // minimum Open within the period
+    double open_max;      // maximum Open within the period
+    double close_min;     // minimum Close within the period
+    double close_max;     // maximum Close within the period
+    int64_t count;        // number of records that were aggregated
+};
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -4,29 +4,6 @@
 #include <stdexcept>
 #include <numeric>

-std::map<DayIndex, std::vector<Record>> group_by_day(const std::vector<Record>& recs) {
-    std::map<DayIndex, std::vector<Record>> days;
-
-    for (const auto& r : recs) {
-        DayIndex day = static_cast<DayIndex>(r.timestamp) / 86400;
-        days[day].push_back(r);
-    }
-
-    return days;
-}
-
-std::vector<std::vector<DayIndex>> split_days(const std::map<DayIndex, std::vector<Record>>& days, int parts) {
-    std::vector<std::vector<DayIndex>> out(parts);
-
-    int i = 0;
-    for (auto& kv : days) {
-        out[i % parts].push_back(kv.first);
-        i++;
-    }
-
-    return out;
-}
-
 int get_num_cpu_threads() {
    const char* env_threads = std::getenv("NUM_CPU_THREADS");
    int num_cpu_threads = 1;
@@ -63,6 +40,10 @@ int64_t get_read_overlap_bytes() {
    return std::stoll(get_env("READ_OVERLAP_BYTES"));
 }

+// Reads the aggregation period length from the AGGREGATION_INTERVAL
+// environment variable.
+// Throws std::invalid_argument when the value is not a positive integer
+// (std::stoll itself throws on non-numeric or out-of-range text).
+int64_t get_aggregation_interval() {
+    const int64_t interval = std::stoll(get_env("AGGREGATION_INTERVAL"));
+    // A period index is timestamp / interval, so a zero interval would be a
+    // division by zero and a negative one would reverse the period order.
+    if (interval <= 0) {
+        throw std::invalid_argument("AGGREGATION_INTERVAL must be a positive integer");
+    }
+    return interval;
+}
+
 int64_t get_file_size(const std::string& path) {
    std::ifstream file(path, std::ios::binary | std::ios::ate);
    if (!file.is_open()) {
@@ -73,7 +54,6 @@ int64_t get_file_size(const std::string& path) {

 ByteRange calculate_byte_range(int rank, int size, int64_t file_size,
                               const std::vector<int>& shares, int64_t overlap_bytes) {
-    // Если shares пустой или не соответствует size, используем равные доли
    std::vector<int> effective_shares;
    if (shares.size() == static_cast<size_t>(size)) {
        effective_shares = shares;
@@ -82,8 +62,6 @@ ByteRange calculate_byte_range(int rank, int size, int64_t file_size,
    }
    
    int total_shares = std::accumulate(effective_shares.begin(), effective_shares.end(), 0);
-    
-    // Вычисляем базовые границы для каждого ранка
    int64_t bytes_per_share = file_size / total_shares;
    
    int64_t base_start = 0;
@@ -93,22 +71,31 @@ ByteRange calculate_byte_range(int rank, int size, int64_t file_size,
    
    int64_t base_end = base_start + bytes_per_share * effective_shares[rank];
    
-    // Применяем overlap
    ByteRange range;
    
    if (rank == 0) {
-        // Первый ранк: начинаем с 0, добавляем overlap в конце
        range.start = 0;
        range.end = std::min(base_end + overlap_bytes, file_size);
    } else if (rank == size - 1) {
-        // Последний ранк: вычитаем overlap в начале, читаем до конца файла
        range.start = std::max(base_start - overlap_bytes, static_cast<int64_t>(0));
        range.end = file_size;
    } else {
-        // Промежуточные ранки: overlap с обеих сторон
        range.start = std::max(base_start - overlap_bytes, static_cast<int64_t>(0));
        range.end = std::min(base_end + overlap_bytes, file_size);
    }
    
    return range;
 }
+
+// Removes the edge periods of this rank's chunk, which may be incomplete
+// because the input file is split between ranks.
+//
+// The first period is removed unless this is rank 0 (nothing precedes the
+// start of the file); the last period is removed unless this is the last rank
+// (nothing follows the end of the file). With a single rank nothing is
+// removed, because there is no boundary shared with another rank.
+void trim_edge_periods(std::vector<PeriodStats>& periods, int rank, int size) {
+    const bool drop_first = rank > 0;
+    const bool drop_last = rank < size - 1;
+
+    if (drop_last && !periods.empty()) {
+        periods.pop_back();
+    }
+    // Re-check emptiness: a one-element vector is already empty at this point,
+    // and erasing begin() of an empty vector is undefined behaviour.
+    if (drop_first && !periods.empty()) {
+        periods.erase(periods.begin());
+    }
+}
--- a/src/utils.hpp
+++ b/src/utils.hpp
@@ -1,22 +1,19 @@
 #pragma once

 #include "record.hpp"
-#include "day_stats.hpp"
+#include "period_stats.hpp"
 #include <map>
 #include <vector>
 #include <string>
 #include <cstdlib>
 #include <cstdint>

-// Группировка записей по дням
-std::map<DayIndex, std::vector<Record>> group_by_day(const std::vector<Record>& recs);
-std::vector<std::vector<DayIndex>> split_days(const std::map<DayIndex, std::vector<Record>>& days, int parts);
-
 // Чтение переменных окружения
 int get_num_cpu_threads();
 std::string get_data_path();
 std::vector<int> get_data_read_shares();
 int64_t get_read_overlap_bytes();
+int64_t get_aggregation_interval();

 // Структура для хранения диапазона байт для чтения
 struct ByteRange {
@@ -30,3 +27,6 @@ ByteRange calculate_byte_range(int rank, int size, int64_t file_size,

 // Получение размера файла
 int64_t get_file_size(const std::string& path);
+
+// Removes the edge periods, which may be incomplete because of the parallel read
+void trim_edge_periods(std::vector<PeriodStats>& periods, int rank, int size);
--- a/upsample.py
+++ b/upsample.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+import argparse
+import sys
+import time
+
+
+def parse_row(line: str):
+    """Split one CSV data line into typed fields.
+
+    The expected column order is Timestamp,Open,High,Low,Close,Volume.
+    The timestamp is converted through float() first, so values such as
+    "1325412060.0" are accepted, and is returned as an int; the other five
+    fields are returned as floats.
+
+    Raises ValueError when the line does not hold exactly six comma-separated
+    fields or when a field is not numeric.
+    """
+    raw_ts, raw_open, raw_high, raw_low, raw_close, raw_volume = line.split(',')
+    timestamp = int(float(raw_ts))
+    prices = (float(raw_open), float(raw_high), float(raw_low), float(raw_close))
+    return (timestamp, *prices, float(raw_volume))
+
+
+def fmt_row(ts, o, h, l, c, v):
+    """Render one record as a newline-terminated CSV line.
+
+    Open, High, Low and Close are written with 2 decimals, Volume with 8;
+    the timestamp is written as is.
+    """
+    prices = ",".join(f"{value:.2f}" for value in (o, h, l, c))
+    return f"{ts},{prices},{v:.8f}\n"
+
+
+def count_lines_fast(path: str) -> int:
+    """Return the number of data lines in *path*, not counting the header.
+
+    The file is opened in binary mode, so no text decoding is done while
+    counting. An empty file yields 0 rather than -1.
+    """
+    with open(path, "rb") as f:
+        line_count = sum(1 for _ in f)
+    # Subtract the header line, but never report a negative row count.
+    return max(line_count - 1, 0)
+
+
+def main(inp, out, step, flush_every):
+    """Upsample a CSV of OHLCV rows to a fixed time step by linear interpolation.
+
+    For every pair of consecutive input rows, ``(t2 - t1) // step`` rows are
+    written, starting at ``t1`` and spaced ``step`` apart; each column is
+    interpolated linearly in time between the two rows. Pairs that are closer
+    together than ``step`` (including equal or decreasing timestamps) produce
+    no rows. The last input row is always written unchanged.
+
+    Args:
+        inp: Path of the input CSV (one header line, then
+            Timestamp,Open,High,Low,Close,Volume rows).
+        out: Path of the output CSV; it is overwritten.
+        step: Spacing of the output rows, in the units of the Timestamp column.
+        flush_every: Number of buffered output rows that triggers a write.
+
+    Progress and the total run time are reported on stderr.
+    """
+    # Count the rows up front so that progress can be shown as a percentage.
+    total_lines = count_lines_fast(inp)
+    print(f"Total input rows: {total_lines:,}", file=sys.stderr)
+
+    start_time = time.time()
+    processed = 0
+    last_report = start_time
+
+    with open(inp, "r", buffering=8 * 1024 * 1024) as fin, \
+         open(out, "w", buffering=8 * 1024 * 1024) as fout:
+
+        fin.readline()  # skip the header
+        fout.write("Timestamp,Open,High,Low,Close,Volume\n")
+
+        first = fin.readline()
+        if not first:
+            return
+
+        prev = parse_row(first.strip())
+
+        out_buf = []
+        # Running total of generated rows; it is only used for the progress
+        # line and is never reset, so the report stays cumulative.
+        out_rows = 0
+
+        for line in fin:
+            line = line.strip()
+            if not line:
+                continue
+
+            cur = parse_row(line)
+
+            t1, o1, h1, l1, c1, v1 = prev
+            t2, o2, h2, l2, c2, v2 = cur
+
+            dt = t2 - t1
+            steps = dt // step
+
+            if steps > 0:
+                do = o2 - o1
+                dh = h2 - h1
+                dl = l2 - l1
+                dc = c2 - c1
+                dv = v2 - v1
+
+                for i in range(steps):
+                    # Weight by elapsed time, so the value matches the emitted
+                    # timestamp even when dt is not a multiple of step.
+                    a = (i * step) / dt
+                    out_buf.append(fmt_row(
+                        t1 + i * step,
+                        o1 + do * a,
+                        h1 + dh * a,
+                        l1 + dl * a,
+                        c1 + dc * a,
+                        v1 + dv * a
+                    ))
+
+                out_rows += steps
+
+            prev = cur
+            processed += 1
+
+            # Progress report: checked every 100k rows, printed at most every 0.5 s.
+            if processed % 100_000 == 0:
+                now = time.time()
+                if now - last_report >= 0.5:
+                    pct = processed * 100.0 / total_lines
+                    elapsed = now - start_time
+                    speed = processed / elapsed if elapsed > 0 else 0
+                    eta = (total_lines - processed) / speed if speed > 0 else 0
+
+                    print(
+                        f"\rprocessed: {processed:,} / {total_lines:,} "
+                        f"({pct:5.1f}%) | "
+                        f"out ~ {out_rows:,} | "
+                        f"{speed:,.0f} rows/s | "
+                        f"ETA {eta/60:5.1f} min",
+                        end="",
+                        file=sys.stderr,
+                        flush=True,
+                    )
+                    last_report = now
+
+            # Flush once the buffer itself holds enough rows.
+            if len(out_buf) >= flush_every:
+                fout.write("".join(out_buf))
+                out_buf.clear()
+
+        # Write whatever is still buffered.
+        if out_buf:
+            fout.write("".join(out_buf))
+
+        # The last input row has no successor to interpolate towards,
+        # so it is written as is.
+        fout.write(fmt_row(*prev))
+
+    total_time = time.time() - start_time
+    print(
+        f"\nDone in {total_time/60:.1f} min",
+        file=sys.stderr
+    )
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the options, validate them, run main().
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i", "--input", required=True)
+    parser.add_argument("-o", "--output", required=True)
+    parser.add_argument("-s", "--step", type=int, default=10)
+    parser.add_argument("--flush-every", type=int, default=200_000)
+    options = parser.parse_args()
+
+    # main() divides by the step, so it has to be strictly positive.
+    if not options.step > 0:
+        raise SystemExit("step must be > 0")
+
+    main(
+        inp=options.input,
+        out=options.output,
+        step=options.step,
+        flush_every=options.flush_every,
+    )
Author	SHA1	Message	Date
Arity-T	a5aadbc774	Перешёл к произвольным периодам агрегации	2025-12-16 13:53:36 +00:00
Arity-T	2833d2f7b4	upsampling данных	2025-12-16 13:25:38 +00:00
Arity-T	e4e01e1df3	Неактуальная инфа	2025-12-16 12:45:11 +00:00
Arity-T	f5b6f0fc73	Скрипт для усреднения результатов	2025-12-15 14:46:27 +00:00
Arity-T	1cc9840d60	Общее время выполнения	2025-12-15 13:04:26 +00:00
Arity-T	dea7940e29	Поиск интервалов параллельный	2025-12-15 12:57:56 +00:00
Arity-T	f4ade418d6	Удаляем крайние дни	2025-12-15 11:30:50 +00:00
Arity-T	ab18d9770f	Агрегация	2025-12-13 12:45:29 +00:00