На GPU вычисления

2025-12-02 12:39:09 +00:00
parent 78bdb1ddb7
commit 73c9e580e4
5 changed files with 344 additions and 17 deletions
--- a/src/gpu_plugin.cu
+++ b/src/gpu_plugin.cu
@@ -1,4 +1,27 @@
 #include <cuda_runtime.h>
+#include <cstdint>
+#include <cfloat>
+
+// Data structures shared with the host-side C++ code (the layout must match it exactly)
+struct GpuRecord {
+    double timestamp;  // ordering key: aggregate_kernel takes open/close from the min/max timestamp of a day
+    double open;       // copied to GpuDayStats::open for the record with the smallest timestamp
+    double high;       // contributes to the per-day maximum
+    double low;        // contributes to the per-day minimum
+    double close;      // copied to GpuDayStats::close for the record with the largest timestamp
+    double volume;     // not read by the aggregation kernel visible in this file
+};
+
+struct GpuDayStats {  // per-day result written by aggregate_kernel, one entry per day
+    long long day;    // day identifier, copied from day_indices[d]
+    double low;       // minimum GpuRecord::low over the day's records
+    double high;      // maximum GpuRecord::high over the day's records
+    double open;      // GpuRecord::open of the record with the smallest timestamp
+    double close;     // GpuRecord::close of the record with the largest timestamp
+    double avg;       // (low + high) / 2
+    double first_ts;  // smallest timestamp seen in the day
+    double last_ts;   // largest timestamp seen in the day
+};

 extern "C" int gpu_is_available() {
    int n = 0;
@@ -6,3 +29,139 @@ extern "C" int gpu_is_available() {
    if (err != cudaSuccess) return 0;
    return (n > 0) ? 1 : 0;
 }
+
+// Aggregates per-day statistics from an array of records.
+//
+// Launch layout: any 1-D grid/block configuration is valid. Days are
+// distributed over threads with a grid-stride loop, one whole day per thread
+// per iteration, so <<<1, 1>>> degenerates to a sequential pass over all days.
+// No shared memory is used.
+//
+// Parameters (all pointers are device pointers):
+//   records      array of num_records input records
+//   num_records  length of records; used to clamp each day's range
+//   day_offsets  index in records of the first record of each day
+//   day_counts   number of records in each day
+//   day_indices  day identifier copied verbatim into the output
+//   num_days     length of day_offsets / day_counts / day_indices / out_stats
+//   out_stats    one GpuDayStats per day, written at the same index d
+//
+// For every day: low/high are the min/max of the record low/high fields,
+// open comes from the record with the smallest timestamp, close from the
+// record with the largest timestamp (the earliest such record in array order
+// wins on ties), and avg = (low + high) / 2. A day with no records keeps the
+// sentinel values (low = first_ts = DBL_MAX, high = last_ts = -DBL_MAX,
+// open = close = 0), which makes avg evaluate to 0.
+__global__ void aggregate_kernel(
+    const GpuRecord* records,
+    int num_records,
+    const int* day_offsets,
+    const int* day_counts,
+    const long long* day_indices,
+    int num_days,
+    GpuDayStats* out_stats)
+{
+    // 64-bit indices: neither the stride increment nor offset + count can overflow.
+    const long long stride = (long long)gridDim.x * blockDim.x;
+    long long d = (long long)blockIdx.x * blockDim.x + threadIdx.x;
+
+    for (; d < num_days; d += stride) {
+        long long begin = day_offsets[d];
+        long long end = begin + day_counts[d];
+
+        // Clamp the day's range to the records array so that inconsistent
+        // offsets/counts cannot cause out-of-bounds global reads.
+        if (begin < 0) begin = 0;
+        if (end > num_records) end = num_records;
+
+        GpuDayStats stats;
+        stats.day = day_indices[d];
+        stats.low = DBL_MAX;
+        stats.high = -DBL_MAX;
+        stats.first_ts = DBL_MAX;
+        stats.last_ts = -DBL_MAX;
+        stats.open = 0;
+        stats.close = 0;
+
+        for (long long i = begin; i < end; ++i) {
+            const GpuRecord& r = records[i];
+
+            // Running min/max of the price range.
+            if (r.low < stats.low) stats.low = r.low;
+            if (r.high > stats.high) stats.high = r.high;
+
+            // Strict comparisons: on equal timestamps the earlier record wins.
+            if (r.timestamp < stats.first_ts) {
+                stats.first_ts = r.timestamp;
+                stats.open = r.open;
+            }
+            if (r.timestamp > stats.last_ts) {
+                stats.last_ts = r.timestamp;
+                stats.close = r.close;
+            }
+        }
+
+        stats.avg = (stats.low + stats.high) / 2.0;
+        out_stats[d] = stats;
+    }
+}
+
+namespace {
+
+// Owns the device buffers of one gpu_aggregate_days call and releases them on
+// every exit path. cudaFree(nullptr) is a no-op, so a partially allocated set
+// is released correctly as well.
+struct AggregateDeviceBuffers {
+    GpuRecord* records = nullptr;
+    int* day_offsets = nullptr;
+    int* day_counts = nullptr;
+    long long* day_indices = nullptr;
+    GpuDayStats* out_stats = nullptr;
+
+    AggregateDeviceBuffers() = default;
+    AggregateDeviceBuffers(const AggregateDeviceBuffers&) = delete;
+    AggregateDeviceBuffers& operator=(const AggregateDeviceBuffers&) = delete;
+
+    ~AggregateDeviceBuffers() {
+        cudaFree(records);
+        cudaFree(day_offsets);
+        cudaFree(day_counts);
+        cudaFree(day_indices);
+        cudaFree(out_stats);
+    }
+};
+
+}  // namespace
+
+// Host entry point called from C++: copies the inputs to the GPU, runs
+// aggregate_kernel and copies the per-day statistics back.
+//
+// Parameters (all pointers are host pointers):
+//   h_records      num_records input records
+//   h_day_offsets  num_days offsets into h_records (first record of each day)
+//   h_day_counts   num_days record counts
+//   h_day_indices  num_days day identifiers
+//   h_out_stats    receives num_days results; written only on success
+//
+// Returns 0 on success (including num_days == 0, where nothing is written),
+// otherwise a negative code:
+//   -1..-5    cudaMalloc failed (records, offsets, counts, indices, output)
+//   -10..-13  host-to-device copy failed (records, offsets, counts, indices)
+//   -7        kernel launch failed
+//   -6        kernel execution failed (reported by cudaDeviceSynchronize)
+//   -14       device-to-host copy of the results failed
+//   -20       invalid arguments (negative size or null pointer)
+// Device memory is released on every path.
+extern "C" int gpu_aggregate_days(
+    const GpuRecord* h_records,
+    int num_records,
+    const int* h_day_offsets,
+    const int* h_day_counts,
+    const long long* h_day_indices,
+    int num_days,
+    GpuDayStats* h_out_stats)
+{
+    if (num_records < 0 || num_days < 0) return -20;
+    if (num_days == 0) return 0;
+    if (!h_day_offsets || !h_day_counts || !h_day_indices || !h_out_stats) return -20;
+    if (num_records > 0 && !h_records) return -20;
+
+    // Byte counts computed in size_t so large inputs cannot overflow int.
+    const size_t records_bytes = (size_t)num_records * sizeof(GpuRecord);
+    const size_t ints_bytes = (size_t)num_days * sizeof(int);
+    const size_t indices_bytes = (size_t)num_days * sizeof(long long);
+    const size_t stats_bytes = (size_t)num_days * sizeof(GpuDayStats);
+
+    AggregateDeviceBuffers dev;
+
+    // Allocate device memory. With num_records == 0 the records buffer stays
+    // null; the kernel never dereferences it because every range is clamped.
+    if (num_records > 0 &&
+        cudaMalloc(&dev.records, records_bytes) != cudaSuccess) return -1;
+    if (cudaMalloc(&dev.day_offsets, ints_bytes) != cudaSuccess) return -2;
+    if (cudaMalloc(&dev.day_counts, ints_bytes) != cudaSuccess) return -3;
+    if (cudaMalloc(&dev.day_indices, indices_bytes) != cudaSuccess) return -4;
+    if (cudaMalloc(&dev.out_stats, stats_bytes) != cudaSuccess) return -5;
+
+    // Copy the inputs to the GPU.
+    if (num_records > 0 &&
+        cudaMemcpy(dev.records, h_records, records_bytes,
+                   cudaMemcpyHostToDevice) != cudaSuccess) return -10;
+    if (cudaMemcpy(dev.day_offsets, h_day_offsets, ints_bytes,
+                   cudaMemcpyHostToDevice) != cudaSuccess) return -11;
+    if (cudaMemcpy(dev.day_counts, h_day_counts, ints_bytes,
+                   cudaMemcpyHostToDevice) != cudaSuccess) return -12;
+    if (cudaMemcpy(dev.day_indices, h_day_indices, indices_bytes,
+                   cudaMemcpyHostToDevice) != cudaSuccess) return -13;
+
+    // One thread per day; the kernel's grid-stride loop covers any remainder.
+    // num_days >= 1 here, so this ceil-division cannot overflow.
+    const int threads_per_block = 128;
+    const int blocks = (num_days - 1) / threads_per_block + 1;
+    aggregate_kernel<<<blocks, threads_per_block>>>(
+        dev.records, num_records,
+        dev.day_offsets, dev.day_counts, dev.day_indices,
+        num_days, dev.out_stats
+    );
+
+    // Launch-configuration errors surface here ...
+    if (cudaGetLastError() != cudaSuccess) return -7;
+
+    // ... and faults raised while the kernel runs surface at the sync.
+    if (cudaDeviceSynchronize() != cudaSuccess) return -6;
+
+    // Copy the results back; a failed copy must not be reported as success.
+    if (cudaMemcpy(h_out_stats, dev.out_stats, stats_bytes,
+                   cudaMemcpyDeviceToHost) != cudaSuccess) return -14;
+
+    return 0;
+}