From 2d5c5f36a24aa8ee3b606a80a5a46ae3d393f5eb Mon Sep 17 00:00:00 2001 From: Arity-T Date: Tue, 1 Apr 2025 14:32:18 +0300 Subject: [PATCH] =?UTF-8?q?=D0=9F=D0=BE=20=D0=BC=D0=B5=D0=BB=D0=BE=D1=87?= =?UTF-8?q?=D0=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 3 ++ kernel.cu | 106 ++++++++++++++++++++++++++++-------------------------- 2 files changed, 59 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index b996ade..bd3f709 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,9 @@ scontrol show jobid # или так, чтобы не выводить лишнюю информацию scontrol show jobid | grep JobState + +# а так можно посмотреть состояния всех задач +scontrol show job | grep JobState ``` Отменить или завершить задачу досрочно. diff --git a/kernel.cu b/kernel.cu index 0d9cbe6..55bf063 100644 --- a/kernel.cu +++ b/kernel.cu @@ -6,10 +6,12 @@ // Настройки эксперимента #define USE_SHARED_MEMORY true -#define BLOCKS_COUNT 16 -#define THREADS_COUNT 16 // Используется, если USE_SHARED_MEMORY == false -#define BLOCK_SIZE 4 // Используется, если USE_SHARED_MEMORY == true -#define MATRIX_SIZE 32 +#define BLOCKS_COUNT 10000 // 1, 10, 100, 1000, 10000 +// Используется, если USE_SHARED_MEMORY == false +#define THREADS_COUNT 100 // 1, 9, 100, 1024 +// Используется, если USE_SHARED_MEMORY == true +#define BLOCK_SIZE 1024 // 1, 3, 10, 32 +#define MATRIX_SIZE 100 // 500, 1000, 1500 #define OBSTACLE_PROB 10 // Процент препятствий на полигоне #define START_X 2 // Начальная точка @@ -48,46 +50,7 @@ void print_distance_map(int* P, unsigned int* dist, int n) { } } -// Ядро, не использующее разделяемую память -__global__ void wave_step(int* P, unsigned int* dist, int n, bool* changed) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - // printf("Hello from CUDA kernel! I'm thread #%d\n", tid); - - while (tid < n * n) { - int i = tid / n; - int j = tid % n; - // printf("TID = %d (real %d); i = %d; j = %d\n", tid, threadIdx.x + blockIdx.x * blockDim.x, i, j); - - if (i >= n || j >= n) return; - if (P[tid] != -1) { - unsigned int current_dist = dist[tid]; - unsigned int min_dist = current_dist; - - // Проверка соседей с защитой от переполнения - if (i > 0 && dist[(i-1)*n + j] != INF) - min_dist = min(min_dist, dist[(i-1)*n + j] + 1); - - if (i < n-1 && dist[(i+1)*n + j] != INF) - min_dist = min(min_dist, dist[(i+1)*n + j] + 1); - - if (j > 0 && dist[i*n + (j-1)] != INF) - min_dist = min(min_dist, dist[i*n + (j-1)] + 1); - - if (j < n-1 && dist[i*n + (j+1)] != INF) - min_dist = min(min_dist, dist[i*n + (j+1)] + 1); - - if (min_dist < current_dist) { - dist[tid] = min_dist; - *changed = true; - } - } - - // Каждый поток обрабатывает каждую blockDim.x * gridDim.x клетку - // printf("Increment will be: %d\n", blockDim.x * gridDim.x); - tid += blockDim.x * gridDim.x; - } -} - +#if USE_SHARED_MEMORY // Ядро, использующее разделяемую память __global__ void wave_step_shared(int* P, unsigned int* dist, int n, bool* global_changed) { int local_row = threadIdx.x; @@ -189,6 +152,47 @@ __global__ void wave_step_shared(int* P, unsigned int* dist, int n, bool* global *global_changed = true; } } +#else +// Ядро, не использующее разделяемую память +__global__ void wave_step(int* P, unsigned int* dist, int n, bool* changed) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + // printf("Hello from CUDA kernel! I'm thread #%d\n", tid); + + while (tid < n * n) { + int i = tid / n; + int j = tid % n; + // printf("TID = %d (real %d); i = %d; j = %d\n", tid, threadIdx.x + blockIdx.x * blockDim.x, i, j); + + if (i >= n || j >= n) return; + if (P[tid] != -1) { + unsigned int current_dist = dist[tid]; + unsigned int min_dist = current_dist; + + // Проверка соседей с защитой от переполнения + if (i > 0 && dist[(i-1)*n + j] != INF) + min_dist = min(min_dist, dist[(i-1)*n + j] + 1); + + if (i < n-1 && dist[(i+1)*n + j] != INF) + min_dist = min(min_dist, dist[(i+1)*n + j] + 1); + + if (j > 0 && dist[i*n + (j-1)] != INF) + min_dist = min(min_dist, dist[i*n + (j-1)] + 1); + + if (j < n-1 && dist[i*n + (j+1)] != INF) + min_dist = min(min_dist, dist[i*n + (j+1)] + 1); + + if (min_dist < current_dist) { + dist[tid] = min_dist; + *changed = true; + } + } + + // Каждый поток обрабатывает каждую blockDim.x * gridDim.x клетку + // printf("Increment will be: %d\n", blockDim.x * gridDim.x); + tid += blockDim.x * gridDim.x; + } +} +#endif int main() { const int n = MATRIX_SIZE; @@ -227,12 +231,14 @@ int main() { // printf("Wave step #%d\n", iterations); changed = false; cudaMemcpy(d_changed, &changed, sizeof(bool), cudaMemcpyHostToDevice); - if (USE_SHARED_MEMORY) { + + #if USE_SHARED_MEMORY dim3 block(BLOCK_SIZE, BLOCK_SIZE); wave_step_shared<<>>(d_P, d_dist, n, d_changed); - } else { + #else wave_step<<>>(d_P, d_dist, n, d_changed); - } + #endif + cudaDeviceSynchronize(); // Синхронизация после ядра cudaMemcpy(&changed, d_changed, sizeof(bool), cudaMemcpyDeviceToHost); iterations++; @@ -254,14 +260,14 @@ int main() { // Вывод результатов printf("Time: %.2f ms\n", milliseconds); - if (USE_SHARED_MEMORY) { + #if USE_SHARED_MEMORY printf("Results of the algorithm using shared memory.\n"); printf("Matrix: %dx%d | BlocksXThreadXThread: %dx%dx%d | Obstacles: %d%%\n\n", n, n, BLOCKS_COUNT, BLOCK_SIZE, BLOCK_SIZE, OBSTACLE_PROB); - } else { + #else printf("Matrix: %dx%d | BlocksXThreads: %dx%d | Obstacles: %d%%\n\n", n, n, BLOCKS_COUNT, THREADS_COUNT, OBSTACLE_PROB); - } + #endif if (MATRIX_SIZE <= 100) print_distance_map(P, dist, MATRIX_SIZE);