Произвольное число блоков и потоков
This commit is contained in:
86
kernel.cu
86
kernel.cu
@@ -4,7 +4,8 @@
|
|||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
#include <device_launch_parameters.h>
|
#include <device_launch_parameters.h>
|
||||||
|
|
||||||
#define BLOCK_SIZE 16
|
#define BLOCKS_COUNT 16
|
||||||
|
#define THREADS_COUNT 16
|
||||||
#define MATRIX_SIZE 32
|
#define MATRIX_SIZE 32
|
||||||
#define OBSTACLE_PROB 10
|
#define OBSTACLE_PROB 10
|
||||||
#define START_X 2
|
#define START_X 2
|
||||||
@@ -14,35 +15,6 @@
|
|||||||
|
|
||||||
#define INF UINT_MAX // Используем беззнаковый максимум
|
#define INF UINT_MAX // Используем беззнаковый максимум
|
||||||
|
|
||||||
/**
 * One wave-propagation pass over an n x n grid, one thread per cell.
 *
 * Launch layout: 2D grid of 2D blocks; the row comes from the y dimension and
 * the column from the x dimension. Threads that fall outside the n x n area
 * exit without touching memory.
 *
 * @param P       n*n cell map, row-major; cells equal to -1 are skipped
 *                (presumably obstacles - confirm against generate_polygon).
 * @param dist    n*n distances, row-major; INF marks "no distance yet".
 * @param n       Side length of the grid.
 * @param changed Set to true when this thread lowered its cell's distance;
 *                never cleared by the kernel.
 */
__global__ void wave_step(int* P, unsigned int* dist, int n, bool* changed) {
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int col = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard clauses: outside the grid, or a cell that must not be relaxed.
    if (row >= n || col >= n) return;
    const int cell = row * n + col;
    if (P[cell] == -1) return;

    const unsigned int here = dist[cell];
    unsigned int best = here;

    // Look at the four neighbours; INF ones are skipped so "+ 1" cannot wrap.
    if (row > 0) {
        const int north = cell - n;
        if (dist[north] != INF) best = min(best, dist[north] + 1);
    }
    if (row < n - 1) {
        const int south = cell + n;
        if (dist[south] != INF) best = min(best, dist[south] + 1);
    }
    if (col > 0) {
        const int west = cell - 1;
        if (dist[west] != INF) best = min(best, dist[west] + 1);
    }
    if (col < n - 1) {
        const int east = cell + 1;
        if (dist[east] != INF) best = min(best, dist[east] + 1);
    }

    // Nothing shorter was found: leave the cell and the flag untouched.
    if (best >= here) return;

    atomicMin(dist + cell, best);
    *changed = true;
}
|
|
||||||
|
|
||||||
void generate_polygon(int* P, int n) {
|
void generate_polygon(int* P, int n) {
|
||||||
srand(42);
|
srand(42);
|
||||||
for (int i = 0; i < n*n; i++) {
|
for (int i = 0; i < n*n; i++) {
|
||||||
@@ -71,9 +43,47 @@ void print_distance_map(int* P, unsigned int* dist, int n) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns `best` lowered to `neighbour + 1`, ignoring neighbours that are still INF. */
static __device__ __forceinline__ unsigned int wave_relax(unsigned int best,
                                                          unsigned int neighbour) {
    // Skipping INF keeps "+ 1" from wrapping around to 0.
    return (neighbour != INF) ? min(best, neighbour + 1) : best;
}

/**
 * One wave-propagation pass over an n x n grid.
 *
 * Launch layout: 1D grid of 1D blocks of any size. Cells are visited with a
 * grid-stride loop, so each cell is processed by exactly one thread regardless
 * of how many blocks and threads are launched.
 *
 * Neighbour distances may be read while their owning threads update them in
 * the same launch. A cell is only ever written by its owning thread and only
 * with a smaller value, so a plain store is sufficient; a stale read merely
 * postpones the improvement to a later pass (presumably the host relaunches
 * until `changed` stays false - confirm against the caller).
 *
 * @param P       n*n cell map, row-major; cells equal to -1 are skipped
 *                (presumably obstacles - confirm against generate_polygon).
 * @param dist    n*n distances, row-major; INF marks "no distance yet".
 * @param n       Side length of the grid; non-positive values do nothing.
 * @param changed Set to true when at least one distance was lowered; never
 *                cleared by the kernel, so the caller must reset it first.
 */
__global__ void wave_step(int* P, unsigned int* dist, int n, bool* changed) {
    if (n <= 0) return;

    // 64-bit indexing: n * n and the stride sum can exceed the int range.
    const size_t side = (size_t)n;
    const size_t total = side * side;
    const size_t stride = (size_t)blockDim.x * gridDim.x;

    for (size_t cell = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
         cell < total;
         cell += stride) {
        if (P[cell] == -1) continue;

        // cell < total guarantees row < side and col < side; no extra bounds check needed.
        const size_t row = cell / side;
        const size_t col = cell % side;

        const unsigned int current_dist = dist[cell];
        unsigned int min_dist = current_dist;

        // Check the four neighbours with overflow protection.
        if (row > 0)        min_dist = wave_relax(min_dist, dist[cell - side]);
        if (row + 1 < side) min_dist = wave_relax(min_dist, dist[cell + side]);
        if (col > 0)        min_dist = wave_relax(min_dist, dist[cell - 1]);
        if (col + 1 < side) min_dist = wave_relax(min_dist, dist[cell + 1]);

        if (min_dist < current_dist) {
            dist[cell] = min_dist;
            *changed = true;
        }
    }
}
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
const int n = MATRIX_SIZE;
|
const int n = MATRIX_SIZE;
|
||||||
const int block_size = BLOCK_SIZE;
|
|
||||||
|
|
||||||
// Инициализация полигона
|
// Инициализация полигона
|
||||||
int* P = (int*)malloc(n * n * sizeof(int));
|
int* P = (int*)malloc(n * n * sizeof(int));
|
||||||
@@ -96,10 +106,6 @@ int main() {
|
|||||||
cudaMemcpy(d_P, P, n*n*sizeof(int), cudaMemcpyHostToDevice);
|
cudaMemcpy(d_P, P, n*n*sizeof(int), cudaMemcpyHostToDevice);
|
||||||
cudaMemcpy(d_dist, dist, n*n*sizeof(unsigned int), cudaMemcpyHostToDevice);
|
cudaMemcpy(d_dist, dist, n*n*sizeof(unsigned int), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
// Настройка запуска ядра
|
|
||||||
dim3 grid((n + block_size - 1)/block_size, (n + block_size - 1)/block_size);
|
|
||||||
dim3 block(block_size, block_size);
|
|
||||||
|
|
||||||
// Замер времени
|
// Замер времени
|
||||||
cudaEvent_t start, stop;
|
cudaEvent_t start, stop;
|
||||||
cudaEventCreate(&start);
|
cudaEventCreate(&start);
|
||||||
@@ -110,9 +116,10 @@ int main() {
|
|||||||
int iterations = 0;
|
int iterations = 0;
|
||||||
bool changed;
|
bool changed;
|
||||||
do {
|
do {
|
||||||
|
// printf("Wave step #%d\n", iterations);
|
||||||
changed = false;
|
changed = false;
|
||||||
cudaMemcpy(d_changed, &changed, sizeof(bool), cudaMemcpyHostToDevice);
|
cudaMemcpy(d_changed, &changed, sizeof(bool), cudaMemcpyHostToDevice);
|
||||||
wave_step<<<grid, block>>>(d_P, d_dist, n, d_changed);
|
wave_step<<<BLOCKS_COUNT, THREADS_COUNT>>>(d_P, d_dist, n, d_changed);
|
||||||
cudaDeviceSynchronize(); // Синхронизация после ядра
|
cudaDeviceSynchronize(); // Синхронизация после ядра
|
||||||
cudaMemcpy(&changed, d_changed, sizeof(bool), cudaMemcpyDeviceToHost);
|
cudaMemcpy(&changed, d_changed, sizeof(bool), cudaMemcpyDeviceToHost);
|
||||||
iterations++;
|
iterations++;
|
||||||
@@ -132,11 +139,12 @@ int main() {
|
|||||||
printf("Success! Path length: %u\n", dist[FINISH_X + FINISH_Y * MATRIX_SIZE]);
|
printf("Success! Path length: %u\n", dist[FINISH_X + FINISH_Y * MATRIX_SIZE]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Вывод результатов
|
||||||
printf("Time: %.2f ms\n", milliseconds);
|
printf("Time: %.2f ms\n", milliseconds);
|
||||||
printf("Matrix: %dx%d | Blocks: %dx%d | Obstacles: %d%%\n\n",
|
printf("Matrix: %dx%d | BlocksXThreads: %dx%d | Obstacles: %d%%\n\n",
|
||||||
n, n, block_size, block_size, OBSTACLE_PROB);
|
n, n, BLOCKS_COUNT, THREADS_COUNT, OBSTACLE_PROB);
|
||||||
|
|
||||||
if (MATRIX_SIZE < 100)
|
if (MATRIX_SIZE <= 100)
|
||||||
print_distance_map(P, dist, MATRIX_SIZE);
|
print_distance_map(P, dist, MATRIX_SIZE);
|
||||||
|
|
||||||
// Освобождение памяти
|
// Освобождение памяти
|
||||||
|
|||||||
Reference in New Issue
Block a user