По мелочи
This commit is contained in:
@@ -13,6 +13,9 @@ scontrol show jobid <jobid>
|
|||||||
|
|
||||||
# или так, чтобы не выводить лишнюю информацию
|
# или так, чтобы не выводить лишнюю информацию
|
||||||
scontrol show jobid <jobid> | grep JobState
|
scontrol show jobid <jobid> | grep JobState
|
||||||
|
|
||||||
|
# а так можно посмотреть состояния всех задач
|
||||||
|
scontrol show job | grep JobState
|
||||||
```
|
```
|
||||||
|
|
||||||
Отменить или завершить задачу досрочно.
|
Отменить или завершить задачу досрочно.
|
||||||
|
|||||||
106
kernel.cu
106
kernel.cu
@@ -6,10 +6,12 @@
|
|||||||
|
|
||||||
// Настройки эксперимента
|
// Настройки эксперимента
|
||||||
#define USE_SHARED_MEMORY true
|
#define USE_SHARED_MEMORY true
|
||||||
#define BLOCKS_COUNT 16
|
#define BLOCKS_COUNT 10000 // 1, 10, 100, 1000, 10000
|
||||||
#define THREADS_COUNT 16 // Используется, если USE_SHARED_MEMORY == false
|
// Используется, если USE_SHARED_MEMORY == false
|
||||||
#define BLOCK_SIZE 4 // Используется, если USE_SHARED_MEMORY == true
|
#define THREADS_COUNT 100 // 1, 9, 100, 1024
|
||||||
#define MATRIX_SIZE 32
|
// Используется, если USE_SHARED_MEMORY == true
|
||||||
|
#define BLOCK_SIZE 32 // 1, 3, 10, 32 -- launched as a BLOCK_SIZE x BLOCK_SIZE block, so BLOCK_SIZE^2 must not exceed 1024 threads
|
||||||
|
#define MATRIX_SIZE 100 // 500, 1000, 1500
|
||||||
|
|
||||||
#define OBSTACLE_PROB 10 // Процент препятствий на полигоне
|
#define OBSTACLE_PROB 10 // Процент препятствий на полигоне
|
||||||
#define START_X 2 // Начальная точка
|
#define START_X 2 // Начальная точка
|
||||||
@@ -48,46 +50,7 @@ void print_distance_map(int* P, unsigned int* dist, int n) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ядро, не использующее разделяемую память
|
#if USE_SHARED_MEMORY
|
||||||
// One relaxation pass of the wave algorithm, using global memory only.
//
// `dist` holds an n x n grid in row-major order. Cells with P[idx] == -1 are
// skipped (presumably obstacles -- confirm against the code that fills P).
// Every other cell takes min(own distance, neighbour distance + 1) over its
// up/down/left/right neighbours whose distance is not INF, and sets *changed
// to true whenever it lowers its own value.
//
// Launch layout: 1D grid of 1D blocks. Any grid/block size is valid because
// each thread processes cells tid, tid + stride, tid + 2*stride, ... with
// stride = blockDim.x * gridDim.x (grid-stride loop).
__global__ void wave_step(int* P, unsigned int* dist, int n, bool* changed) {
    const int total = n * n;
    const unsigned int stride = blockDim.x * gridDim.x;

    // tid < total already implies row < n and col < n, so no further bounds
    // check on the cell itself is needed.
    for (int tid = threadIdx.x + blockIdx.x * blockDim.x; tid < total; tid += stride) {
        if (P[tid] == -1) continue;

        const int i = tid / n;
        const int j = tid % n;
        const unsigned int current_dist = dist[tid];
        unsigned int min_dist = current_dist;

        // Other threads may lower a neighbour during this pass, so each
        // neighbour is loaded exactly once: the INF test (which protects the
        // +1 from overflowing) and the candidate value use the same read.
        if (i > 0) {
            const unsigned int up = dist[tid - n];
            if (up != INF) min_dist = min(min_dist, up + 1);
        }
        if (i < n - 1) {
            const unsigned int down = dist[tid + n];
            if (down != INF) min_dist = min(min_dist, down + 1);
        }
        if (j > 0) {
            const unsigned int left = dist[tid - 1];
            if (left != INF) min_dist = min(min_dist, left + 1);
        }
        if (j < n - 1) {
            const unsigned int right = dist[tid + 1];
            if (right != INF) min_dist = min(min_dist, right + 1);
        }

        if (min_dist < current_dist) {
            dist[tid] = min_dist;
            *changed = true;  // every writer stores the same value
        }
    }
}
|
|
||||||
|
|
||||||
// Ядро, использующее разделяемую память
|
// Ядро, использующее разделяемую память
|
||||||
__global__ void wave_step_shared(int* P, unsigned int* dist, int n, bool* global_changed) {
|
__global__ void wave_step_shared(int* P, unsigned int* dist, int n, bool* global_changed) {
|
||||||
int local_row = threadIdx.x;
|
int local_row = threadIdx.x;
|
||||||
@@ -189,6 +152,47 @@ __global__ void wave_step_shared(int* P, unsigned int* dist, int n, bool* global
|
|||||||
*global_changed = true;
|
*global_changed = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
// Kernel that does not use shared memory.
//
// Performs one relaxation pass of the wave algorithm. `dist` holds an n x n
// grid in row-major order. Cells with P[idx] == -1 are skipped (presumably
// obstacles -- confirm against the code that fills P). Every other cell takes
// min(own distance, neighbour distance + 1) over its up/down/left/right
// neighbours whose distance is not INF, and sets *changed to true whenever it
// lowers its own value.
//
// Launch layout: 1D grid of 1D blocks. Any grid/block size is valid because
// each thread processes cells tid, tid + stride, tid + 2*stride, ... with
// stride = blockDim.x * gridDim.x (grid-stride loop).
__global__ void wave_step(int* P, unsigned int* dist, int n, bool* changed) {
    const int total = n * n;
    const unsigned int stride = blockDim.x * gridDim.x;

    // tid < total already implies row < n and col < n, so no further bounds
    // check on the cell itself is needed.
    for (int tid = threadIdx.x + blockIdx.x * blockDim.x; tid < total; tid += stride) {
        if (P[tid] == -1) continue;

        const int i = tid / n;
        const int j = tid % n;
        const unsigned int current_dist = dist[tid];
        unsigned int min_dist = current_dist;

        // Other threads may lower a neighbour during this pass, so each
        // neighbour is loaded exactly once: the INF test (which protects the
        // +1 from overflowing) and the candidate value use the same read.
        if (i > 0) {
            const unsigned int up = dist[tid - n];
            if (up != INF) min_dist = min(min_dist, up + 1);
        }
        if (i < n - 1) {
            const unsigned int down = dist[tid + n];
            if (down != INF) min_dist = min(min_dist, down + 1);
        }
        if (j > 0) {
            const unsigned int left = dist[tid - 1];
            if (left != INF) min_dist = min(min_dist, left + 1);
        }
        if (j < n - 1) {
            const unsigned int right = dist[tid + 1];
            if (right != INF) min_dist = min(min_dist, right + 1);
        }

        if (min_dist < current_dist) {
            dist[tid] = min_dist;
            *changed = true;  // every writer stores the same value
        }
    }
}
|
||||||
|
#endif
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
const int n = MATRIX_SIZE;
|
const int n = MATRIX_SIZE;
|
||||||
@@ -227,12 +231,14 @@ int main() {
|
|||||||
// printf("Wave step #%d\n", iterations);
|
// printf("Wave step #%d\n", iterations);
|
||||||
changed = false;
|
changed = false;
|
||||||
cudaMemcpy(d_changed, &changed, sizeof(bool), cudaMemcpyHostToDevice);
|
cudaMemcpy(d_changed, &changed, sizeof(bool), cudaMemcpyHostToDevice);
|
||||||
if (USE_SHARED_MEMORY) {
|
|
||||||
|
#if USE_SHARED_MEMORY
|
||||||
dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
wave_step_shared<<<BLOCKS_COUNT, block>>>(d_P, d_dist, n, d_changed);
|
wave_step_shared<<<BLOCKS_COUNT, block>>>(d_P, d_dist, n, d_changed);
|
||||||
} else {
|
#else
|
||||||
wave_step<<<BLOCKS_COUNT, THREADS_COUNT>>>(d_P, d_dist, n, d_changed);
|
wave_step<<<BLOCKS_COUNT, THREADS_COUNT>>>(d_P, d_dist, n, d_changed);
|
||||||
}
|
#endif
|
||||||
|
|
||||||
cudaDeviceSynchronize(); // Синхронизация после ядра
|
cudaDeviceSynchronize(); // Синхронизация после ядра
|
||||||
cudaMemcpy(&changed, d_changed, sizeof(bool), cudaMemcpyDeviceToHost);
|
cudaMemcpy(&changed, d_changed, sizeof(bool), cudaMemcpyDeviceToHost);
|
||||||
iterations++;
|
iterations++;
|
||||||
@@ -254,14 +260,14 @@ int main() {
|
|||||||
|
|
||||||
// Вывод результатов
|
// Вывод результатов
|
||||||
printf("Time: %.2f ms\n", milliseconds);
|
printf("Time: %.2f ms\n", milliseconds);
|
||||||
if (USE_SHARED_MEMORY) {
|
#if USE_SHARED_MEMORY
|
||||||
printf("Results of the algorithm using shared memory.\n");
|
printf("Results of the algorithm using shared memory.\n");
|
||||||
printf("Matrix: %dx%d | BlocksXThreadXThread: %dx%dx%d | Obstacles: %d%%\n\n",
|
printf("Matrix: %dx%d | BlocksXThreadXThread: %dx%dx%d | Obstacles: %d%%\n\n",
|
||||||
n, n, BLOCKS_COUNT, BLOCK_SIZE, BLOCK_SIZE, OBSTACLE_PROB);
|
n, n, BLOCKS_COUNT, BLOCK_SIZE, BLOCK_SIZE, OBSTACLE_PROB);
|
||||||
} else {
|
#else
|
||||||
printf("Matrix: %dx%d | BlocksXThreads: %dx%d | Obstacles: %d%%\n\n",
|
printf("Matrix: %dx%d | BlocksXThreads: %dx%d | Obstacles: %d%%\n\n",
|
||||||
n, n, BLOCKS_COUNT, THREADS_COUNT, OBSTACLE_PROB);
|
n, n, BLOCKS_COUNT, THREADS_COUNT, OBSTACLE_PROB);
|
||||||
}
|
#endif
|
||||||
|
|
||||||
if (MATRIX_SIZE <= 100)
|
if (MATRIX_SIZE <= 100)
|
||||||
print_distance_map(P, dist, MATRIX_SIZE);
|
print_distance_map(P, dist, MATRIX_SIZE);
|
||||||
|
|||||||
Reference in New Issue
Block a user