From: Randy McShandy Date: Tue, 12 Dec 2023 04:52:32 +0000 (-0600) Subject: bit of regression in threads synching properly but locking up eventually X-Git-Url: http://git.mcshandy.xyz/gitweb.cgi?a=commitdiff_plain;h=7c0a83340734a7d0b89ce2bf77b6482befc264c4;p=barrow_crawler bit of regression in threads synching properly but locking up eventually --- diff --git a/main.c b/main.c index ade90ce..057f71d 100755 --- a/main.c +++ b/main.c @@ -4,6 +4,7 @@ #include #include #include +#include #define STB_IMAGE_WRITE_IMPLEMENTATION #include "stb_image_write.h" @@ -49,9 +50,10 @@ typedef struct int width; int height; - pthread_mutex_t* mutex; - pthread_cond_t* worker_cond; - pthread_cond_t* boss_cond; + pthread_mutex_t* mutex; + pthread_mutex_t worker_mutex; + pthread_cond_t* worker_cond; + pthread_cond_t* boss_cond; int* waiting_workers; int worker_count; @@ -135,15 +137,19 @@ float kernel_sum(Mat3 kernel, Mat3 source) float rd_a_prime(FVec2** source_grid, RD_Opts opts, int x, int y, Mat3 kernel, float A, float B) { float a_prime = 1.0f; + int x_less = (x-1 < 0) ? x : x-1; + int y_less = (y-1 < 0) ? y : y-1; + int x_more = (x+1 == GRID_X) ? x : x+1; + int y_more = (y+1 == GRID_Y) ? y : y+1; // Use species A in the convolution, b_prime will need B // For now we won't iterate over edge rows and columns to avoid special case vomit Mat3 source = { .v = { - {grid[x-1][y-1].a, grid[x][y-1].a, grid[x+1][y-1].a}, - {grid[x-1][y+0].a, grid[x][y+0].a, grid[x+1][y+0].a}, - {grid[x-1][y+1].a, grid[x][y+1].a, grid[x+1][y+1].a}, + {grid[x_less][y_less].a, grid[x][y_less].a, grid[x_more][y_less].a}, + {grid[x_less][y+0].a, grid[x][y+0].a, grid[x_more][y+0].a}, + {grid[x_less][y_more].a, grid[x][y_more].a, grid[x_more][y_more].a}, } }; @@ -154,14 +160,18 @@ float rd_a_prime(FVec2** source_grid, RD_Opts opts, int x, int y, Mat3 kernel, f float rd_b_prime(FVec2** source_grid, RD_Opts opts, int x, int y, Mat3 kernel, float A, float B) { float b_prime = 1.0f; + int x_less = (x-1 < 0) ? 
x : x-1; + int y_less = (y-1 < 0) ? y : y-1; + int x_more = (x+1 == GRID_X) ? x : x+1; + int y_more = (y+1 == GRID_Y) ? y : y+1; // Use species A in the convolution, b_prime will need B Mat3 source = { .v = { - {grid[x-1][y-1].b, grid[x][y-1].b, grid[x+1][y-1].b}, - {grid[x-1][y+0].b, grid[x][y+0].b, grid[x+1][y+0].b}, - {grid[x-1][y+1].b, grid[x][y+1].b, grid[x+1][y+1].b}, + {grid[x_less][y_less].b, grid[x][y_less].b, grid[x_more][y_less].b}, + {grid[x_less][y+0].b, grid[x][y+0].b, grid[x_more][y+0].b}, + {grid[x_less][y_more].b, grid[x][y_more].b, grid[x_more][y_more].b}, } }; @@ -181,9 +191,9 @@ void* iterator(void* _arg) for (int iterations = 0; iterations < warg->max_iterations; iterations++) { printf("worker %d: work unit %d/%d\n", warg->worker_id, iterations, warg->max_iterations); - for (int x = start_x; x < w + start_x - 1; x++) + for (int x = start_x; x < w + start_x; x++) { - for (int y = start_y; y < h + start_y - 1; y++) + for (int y = start_y; y < h + start_y; y++) { FVec2 each = grid[x][y]; grid_prime[x][y].a = rd_a_prime(grid, opts, x, y, laplacian_kernel, each.a, each.b); @@ -195,7 +205,7 @@ void* iterator(void* _arg) printf("worker %d: waking up boss\n", warg->worker_id); pthread_cond_signal(warg->boss_cond); - pthread_cond_wait(warg->worker_cond, warg->mutex); + pthread_cond_wait(warg->worker_cond, &warg->worker_mutex); } @@ -241,7 +251,7 @@ int main(int argc, char** argv) int waiting_workers = 0; worker_arg warg = { - opts, grid, grid_prime, 1, 1, (GRID_X-1), (GRID_Y-1), .worker_count = 4, .waiting_workers = &waiting_workers, .max_iterations = 1e4 + opts, grid, grid_prime, 0, 0, (GRID_X), (GRID_Y), .worker_count = 4, .waiting_workers = &waiting_workers, .max_iterations = 1e4 }; pthread_t threads[warg.worker_count]; pthread_mutex_t mutex; @@ -262,8 +272,9 @@ int main(int argc, char** argv) { wargs[t] = warg; wargs[t].worker_id = t; - wargs[t].width = ((GRID_X)/warg.worker_count) - 1; - wargs[t].start_x = 1 + (wargs[t].width * (t)); + 
wargs[t].width = (GRID_X/warg.worker_count); + wargs[t].start_x = (wargs[t].width * t); + pthread_mutex_init(&wargs[t].worker_mutex, NULL); printf("worker %d x_span %d, %d\n", t, wargs[t].start_x, wargs[t].width); pthread_create(&threads[t], NULL, iterator, &wargs[t]); } @@ -282,9 +293,7 @@ int main(int argc, char** argv) grid = grid_prime; grid_prime = grid_temp; printf("boss: workload %d/%d\n", iterations, max_iterations); - { - //sleep(1); // segfault somewhere in here lmao if (1 && (iterations % (max_iterations/100) == 0)) {