From a16691abffcba8a7c5f897a00dfbb29ed97f228d Mon Sep 17 00:00:00 2001
From: Randy McShandy
Date: Mon, 11 Dec 2023 22:10:46 -0600
Subject: [PATCH] Big progress on multithreading model

---
 gif_export.sh |   2 +-
 main.c        | 513 ++++++++++++++++++++++++++------------------
 2 files changed, 266 insertions(+), 249 deletions(-)

diff --git a/gif_export.sh b/gif_export.sh
index 772498b..efbc234 100755
--- a/gif_export.sh
+++ b/gif_export.sh
@@ -2,4 +2,4 @@
 # cd $1
 rm output.gif
-magick -delay 10 -loop 0 $(ls . | sort -n) ./output.gif
+magick -delay 10 -loop 0 -antialias $(ls . | sort -n) ./output.gif
diff --git a/main.c b/main.c
index 34c296b..ade90ce 100755
--- a/main.c
+++ b/main.c
@@ -3,304 +3,321 @@
 #include <math.h>
 #include <time.h>
 #include <stdint.h>
-#include <semaphore.h>
 #include <pthread.h>
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <unistd.h>
 #define STB_IMAGE_WRITE_IMPLEMENTATION
 #include "stb_image_write.h"
 
 #define GRID_X 128
 #define GRID_Y GRID_X
-#define SHADOW
 
-#ifdef SHADOW
+#define SHADOW 0
+#if SHADOW
 #define printf(a,...)
 #endif
 
-int thread_done = 0;
-sem_t* iterator_sem;
 
 typedef struct {
-  double v[3][3];
+    double v[3][3];
 } Mat3;
 
 typedef struct {
-  float a;
-  float b;
+    float a;
+    float b;
 } FVec2;
 
 typedef struct {
-  double diff_a;
-  double diff_b;
-  double feed;
-  double kill;
-  double delta_t;
-
-  char name[32];
+    int max_iterations;
+    double diff_a;
+    double diff_b;
+    double feed;
+    double kill;
+    double delta_t;
+
+    char name[32];
 } RD_Opts;
 
 typedef struct {
-  RD_Opts opts;
-  FVec2** source;
-  FVec2** dest;
-  int start_x;
-  int start_y;
-  int w;
-  int h;
-} IteratorArgObj;
+    RD_Opts opts;
+    FVec2** source;
+    FVec2** dest;
+    int start_x;
+    int start_y;
+    int width;
+    int height;
+
+    pthread_mutex_t* mutex;
+    pthread_cond_t* worker_cond;
+    pthread_cond_t* boss_cond;
+
+    int* waiting_workers;
+    int worker_count;
+    int max_iterations;
+    int worker_id;
+} worker_arg;
 
 FVec2 ** grid;
 FVec2 ** grid_prime;
 FVec2 ** grid_temp;
+int master_iteration_done;
+
+// Connectivity increases as time passes (iterations or time delta)?
+// Seems we like <1.0 time deltas and higher iterations
+// diff values influence tightness, delta between them shouldn't get wider than ~0.5 or narrower than ~0.4
+RD_Opts puffer =
+{
+    .max_iterations = 1e4,
+    .diff_a = 0.7f,
+    .diff_b = 0.3f,
+    .feed = 0.055f,
+    .kill = 0.062f,
+    .delta_t = 1.0f,
+
+    .name = "puffer"
+};
+
+RD_Opts worms =
+{
+    .max_iterations = 1e4,
+    .diff_a = 0.7f,
+    .diff_b = 0.3f,
+    .feed = 0.082,
+    .kill = 0.06f,
+    .delta_t = 0.6f,
+
+    .name = "worms"
+};
+
+RD_Opts meiosis =
+{
+    .max_iterations = 1e4,
+    .diff_a = 0.7f,
+    .diff_b = 0.3f,
+    .feed = 0.0367f,
+    .kill = 0.0649f,
+    .delta_t = 1.0f,
+
+    .name = "meiosis"
+};
+
 const Mat3 laplacian_kernel =
 {
-  // oh shit
-  // Sum of all members must balance to 0.0 (or outer members have to do 1.0)
-  // it looks like any modifications should be on the order of 0.01 or less
-  // upper value of 22 <> 23 seems to be a good point
-  .v = {
-    {0.05f, 0.20f, 0.05f},
-    {0.20f, -1.0f, 0.20f},
-    {0.05f, 0.20f, 0.05f}
-  }
+    // oh shit
+    // Sum of all members must balance to 0.0 (or outer members have to do 1.0)
+    // it looks like any modifications should be on the order of 0.01 or less
+    // upper value of 22 <> 23 seems to be a good point
+    .v = {
+        {0.05f, 0.20f, 0.05f},
+        {0.20f, -1.0f, 0.20f},
+        {0.05f, 0.20f, 0.05f}
+    }
 };
 
-float convolve(Mat3 kernel, Mat3 source)
+float kernel_sum(Mat3 kernel, Mat3 source)
 {
-  float result = 0.0f;
-
-  // Convolve kernel at grid[x,y]
-  for (int ix = 0; ix < 3; ix++)
-  {
-    for (int iy = 0; iy < 3; iy++)
-    {
-      result += (kernel.v[ix][iy] * source.v[ix][iy]);
-    }
-  }
-
-  return result;
+    float result = 0.0f;
+
+    // Convolve kernel at grid[x,y]
+    for (int ix = 0; ix < 3; ix++)
+    {
+        for (int iy = 0; iy < 3; iy++)
+        {
+            result += (kernel.v[ix][iy] * source.v[ix][iy]);
+        }
+    }
+
+    return result;
 }
 
 float rd_a_prime(FVec2** source_grid, RD_Opts opts, int x, int y, Mat3 kernel, float A, float B)
 {
-  float a_prime = 1.0f;
-  // Use species A in the convolution, b_prime will need B
-  // For now we won't iterate over edge rows and columns to avoid special case vomit
-  Mat3 source =
-  {
-    .v =
-    {
-      {grid[x-1][y-1].a, grid[x][y-1].a, grid[x+1][y-1].a},
-      {grid[x-1][y+0].a, grid[x][y+0].a, grid[x+1][y+0].a},
-      {grid[x-1][y+1].a, grid[x][y+1].a, grid[x+1][y+1].a},
-    }
-  };
-
-  a_prime = A + (opts.diff_a*(convolve(kernel, source)) - (A*B*B) + opts.feed*(1.0f-A)) * opts.delta_t;
-  return a_prime;
+    float a_prime = 1.0f;
+    // Use species A in the convolution, b_prime will need B
+    // For now we won't iterate over edge rows and columns to avoid special case vomit
+    Mat3 source =
+    {
+        .v =
+        {
+            {grid[x-1][y-1].a, grid[x][y-1].a, grid[x+1][y-1].a},
+            {grid[x-1][y+0].a, grid[x][y+0].a, grid[x+1][y+0].a},
+            {grid[x-1][y+1].a, grid[x][y+1].a, grid[x+1][y+1].a},
+        }
+    };
+
+    a_prime = A + (opts.diff_a*(kernel_sum(kernel, source)) - (A*B*B) + opts.feed*(1.0f-A)) * opts.delta_t;
+    return a_prime;
 }
 
 float rd_b_prime(FVec2** source_grid, RD_Opts opts, int x, int y, Mat3 kernel, float A, float B)
 {
-  float b_prime = 1.0f;
-  // Use species A in the convolution, b_prime will need B
-  Mat3 source =
-  {
-    .v =
-    {
-      {grid[x-1][y-1].b, grid[x][y-1].b, grid[x+1][y-1].b},
-      {grid[x-1][y+0].b, grid[x][y+0].b, grid[x+1][y+0].b},
-      {grid[x-1][y+1].b, grid[x][y+1].b, grid[x+1][y+1].b},
-    }
-  };
-
-  b_prime = B + (opts.diff_b*(convolve(kernel, source)) + A*(B*B) - (opts.kill + opts.feed)*B) * opts.delta_t;
-  return b_prime;
-}
-
-void iterate(RD_Opts opts, FVec2** source, FVec2** dest, int start_x, int start_y, int w, int h, int stride)
-{
-  for (int x = start_x; x < w + start_x - 1; x++)
-  {
-    for (int y = start_y; y < h + start_y -1; y++)
-    {
-      FVec2 each = source[x][y];
-      dest[x][y].a = rd_a_prime(source, opts, x, y, laplacian_kernel, each.a, each.b);
-      dest[x][y].b = rd_b_prime(source, opts, x, y, laplacian_kernel, each.a, each.b);
-    }
-  }
-
+    float b_prime = 1.0f;
+    // Use species A in the convolution, b_prime will need B
+    Mat3 source =
+    {
+        .v =
+        {
+            {grid[x-1][y-1].b, grid[x][y-1].b, grid[x+1][y-1].b},
+            {grid[x-1][y+0].b, grid[x][y+0].b, grid[x+1][y+0].b},
+            {grid[x-1][y+1].b, grid[x][y+1].b, grid[x+1][y+1].b},
+        }
+    };
+
+    b_prime = B + (opts.diff_b*(kernel_sum(kernel, source)) + A*(B*B) - (opts.kill + opts.feed)*B) * opts.delta_t;
+    return b_prime;
 }
 
 void* iterator(void* _arg)
 {
-  sem_t* sem = sem_open("itersem", O_CREAT);
-  IteratorArgObj* arg = (IteratorArgObj*)_arg;
-  RD_Opts opts = arg->opts;
-  FVec2** source = arg->source;
-  FVec2** dest = arg->dest;
-  int start_x = arg->start_x;
-  int start_y = arg->start_y;
-  int w = arg->w;
-  int h = arg->h;
-
-  while (sem_wait(sem) == -1)
-  {
-    for (int x = start_x; x < w + start_x - 1; x++)
-    {
-      for (int y = start_y; y < h + start_y -1; y++)
-      {
-        FVec2 each = source[x][y];
-        dest[x][y].a = rd_a_prime(source, opts, x, y, laplacian_kernel, each.a, each.b);
-        dest[x][y].b = rd_b_prime(source, opts, x, y, laplacian_kernel, each.a, each.b);
-      }
-    }
-    sem_post(sem);
-  }
-  return _arg;
+    worker_arg* warg = (worker_arg*)_arg;
+    RD_Opts opts = warg->opts;
+    int start_x = warg->start_x;
+    int start_y = warg->start_y;
+    int w = warg->width;
+    int h = warg->height;
+
+    for (int iterations = 0; iterations < warg->max_iterations; iterations++)
+    {
+        printf("worker %d: work unit %d/%d\n", warg->worker_id, iterations, warg->max_iterations);
+        for (int x = start_x; x < w + start_x - 1; x++)
+        {
+            for (int y = start_y; y < h + start_y - 1; y++)
+            {
+                FVec2 each = grid[x][y];
+                grid_prime[x][y].a = rd_a_prime(grid, opts, x, y, laplacian_kernel, each.a, each.b);
+                grid_prime[x][y].b = rd_b_prime(grid, opts, x, y, laplacian_kernel, each.a, each.b);
+            }
+        }
+
+        // waiting_workers and the condition variables are shared with the
+        // boss, so they are only touched with the mutex held.
+        pthread_mutex_lock(warg->mutex);
+        ++*warg->waiting_workers;
+        printf("worker %d: waking up boss\n", warg->worker_id);
+        pthread_cond_signal(warg->boss_cond);
+
+        // Sleep until the boss has swapped the grids for this round;
+        // master_iteration_done is the boss's round counter.
+        while (master_iteration_done <= iterations)
+        {
+            pthread_cond_wait(warg->worker_cond, warg->mutex);
+        }
+        pthread_mutex_unlock(warg->mutex);
+    }
+
+    printf("worker %d: exiting\n", warg->worker_id);
+    return _arg;
 }
 
 int main(int argc, char** argv)
 {
-  char chars[6] = {' ', ' ', ' ', '+', '#', '@'};
-  const int max_chars = sizeof(chars) / sizeof(chars[0]) - 1;
-
-  grid = (FVec2**)malloc(GRID_X * sizeof(FVec2*));
-  for (int n = 0; n < GRID_X; n++)
-  {
-    grid[n] = (FVec2*)malloc(GRID_Y * sizeof(FVec2));
-  }
-
-  grid_prime = (FVec2**)malloc(GRID_X * sizeof(FVec2*));
-  for (int n = 0; n < GRID_X; n++)
-  {
-    grid_prime[n] = (FVec2*)malloc(GRID_Y * sizeof(FVec2));
-  }
-
-  srand(time(NULL));
-
-  // Connectivity increases as time passes (iterations or time delta)?
-  // Seems we like <1.0 time deltas and higher iterations
-  // diff values influence tightness, delta between them shouldn't get wider than ~0.5 or narrower than ~0.4
-  RD_Opts puffer =
-  {
-    .diff_a = 0.7f,
-    .diff_b = 0.3f,
-    .feed = 0.055f,
-    .kill = 0.062f,
-    .delta_t = 1.0f,
-
-    .name = "puffer"
-  };
-
-  RD_Opts worms =
-  {
-    .diff_a = 0.7f,
-    .diff_b = 0.3f,
-    .feed = 0.082,
-    .kill = 0.06f,
-    .delta_t = 0.6f,
-
-    .name = "worms"
-  };
-
-  RD_Opts meiosis =
-  {
-    .diff_a = 0.8f,
-    .diff_b = 0.4f,
-    .feed = 0.0367f,
-    .kill = 0.0649f,
-    .delta_t = 1.0f,
-
-    .name = "meiosis"
-  };
-
-  RD_Opts opts = puffer;
-
-  for (int x = 0; x < GRID_X; x++)
-  {
-    for (int y = 0; y < GRID_Y; y++)
-    {
-      grid[x][y].a = 1.0f;
-      grid[x][y].b = 0.0f;
-    }
-  }
-
-  const int max_rand = sqrt(GRID_X);
-  for (int n = 0; n < max_rand; n++)
-  {
-    grid[rand() % GRID_X][rand() % GRID_Y].b = 1.0f;
-  }
-
-  const size_t thread_count = 0;
-  pthread_t threads[thread_count];
-
-  iterator_sem = sem_open("itersem", O_CREAT, 0, thread_count);
-
-  for (int t = 0; t < thread_count; t++)
-  {
-    IteratorArgObj arg_obj = {
-      opts, grid, grid_prime, 1, 1, (GRID_X/2), (GRID_Y)-1
-    };
-    pthread_create(&threads[t], NULL, iterator, &arg_obj);
-  }
-
-  const int max_iterations = (GRID_X / 128.0f) * 1e4;
-  for (int iterations = 0; iterations < max_iterations; iterations++)
-  {
-    for (int x = 1; x < GRID_X-1; x++)
-    {
-      for (int y = 1; y < GRID_Y-1; y++)
-      {
-        FVec2 each = grid[x][y];
-        grid_prime[x][y].a = rd_a_prime(NULL, opts, x, y, laplacian_kernel, each.a, each.b);
-        grid_prime[x][y].b = rd_b_prime(NULL, opts, x, y, laplacian_kernel, each.a, each.b);
-      }
-    }
-    //iterate(opts, grid, grid_prime, 1, 1, GRID_X-1, GRID_Y-1, GRID_X);
-    //iterate(opts, grid, grid_prime, 1, 1, (GRID_X/2), (GRID_Y)-1, GRID_X);
-    //iterate(opts, grid, grid_prime, GRID_X/2, 1, (GRID_X/2)-1, GRID_Y-1, GRID_X);
-
-    grid_temp = grid;
-    grid = grid_prime;
-    grid_prime = grid_temp;
-
-    if (iterations % (max_iterations/100) == 0)
-    //if (iterations == max_iterations-1)
-    {
-      char buffer[GRID_X][GRID_Y];
-      for (int x = 0; x < GRID_X; x++)
-      {
-        for (int y = 0; y < GRID_Y; y++)
-        {
-          buffer[x][y] = (uint8_t)(255.0f * grid[x][y].a);
-        }
-      }
-      char name[64] = {0};
-      sprintf(name, "img/%s/%d_grid.png", opts.name, iterations);
-      stbi_write_png(name, GRID_X, GRID_Y, 1, buffer, GRID_X * sizeof(uint8_t));
-    }
-  }
-
-  for (int t = 0; t < thread_count; t++)
-  {
-    pthread_join(threads[t], NULL);
-  }
-
-
-  printf("Opts: {\nIterations: %d\nTime delta: %f\nDiffA: %f\nDiffB: %f\nFeed: %f\nKill: %f\n\n}\n", max_iterations, opts.delta_t, opts.diff_a, opts.diff_b, opts.feed, opts.kill);
-
-  // It's end of execution, who cares
-  free(grid);
-  free(grid_prime);
-
-  return 0;
+    char chars[6] = {' ', ' ', ' ', '+', '#', '@'};
+    const int max_chars = sizeof(chars) / sizeof(chars[0]) - 1;
+
+    grid = (FVec2**)malloc(GRID_X * sizeof(FVec2*));
+    for (int n = 0; n < GRID_X; n++)
+    {
+        grid[n] = (FVec2*)malloc(GRID_Y * sizeof(FVec2));
+    }
+
+    grid_prime = (FVec2**)malloc(GRID_X * sizeof(FVec2*));
+    for (int n = 0; n < GRID_X; n++)
+    {
+        grid_prime[n] = (FVec2*)malloc(GRID_Y * sizeof(FVec2));
+    }
+
+    srand(time(NULL));
+
+    RD_Opts opts = meiosis;
+
+    for (int x = 0; x < GRID_X; x++)
+    {
+        for (int y = 0; y < GRID_Y; y++)
+        {
+            grid[x][y].a = 1.0f;
+            grid[x][y].b = 0.0f;
+        }
+    }
+
+    const int max_rand = sqrt(GRID_X);
+    for (int n = 0; n < max_rand; n++)
+    {
+        grid[rand() % GRID_X][rand() % GRID_Y].b = 1.0f;
+    }
+
+    int waiting_workers = 0;
+    worker_arg warg = {
+        opts, grid, grid_prime, 1, 1, (GRID_X-1), (GRID_Y-1), .worker_count = 4, .waiting_workers = &waiting_workers, .max_iterations = 1e4
+    };
+    pthread_t threads[warg.worker_count];
+    pthread_mutex_t mutex;
+    pthread_cond_t worker_cond;
+    pthread_cond_t boss_cond;
+
+    pthread_mutex_init(&mutex, NULL);
+    pthread_cond_init(&worker_cond, NULL);
+    pthread_cond_init(&boss_cond, NULL);
+
+    warg.worker_cond = &worker_cond;
+    warg.boss_cond = &boss_cond;
+    warg.mutex = &mutex;
+
+    worker_arg wargs[warg.worker_count];
+
+    for (int t = 0; t < warg.worker_count; t++)
+    {
+        wargs[t] = warg;
+        wargs[t].worker_id = t;
+        wargs[t].width = ((GRID_X)/warg.worker_count) - 1;
+        wargs[t].start_x = 1 + (wargs[t].width * (t));
+        printf("worker %d x_span %d, %d\n", t, wargs[t].start_x, wargs[t].width);
+        pthread_create(&threads[t], NULL, iterator, &wargs[t]);
+    }
+
+    int max_iterations = (GRID_X / 128.0f) * opts.max_iterations;
+    max_iterations = warg.max_iterations;
+    for (int iterations = 0; iterations < max_iterations; iterations++)
+    {
+        printf("boss: waiting on workers\n");
+        pthread_mutex_lock(warg.mutex);
+        while (*warg.waiting_workers < warg.worker_count)
+        {
+            pthread_cond_wait(warg.boss_cond, warg.mutex);
+        }
+
+        grid_temp = grid;
+        grid = grid_prime;
+        grid_prime = grid_temp;
+        printf("boss: workload %d/%d\n", iterations, max_iterations);
+
+        {
+            //sleep(1);
+            // segfault somewhere in here lmao
+            if (1 && (iterations % (max_iterations/100) == 0))
+            {
+                char buffer[GRID_X][GRID_Y];
+                for (int x = 0; x < GRID_X; x++)
+                {
+                    for (int y = 0; y < GRID_Y; y++)
+                    {
+                        buffer[x][y] = (uint8_t)(255.0f * grid[x][y].a);
+                    }
+                }
+                char name[64] = {0};
+                sprintf(name, "img/%s/%d.png", opts.name, iterations);
+                stbi_write_png(name, GRID_X, GRID_Y, 1, buffer, GRID_X * sizeof(uint8_t));
+            }
+        }
+
+        *warg.waiting_workers = 0;
+        master_iteration_done = iterations + 1; // release the workers into the next round
+        pthread_cond_broadcast(warg.worker_cond);
+        pthread_mutex_unlock(warg.mutex);
+    }
+
+    printf("boss: exiting loop\n");
+    for (int t = 0; t < warg.worker_count; t++)
+    {
+        pthread_join(threads[t], NULL);
+    }
+
+    printf("Opts: {\nIterations: %d\nTime delta: %f\nDiffA: %f\nDiffB: %f\nFeed: %f\nKill: %f\n\n}\n", max_iterations, opts.delta_t, opts.diff_a, opts.diff_b, opts.feed, opts.kill);
+
+    // It's end of execution, who cares
+    free(grid);
+    free(grid_prime);
+
+    return 0;
 }
-- 
2.49.0
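
Note on the worker column split: with GRID_X = 128 and worker_count = 4,
width = ((GRID_X)/worker_count) - 1 = 31 and start_x = 1 + width*t, while each
worker's inner loop stops at x < start_x + width - 1. The seam columns
x = 31, 62, 93 and the right-edge columns x = 124..126 are therefore never
recomputed and keep their initial values. Below is a sketch of an even split
of the interior columns 1 .. GRID_X-2; it is not part of this patch, and it
assumes the loop bound in iterator() is changed to x < start_x + width:

    /* Sketch only: partition the interior columns with no gaps or overlap,
     * handing any remainder to the last worker. Assumes iterator() loops
     * for (int x = start_x; x < start_x + w; x++). */
    const int interior = GRID_X - 2;               /* columns 1 .. GRID_X-2 */
    const int base = interior / warg.worker_count;
    for (int t = 0; t < warg.worker_count; t++)
    {
        wargs[t].start_x = 1 + t * base;
        wargs[t].width = (t == warg.worker_count - 1) ? interior - t * base : base;
    }

For a 128-wide grid and four workers this yields the spans [1,32), [32,63),
[63,94), [94,127), so every interior column is updated exactly once per round.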