git.mcshandy.xyz Git - barrow_crawler/commitdiff
bit of regression in threads synching properly but locking up eventually
author Randy McShandy <randy@mcshandy.xyz>
Tue, 12 Dec 2023 04:52:32 +0000 (22:52 -0600)
committer Randy McShandy <randy@mcshandy.xyz>
Tue, 12 Dec 2023 04:52:32 +0000 (22:52 -0600)
main.c

diff --git a/main.c b/main.c
index ade90ce7766bf0e5f6528f073317e13721673a6a..057f71dd73b5e3ae7838561f70589967e5d7d12e 100755 (executable)
--- a/main.c
+++ b/main.c
@@ -4,6 +4,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <pthread.h>
+#include <unistd.h>
 #define STB_IMAGE_WRITE_IMPLEMENTATION
 #include "stb_image_write.h"
 
@@ -49,9 +50,10 @@ typedef struct
        int width;
        int height;
 
-       pthread_mutex_t* mutex;
-       pthread_cond_t* worker_cond;
-       pthread_cond_t* boss_cond;
+       pthread_mutex_t*        mutex;
+       pthread_mutex_t         worker_mutex;
+       pthread_cond_t*         worker_cond;
+       pthread_cond_t*         boss_cond;
 
        int* waiting_workers;
        int worker_count;
@@ -135,15 +137,19 @@ float kernel_sum(Mat3 kernel, Mat3 source)
 float rd_a_prime(FVec2** source_grid, RD_Opts opts, int x, int y, Mat3 kernel, float A, float B)
 {
        float a_prime = 1.0f;
+       int x_less = (x-1 < 0) ? x : x-1;
+       int y_less = (y-1 < 0) ? y : y-1;
+       int x_more = (x+1 == GRID_X) ? x : x+1;
+       int y_more = (y+1 == GRID_Y) ? y : y+1;
        // Use species A in the convolution, b_prime will need B
        // For now we won't iterate over edge rows and columns to avoid special case vomit
        Mat3 source =
        {
                .v =
                {
-                       {grid[x-1][y-1].a,  grid[x][y-1].a, grid[x+1][y-1].a},
-                       {grid[x-1][y+0].a,  grid[x][y+0].a, grid[x+1][y+0].a},
-                       {grid[x-1][y+1].a,  grid[x][y+1].a, grid[x+1][y+1].a},
+                       {grid[x_less][y_less].a,  grid[x][y_less].a,    grid[x_more][y_less].a},
+                       {grid[x_less][y+0].a,                   grid[x][y+0].a,                 grid[x_more][y+0].a},
+                       {grid[x_less][y_more].a,  grid[x][y_more].a,    grid[x_more][y_more].a},
                }
        };
 
@@ -154,14 +160,18 @@ float rd_a_prime(FVec2** source_grid, RD_Opts opts, int x, int y, Mat3 kernel, f
 float rd_b_prime(FVec2** source_grid, RD_Opts opts, int x, int y, Mat3 kernel, float A, float B)
 {
        float b_prime = 1.0f;
+       int x_less = (x-1 < 0) ? x : x-1;
+       int y_less = (y-1 < 0) ? y : y-1;
+       int x_more = (x+1 == GRID_X) ? x : x+1;
+       int y_more = (y+1 == GRID_Y) ? y : y+1;
        // Use species A in the convolution, b_prime will need B
        Mat3 source =
        {
                .v =
                {
-                       {grid[x-1][y-1].b,  grid[x][y-1].b, grid[x+1][y-1].b},
-                       {grid[x-1][y+0].b,  grid[x][y+0].b, grid[x+1][y+0].b},
-                       {grid[x-1][y+1].b,  grid[x][y+1].b, grid[x+1][y+1].b},
+                       {grid[x_less][y_less].b,  grid[x][y_less].b,    grid[x_more][y_less].b},
+                       {grid[x_less][y+0].b,                   grid[x][y+0].b,                 grid[x_more][y+0].b},
+                       {grid[x_less][y_more].b,  grid[x][y_more].b,    grid[x_more][y_more].b},
                }
        };
 
@@ -181,9 +191,9 @@ void* iterator(void* _arg)
        for (int iterations = 0; iterations < warg->max_iterations; iterations++)
        {
                printf("worker %d: work unit %d/%d\n", warg->worker_id, iterations, warg->max_iterations);
-               for (int x = start_x; x < w + start_x - 1; x++)
+               for (int x = start_x; x < w + start_x; x++)
                {
-                       for (int y = start_y; y < h + start_y - 1; y++)
+                       for (int y = start_y; y < h + start_y; y++)
                        {
                                FVec2 each = grid[x][y];
                                grid_prime[x][y].a = rd_a_prime(grid, opts, x, y, laplacian_kernel, each.a, each.b);
@@ -195,7 +205,7 @@ void* iterator(void* _arg)
                printf("worker %d: waking up boss\n", warg->worker_id);
                pthread_cond_signal(warg->boss_cond);
 
-               pthread_cond_wait(warg->worker_cond, warg->mutex);
+               pthread_cond_wait(warg->worker_cond, &warg->worker_mutex);
 
        }
 
@@ -241,7 +251,7 @@ int main(int argc, char** argv)
 
        int waiting_workers = 0;
        worker_arg warg = {
-               opts, grid, grid_prime, 1, 1, (GRID_X-1), (GRID_Y-1), .worker_count = 4, .waiting_workers = &waiting_workers, .max_iterations = 1e4
+               opts, grid, grid_prime, 0, 0, (GRID_X), (GRID_Y), .worker_count = 4, .waiting_workers = &waiting_workers, .max_iterations = 1e4
        };
        pthread_t threads[warg.worker_count];
        pthread_mutex_t mutex;
@@ -262,8 +272,9 @@ int main(int argc, char** argv)
        {
                wargs[t]                                                = warg;
                wargs[t].worker_id      = t;
-               wargs[t].width                  = ((GRID_X)/warg.worker_count) - 1;
-               wargs[t].start_x                = 1 + (wargs[t].width * (t));
+               wargs[t].width                  = (GRID_X/warg.worker_count);
+               wargs[t].start_x                = (wargs[t].width * t);
+               pthread_mutex_init(&wargs[t].worker_mutex, NULL);
                printf("worker %d x_span %d, %d\n", t, wargs[t].start_x, wargs[t].width);
                pthread_create(&threads[t], NULL, iterator, &wargs[t]);
        }
@@ -282,9 +293,7 @@ int main(int argc, char** argv)
                grid = grid_prime;
                grid_prime = grid_temp;
                printf("boss: workload %d/%d\n", iterations, max_iterations);
-
                {
-                       //sleep(1);
                        // segfault somewhere in here lmao
                                 if (1 && (iterations % (max_iterations/100) == 0))
                                 {