bit of a regression: threads syncing properly but locking up eventually

author Randy McShandy <randy@mcshandy.xyz>

Tue, 12 Dec 2023 04:52:32 +0000 (22:52 -0600)

committer Randy McShandy <randy@mcshandy.xyz>

Tue, 12 Dec 2023 04:52:32 +0000 (22:52 -0600)
author Randy McShandy <randy@mcshandy.xyz>
Tue, 12 Dec 2023 04:52:32 +0000 (22:52 -0600)
committer Randy McShandy <randy@mcshandy.xyz>
Tue, 12 Dec 2023 04:52:32 +0000 (22:52 -0600)
diff --git a/main.c b/main.c

index ade90ce7766bf0e5f6528f073317e13721673a6a..057f71dd73b5e3ae7838561f70589967e5d7d12e 100755 (executable)
--- a/main.c
+++ b/main.c
@@ -4,6 +4,7 @@
  #include <stdlib.h>
  #include <string.h>
  #include <pthread.h>
+#include <unistd.h>
  #define STB_IMAGE_WRITE_IMPLEMENTATION
  #include "stb_image_write.h"
  
@@ -49,9 +50,10 @@ typedef struct
         int width;
         int height;
  
-       pthread_mutex_t* mutex;
-       pthread_cond_t* worker_cond;
-       pthread_cond_t* boss_cond;
+       pthread_mutex_t*        mutex;
+       pthread_mutex_t         worker_mutex;
+       pthread_cond_t*         worker_cond;
+       pthread_cond_t*         boss_cond;
  
         int* waiting_workers;
         int worker_count;
@@ -135,15 +137,19 @@ float kernel_sum(Mat3 kernel, Mat3 source)
  float rd_a_prime(FVec2** source_grid, RD_Opts opts, int x, int y, Mat3 kernel, float A, float B)
  {
         float a_prime = 1.0f;
+       int x_less = (x-1 < 0) ? x : x-1;
+       int y_less = (y-1 < 0) ? y : y-1;
+       int x_more = (x+1 == GRID_X) ? x : x+1;
+       int y_more = (y+1 == GRID_Y) ? y : y+1;
         // Use species A in the convolution, b_prime will need B
         // For now we won't iterate over edge rows and columns to avoid special case vomit
         Mat3 source =
         {
                 .v =
                 {
-                       {grid[x-1][y-1].a,  grid[x][y-1].a, grid[x+1][y-1].a},
-                       {grid[x-1][y+0].a,  grid[x][y+0].a, grid[x+1][y+0].a},
-                       {grid[x-1][y+1].a,  grid[x][y+1].a, grid[x+1][y+1].a},
+                       {grid[x_less][y_less].a,  grid[x][y_less].a,    grid[x_more][y_less].a},
+                       {grid[x_less][y+0].a,                   grid[x][y+0].a,                 grid[x_more][y+0].a},
+                       {grid[x_less][y_more].a,  grid[x][y_more].a,    grid[x_more][y_more].a},
                 }
         };
  
@@ -154,14 +160,18 @@ float rd_a_prime(FVec2** source_grid, RD_Opts opts, int x, int y, Mat3 kernel, f
  float rd_b_prime(FVec2** source_grid, RD_Opts opts, int x, int y, Mat3 kernel, float A, float B)
  {
         float b_prime = 1.0f;
+       int x_less = (x-1 < 0) ? x : x-1;
+       int y_less = (y-1 < 0) ? y : y-1;
+       int x_more = (x+1 == GRID_X) ? x : x+1;
+       int y_more = (y+1 == GRID_Y) ? y : y+1;
         // Use species A in the convolution, b_prime will need B
         Mat3 source =
         {
                 .v =
                 {
-                       {grid[x-1][y-1].b,  grid[x][y-1].b, grid[x+1][y-1].b},
-                       {grid[x-1][y+0].b,  grid[x][y+0].b, grid[x+1][y+0].b},
-                       {grid[x-1][y+1].b,  grid[x][y+1].b, grid[x+1][y+1].b},
+                       {grid[x_less][y_less].b,  grid[x][y_less].b,    grid[x_more][y_less].b},
+                       {grid[x_less][y+0].b,                   grid[x][y+0].b,                 grid[x_more][y+0].b},
+                       {grid[x_less][y_more].b,  grid[x][y_more].b,    grid[x_more][y_more].b},
                 }
         };
  
@@ -181,9 +191,9 @@ void* iterator(void* _arg)
         for (int iterations = 0; iterations < warg->max_iterations; iterations++)
         {
                 printf("worker %d: work unit %d/%d\n", warg->worker_id, iterations, warg->max_iterations);
-               for (int x = start_x; x < w + start_x - 1; x++)
+               for (int x = start_x; x < w + start_x; x++)
                 {
-                       for (int y = start_y; y < h + start_y - 1; y++)
+                       for (int y = start_y; y < h + start_y; y++)
                         {
                                 FVec2 each = grid[x][y];
                                 grid_prime[x][y].a = rd_a_prime(grid, opts, x, y, laplacian_kernel, each.a, each.b);
@@ -195,7 +205,7 @@ void* iterator(void* _arg)
                 printf("worker %d: waking up boss\n", warg->worker_id);
                 pthread_cond_signal(warg->boss_cond);
  
-               pthread_cond_wait(warg->worker_cond, warg->mutex);
+               pthread_cond_wait(warg->worker_cond, &warg->worker_mutex);
  
         }
  
@@ -241,7 +251,7 @@ int main(int argc, char** argv)
  
         int waiting_workers = 0;
         worker_arg warg = {
-               opts, grid, grid_prime, 1, 1, (GRID_X-1), (GRID_Y-1), .worker_count = 4, .waiting_workers = &waiting_workers, .max_iterations = 1e4
+               opts, grid, grid_prime, 0, 0, (GRID_X), (GRID_Y), .worker_count = 4, .waiting_workers = &waiting_workers, .max_iterations = 1e4
         };
         pthread_t threads[warg.worker_count];
         pthread_mutex_t mutex;
@@ -262,8 +272,9 @@ int main(int argc, char** argv)
         {
                 wargs[t]                                                = warg;
                 wargs[t].worker_id      = t;
-               wargs[t].width                  = ((GRID_X)/warg.worker_count) - 1;
-               wargs[t].start_x                = 1 + (wargs[t].width * (t));
+               wargs[t].width                  = (GRID_X/warg.worker_count);
+               wargs[t].start_x                = (wargs[t].width * t);
+               pthread_mutex_init(&wargs[t].worker_mutex, NULL);
                 printf("worker %d x_span %d, %d\n", t, wargs[t].start_x, wargs[t].width);
                 pthread_create(&threads[t], NULL, iterator, &wargs[t]);
         }
@@ -282,9 +293,7 @@ int main(int argc, char** argv)
                 grid = grid_prime;
                 grid_prime = grid_temp;
                 printf("boss: workload %d/%d\n", iterations, max_iterations);
-
                 {
-                       //sleep(1);
                         // segfault somewhere in here lmao
                                  if (1 && (iterations % (max_iterations/100) == 0))
                                  {
author	Randy McShandy <randy@mcshandy.xyz>
	Tue, 12 Dec 2023 04:52:32 +0000 (22:52 -0600)
committer	Randy McShandy <randy@mcshandy.xyz>
	Tue, 12 Dec 2023 04:52:32 +0000 (22:52 -0600)