#include <stdlib.h>
#include <string.h>
#include <pthread.h>
+#include <unistd.h>
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"
int width;
int height;
- pthread_mutex_t* mutex;
- pthread_cond_t* worker_cond;
- pthread_cond_t* boss_cond;
+ pthread_mutex_t* mutex;
+ pthread_mutex_t worker_mutex;
+ pthread_cond_t* worker_cond;
+ pthread_cond_t* boss_cond;
int* waiting_workers;
int worker_count;
float rd_a_prime(FVec2** source_grid, RD_Opts opts, int x, int y, Mat3 kernel, float A, float B)
{
float a_prime = 1.0f;
+ int x_less = (x-1 < 0) ? x : x-1;
+ int y_less = (y-1 < 0) ? y : y-1;
+ int x_more = (x+1 == GRID_X) ? x : x+1;
+ int y_more = (y+1 == GRID_Y) ? y : y+1;
// Use species A in the convolution, b_prime will need B
// Edge rows and columns are handled by clamping the neighbour indices (x_less/x_more/y_less/y_more) to the cell itself
Mat3 source =
{
.v =
{
- {grid[x-1][y-1].a, grid[x][y-1].a, grid[x+1][y-1].a},
- {grid[x-1][y+0].a, grid[x][y+0].a, grid[x+1][y+0].a},
- {grid[x-1][y+1].a, grid[x][y+1].a, grid[x+1][y+1].a},
+ {grid[x_less][y_less].a, grid[x][y_less].a, grid[x_more][y_less].a},
+ {grid[x_less][y+0].a, grid[x][y+0].a, grid[x_more][y+0].a},
+ {grid[x_less][y_more].a, grid[x][y_more].a, grid[x_more][y_more].a},
}
};
float rd_b_prime(FVec2** source_grid, RD_Opts opts, int x, int y, Mat3 kernel, float A, float B)
{
float b_prime = 1.0f;
+ int x_less = (x-1 < 0) ? x : x-1;
+ int y_less = (y-1 < 0) ? y : y-1;
+ int x_more = (x+1 == GRID_X) ? x : x+1;
+ int y_more = (y+1 == GRID_Y) ? y : y+1;
// Use species A in the convolution, b_prime will need B
Mat3 source =
{
.v =
{
- {grid[x-1][y-1].b, grid[x][y-1].b, grid[x+1][y-1].b},
- {grid[x-1][y+0].b, grid[x][y+0].b, grid[x+1][y+0].b},
- {grid[x-1][y+1].b, grid[x][y+1].b, grid[x+1][y+1].b},
+ {grid[x_less][y_less].b, grid[x][y_less].b, grid[x_more][y_less].b},
+ {grid[x_less][y+0].b, grid[x][y+0].b, grid[x_more][y+0].b},
+ {grid[x_less][y_more].b, grid[x][y_more].b, grid[x_more][y_more].b},
}
};
for (int iterations = 0; iterations < warg->max_iterations; iterations++)
{
printf("worker %d: work unit %d/%d\n", warg->worker_id, iterations, warg->max_iterations);
- for (int x = start_x; x < w + start_x - 1; x++)
+ for (int x = start_x; x < w + start_x; x++)
{
- for (int y = start_y; y < h + start_y - 1; y++)
+ for (int y = start_y; y < h + start_y; y++)
{
FVec2 each = grid[x][y];
grid_prime[x][y].a = rd_a_prime(grid, opts, x, y, laplacian_kernel, each.a, each.b);
printf("worker %d: waking up boss\n", warg->worker_id);
pthread_cond_signal(warg->boss_cond);
- pthread_cond_wait(warg->worker_cond, warg->mutex);
+ pthread_cond_wait(warg->worker_cond, &warg->worker_mutex);
}
int waiting_workers = 0;
worker_arg warg = {
- opts, grid, grid_prime, 1, 1, (GRID_X-1), (GRID_Y-1), .worker_count = 4, .waiting_workers = &waiting_workers, .max_iterations = 1e4
+ opts, grid, grid_prime, 0, 0, (GRID_X), (GRID_Y), .worker_count = 4, .waiting_workers = &waiting_workers, .max_iterations = 1e4
};
pthread_t threads[warg.worker_count];
pthread_mutex_t mutex;
{
wargs[t] = warg;
wargs[t].worker_id = t;
- wargs[t].width = ((GRID_X)/warg.worker_count) - 1;
- wargs[t].start_x = 1 + (wargs[t].width * (t));
+ wargs[t].width = (GRID_X/warg.worker_count);
+ wargs[t].start_x = (wargs[t].width * t);
+ pthread_mutex_init(&wargs[t].worker_mutex, NULL);
printf("worker %d x_span %d, %d\n", t, wargs[t].start_x, wargs[t].width);
pthread_create(&threads[t], NULL, iterator, &wargs[t]);
}
grid = grid_prime;
grid_prime = grid_temp;
printf("boss: workload %d/%d\n", iterations, max_iterations);
-
{
- //sleep(1);
// FIXME: segfault somewhere in here
if (1 && (iterations % (max_iterations/100) == 0))
{