ex2.c (5772B)
1 #include <err.h> 2 #include <errno.h> 3 #include <pthread.h> 4 #include <stdio.h> 5 #include <stdlib.h> 6 #include <time.h> 7 8 /* 9 * Εργαστήριο ΛΣ2 (Δ6) / Εργασία 2: Άσκηση 2 / 2020-2021 10 * Ονοματεπώνυμο: Χρήστος Μαργιώλης 11 * ΑΜ: 19390133 12 * Τρόπος μεταγλώττισης: `cc ex2.c -lpthread -DLLIM=x -DULIM=y -o ex2` 13 * Όπου `x` και `y` το κάτω και πάνω όριο αντίστοιχα. 14 */ 15 16 struct foo { 17 int **g_arr; /* Global array. */ 18 int **d; /* `d[i][j] = g_max - g_arr[i][j]` */ 19 int *l_max; /* Local maximums. */ 20 int g_max; /* Global maximum. */ 21 int l_n; /* Number of elements each thread will work with. */ 22 int n; /* Dimensions. */ 23 int ntd; /* Number of threads. */ 24 int tid; /* Thread ID. */ 25 pthread_mutex_t mtx; 26 pthread_barrier_t bar; 27 }; 28 29 static void *threaded_callback(void *); 30 static void *emalloc(size_t); 31 static void usage(void); 32 33 static char *argv0; 34 35 static void * 36 thread_callback(void *foo) 37 { 38 struct foo *f; 39 int i, j, start, end, max, rc; 40 41 f = (struct foo *)foo; 42 /* 43 * Each thread reads X rows from `g_arr`. Suppose that we have 44 * 2 threads and a 4x4 array: 45 * 46 * row: 0 | 1, 2, 3, 4 | 47 * row: 1 | 5, 6, 7, 8 | 48 * row: 2 | 9, 10, 11, 12 | 49 * row: 3 | 13, 14, 15, 16 | 50 * 51 * Each thread now has to work with 2 rows from the array since 52 * `l_n = n / p = 4 / 2 = 2`. But in order to get the 53 * correct row, we have to offset it by the thread number. 54 * 55 * So, thread 0 will read the following part of `g_arr`: 56 * start: tid * l_n -> 0 * 2 -> 0 57 * end: start + l_n -> 0 + 2 -> 2 58 * 59 * which is the first half of `g_arr`: 60 * row: 0 | 1, 2, 3, 4 | 61 * row: 1 | 5, 6, 7, 8 | 62 * 63 * Following the same logic, thread 1 will read at: 64 * start = 1 * 2 -> 2 65 * end = 2 + 2 -> 4 66 * 67 * which is the second half of `g_arr`: 68 * row: 2 | 9, 10, 11, 12 | 69 * row: 3 | 13, 14, 15, 16 | 70 * 71 */ 72 start = f->tid * f->l_n; 73 end = start + f->l_n; 74 75 /* Calculate the local max. */ 76 max = *f->g_arr[start]; 77 for (i = start; i < end; i++) 78 for (j = 0; j < f->n; j++) 79 if (f->g_arr[i][j] > max) 80 max = f->g_arr[i][j]; 81 f->l_max[f->tid] = max; 82 83 /* Calculate the global max right away, no need for more functions. */ 84 if (pthread_mutex_lock(&f->mtx) != 0) 85 err(1, "pthread_mutex_lock"); 86 if (f->tid == 0) 87 f->g_max = *f->l_max; 88 else if (f->l_max[f->tid] > f->g_max) 89 f->g_max = f->l_max[f->tid]; 90 91 /* 92 * We need to know each thread's ID in order to calculate `start` 93 * and `end` properly. Since we don't touch `f->tid` inside `main` 94 * (apart from initializing it), we need to update it here. 95 */ 96 f->tid++; 97 if (pthread_mutex_unlock(&f->mtx) != 0) 98 err(1, "pthread_mutex_unlock"); 99 100 /* 101 * Wait for all threads to finish executing the statements above, 102 * then continue. 103 */ 104 rc = pthread_barrier_wait(&f->bar); 105 if ((rc != 0 && rc != PTHREAD_BARRIER_SERIAL_THREAD) || errno == EINVAL) 106 err(1, "pthread_barrier_wait"); 107 108 /* 109 * We'll get here once all threads have passed the barrier, which 110 * means that `f->tid` will be equal to the total number of threads. 111 * In order for `start` and `end` to be offset properly again, we now 112 * need to go backwards. 113 */ 114 if (pthread_mutex_lock(&f->mtx) != 0) 115 err(1, "pthread_mutex_lock"); 116 f->tid--; 117 if (pthread_mutex_unlock(&f->mtx) != 0) 118 err(1, "pthread_mutex_unlock"); 119 120 /* Calculate the D array. */ 121 start = f->tid * f->l_n; 122 end = start + f->l_n; 123 for (i = start; i < end; i++) 124 for (j = 0; j < f->n; j++) 125 f->d[i][j] = f->g_max - f->g_arr[i][j]; 126 127 return NULL; 128 } 129 130 static void * 131 emalloc(size_t nb) 132 { 133 void *p; 134 135 if ((p = malloc(nb)) == NULL) 136 err(1, "malloc"); 137 138 return p; 139 } 140 141 static void 142 usage(void) 143 { 144 fprintf(stderr, "usage: %s [-t threads]\n", argv0); 145 exit(1); 146 } 147 148 int 149 main(int argc, char *argv[]) 150 { 151 struct foo *f; 152 pthread_t *tds; 153 int i, j, rc; 154 155 argv0 = *argv; 156 f = emalloc(sizeof(struct foo)); 157 158 do { 159 printf("\rp: "); 160 /* 161 * Save the return value of scanf(3) to make sure 162 * we did read valid input. 163 */ 164 rc = scanf("%d", &f->ntd); 165 /* Flush input buffer. */ 166 (void)getchar(); 167 /* Cannot have less than 1 threads. */ 168 } while (f->ntd < 1 || rc != 1); 169 170 do { 171 printf("\rn: "); 172 rc = scanf("%d", &f->n); 173 (void)getchar(); 174 /* 175 * The number of elements must be greater than 0 (obviously) and 176 * a multiple of the total number of threads. 177 */ 178 } while (f->n < 0 || f->n % f->ntd != 0 || rc != 1); 179 180 tds = emalloc(f->ntd * sizeof(pthread_t)); 181 f->l_n = f->n / f->ntd; 182 f->g_arr = emalloc(f->n * sizeof(int *)); 183 f->l_max = emalloc(f->ntd * sizeof(int)); 184 f->d = emalloc(f->n * sizeof(int *)); 185 186 /* The exercise says we should read from a file, but don't do it. :-) */ 187 srand(time(NULL)); 188 for (i = 0; i < f->n; i++) { 189 f->g_arr[i] = emalloc(f->n * sizeof(int)); 190 f->d[i] = emalloc(f->n * sizeof(int)); 191 for (j = 0; j < f->n; j++) 192 f->g_arr[i][j] = rand() % (ULIM - LLIM) + LLIM; 193 } 194 195 if (pthread_mutex_init(&f->mtx, NULL) != 0) 196 err(1, "pthread_mutex_init"); 197 if (pthread_barrier_init(&f->bar, NULL, f->ntd) != 0) 198 err(1, "pthread_barrier_init"); 199 200 for (i = 0, f->tid = 0; i < f->ntd; i++) 201 if (pthread_create(&tds[i], NULL, thread_callback, (void *)f) != 0) 202 err(1, "pthread_create"); 203 for (i = 0; i < f->ntd; i++) 204 if (pthread_join(tds[i], NULL) != 0) 205 err(1, "pthread_join"); 206 207 /* Print results. */ 208 for (i = 0; i < f->n; i++) 209 for (j = 0; j < f->n; j++) 210 printf("arr[%d][%d]: %d\td[%d][%d]: %d\n", 211 i, j, f->g_arr[i][j], 212 i, j, f->d[i][j]); 213 printf("max: %d\n", f->g_max); 214 215 (void)pthread_barrier_destroy(&f->bar); 216 (void)pthread_mutex_destroy(&f->mtx); 217 pthread_exit(NULL); 218 free(tds); 219 while (f->n--) { 220 free(*f->g_arr++); 221 free(*f->d++); 222 } 223 free(f->g_arr); 224 free(f->l_max); 225 free(f->d); 226 free(f); 227 228 return 0; 229 }