// Adrien Bourmault 3677850 // Volodymyr Patuta // Groupe K #include "easypap.h" #include ///////////////////////////// Sequential version (tiled) // Suggested cmdline(s): // ./run -l images/1024.png -k blur -v seq -si // // resultat -O3 : 3295.680 // resultat -O2 : 3240.540 // resultat -O1 : 3289.684 // resultat -O0 : 9681.364 // int blur_do_tile_default (int x, int y, int width, int height) { for (int i = y; i < y + height; i++) for (int j = x; j < x + width; j++) { unsigned r = 0, g = 0, b = 0, a = 0, n = 0; int i_d = (i > 0) ? i - 1 : i; int i_f = (i < DIM - 1) ? i + 1 : i; int j_d = (j > 0) ? j - 1 : j; int j_f = (j < DIM - 1) ? j + 1 : j; for (int yloc = i_d; yloc <= i_f; yloc++) for (int xloc = j_d; xloc <= j_f; xloc++) { unsigned c = cur_img (yloc, xloc); r += extract_red (c); g += extract_green (c); b += extract_blue (c); a += extract_alpha (c); n += 1; } r /= n; g /= n; b /= n; a /= n; next_img (i, j) = rgba (r, g, b, a); } return 0; } int blur_do_tile_default_nb (int x, int y, int width, int height) { for (int i = y + 1; i < y + height - 1; i++) { for (int j = x + 1; j < x + width - 1; j++) { unsigned r = 0, g = 0, b = 0, a = 0, n = 0; for (int yloc = i - 1; yloc <= i + 1; yloc++) { for (int xloc = j - 1; xloc <= j + 1; xloc++) { unsigned c = cur_img (yloc, xloc); r += extract_red (c); g += extract_green (c); b += extract_blue (c); a += extract_alpha (c); n += 1; } } r /= n; g /= n; b /= n; a /= n; next_img (i, j) = rgba (r, g, b, a); } } for (int i = 0; i < width - 1; i++) { next_img (i, 0) = cur_img(i, 0); next_img (i, DIM - 1) = cur_img(i, DIM - 1); next_img (0, i) = cur_img(0, i); next_img (DIM - 1, i) = cur_img(DIM - 1, i); } return 0; } // resultat optim1 Denver: 7349.435 // resultat optim1 Cortex: 23348.009 // resultat nb Denver: 13512.337 // resultat nb Cortex: 27133.248 int blur_do_tile_default_optim1 (int x, int y, int width, int height) { for (int i = y + 1; i < y + height - 1; i++) { for (int j = x + 1; j < x + width - 1; j++) { unsigned r = 0, g = 0, b = 0, a = 0, n = 0; for (int yloc = i - 1; yloc <= i + 1; yloc++) { unsigned c = cur_img (yloc, j - 1); r += extract_red (c); g += extract_green (c); b += extract_blue (c); a += extract_alpha (c); c = cur_img (yloc, j); r += extract_red (c); g += extract_green (c); b += extract_blue (c); a += extract_alpha (c); c = cur_img (yloc, j + 1); r += extract_red (c); g += extract_green (c); b += extract_blue (c); a += extract_alpha (c); n += 3; } r /= n; g /= n; b /= n; a /= n; next_img (i, j) = rgba (r, g, b, a); } } for (int i = 0; i < width - 1; i++) { next_img (i, 0) = cur_img(i, 0); next_img (i, DIM - 1) = cur_img(i, DIM - 1); next_img (0, i) = cur_img(0, i); next_img (DIM - 1, i) = cur_img(DIM - 1, i); } return 0; } // resultat Denver: 6355.280 // resultat Cortex: 23297.872 int blur_do_tile_default_optim2 (int x, int y, int width, int height) { for (int i = y + 1; i < y + height - 1; i++) { for (int j = x + 1; j < x + width - 1; j++) { unsigned r = 0, g = 0, b = 0, a = 0, n = 0; unsigned c = cur_img (i - 1, j - 1); r += extract_red (c); g += extract_green (c); b += extract_blue (c); a += extract_alpha (c); c = cur_img (i - 1, j); r += extract_red (c); g += extract_green (c); b += extract_blue (c); a += extract_alpha (c); c = cur_img (i - 1, j + 1); r += extract_red (c); g += extract_green (c); b += extract_blue (c); a += extract_alpha (c); c = cur_img (i, j - 1); r += extract_red (c); g += extract_green (c); b += extract_blue (c); a += extract_alpha (c); c = cur_img (i, j); r += extract_red (c); g += extract_green (c); b += extract_blue (c); a += extract_alpha (c); c = cur_img (i, j + 1); r += extract_red (c); g += extract_green (c); b += extract_blue (c); a += extract_alpha (c); c = cur_img (i + 1, j - 1); r += extract_red (c); g += extract_green (c); b += extract_blue (c); a += extract_alpha (c); c = cur_img (i + 1, j); r += extract_red (c); g += extract_green (c); b += extract_blue (c); a += extract_alpha (c); c = cur_img (i + 1, j + 1); r += extract_red (c); g += extract_green (c); b += extract_blue (c); a += extract_alpha (c); n += 9; r /= n; g /= n; b /= n; a /= n; next_img (i, j) = rgba (r, g, b, a); } } for (int i = 0; i < width - 1; i++) { next_img (i, 0) = cur_img(i, 0); next_img (i, DIM - 1) = cur_img(i, DIM - 1); next_img (0, i) = cur_img(0, i); next_img (DIM - 1, i) = cur_img(DIM - 1, i); } return 0; } // resultat Denver: 5189.197 // resultat Cortex: 16848.161 int blur_do_tile_default_optim3 (int x, int y, int width, int height) { for (int i = y + 1; i < y + height - 1; i++) { for (int j = x + 1; j < x + width - 1; j++) { unsigned r = 0, g = 0, b = 0, a = 0, n = 0; unsigned c = cur_img (i - 1, j - 1); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; c = cur_img (i - 1, j); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; c = cur_img (i - 1, j + 1); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; c = cur_img (i, j - 1); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; c = cur_img (i, j); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; c = cur_img (i, j + 1); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; c = cur_img (i + 1, j - 1); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; c = cur_img (i + 1, j); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; c = cur_img (i + 1, j + 1); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; n += 9; r /= n; g /= n; b /= n; a /= n; next_img (i, j) = rgba (r, g, b, a); } } for (int i = 0; i < width - 1; i++) { next_img (i, 0) = cur_img(i, 0); next_img (i, DIM - 1) = cur_img(i, DIM - 1); next_img (0, i) = cur_img(0, i); next_img (DIM - 1, i) = cur_img(DIM - 1, i); } return 0; } int blur_do_tile_default_optim4 (int x, int y, int width, int height) { for (int i = y + 1; i < y + height - 1; i++) { for (int j = x + 1; j < x + width - 1; j++) { unsigned r = 0, g = 0, b = 0, a = 0, n = 0; unsigned c = cur_img (i - 1, j - 1); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; c = cur_img (i - 1, j); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; c = cur_img (i - 1, j + 1); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; c = cur_img (i, j - 1); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; c = cur_img (i, j); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; c = cur_img (i, j + 1); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; c = cur_img (i + 1, j - 1); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; c = cur_img (i + 1, j); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; c = cur_img (i + 1, j + 1); r += c >> 24; g += (c >> 16) & 255; b += (c >> 8) & 255; a += c & 255; n += 9; r /= n; g /= n; b /= n; a /= n; next_img (i, j) = rgba (r, g, b, a); } } for (int i = 0; i < width - 1; i++) { next_img (i, 0) = cur_img(i, 0); next_img (i, DIM - 1) = cur_img(i, DIM - 1); next_img (0, i) = cur_img(0, i); next_img (DIM - 1, i) = cur_img(DIM - 1, i); } return 0; } ///////////////////////////// Sequential version (tiled) // Suggested cmdline(s): // ./run -l images/1024.png -k blur -v seq // unsigned blur_compute_seq (unsigned nb_iter) { for (unsigned it = 1; it <= nb_iter; it++) { do_tile (0, 0, DIM, DIM, 0); swap_images (); } return 0; } ///////////////////////////// Tiled sequential version (tiled) // Suggested cmdline(s): // ./run -l images/1024.png -k blur -v tiled -ts 32 -m si // unsigned blur_compute_tiled (unsigned nb_iter) { for (unsigned it = 1; it <= nb_iter; it++) { for (int y = 0; y < DIM; y += TILE_H) for (int x = 0; x < DIM; x += TILE_W) do_tile (x, y, TILE_W, TILE_H, 0); swap_images (); } return 0; }