394 lines
8.5 KiB
C
394 lines
8.5 KiB
C
|
// Adrien Bourmault 3677850
|
||
|
// Volodymyr Patuta
|
||
|
// Groupe K
|
||
|
|
||
|
#include "easypap.h"
|
||
|
|
||
|
#include <omp.h>
|
||
|
|
||
|
///////////////////////////// Sequential version (tiled)
|
||
|
// Suggested cmdline(s):
|
||
|
// ./run -l images/1024.png -k blur -v seq -si
|
||
|
//
|
||
|
// resultat -O3 : 3295.680
|
||
|
// resultat -O2 : 3240.540
|
||
|
// resultat -O1 : 3289.684
|
||
|
// resultat -O0 : 9681.364
|
||
|
//
|
||
|
int blur_do_tile_default (int x, int y, int width, int height)
|
||
|
{
|
||
|
for (int i = y; i < y + height; i++)
|
||
|
for (int j = x; j < x + width; j++) {
|
||
|
unsigned r = 0, g = 0, b = 0, a = 0, n = 0;
|
||
|
|
||
|
int i_d = (i > 0) ? i - 1 : i;
|
||
|
int i_f = (i < DIM - 1) ? i + 1 : i;
|
||
|
int j_d = (j > 0) ? j - 1 : j;
|
||
|
int j_f = (j < DIM - 1) ? j + 1 : j;
|
||
|
|
||
|
for (int yloc = i_d; yloc <= i_f; yloc++)
|
||
|
for (int xloc = j_d; xloc <= j_f; xloc++) {
|
||
|
unsigned c = cur_img (yloc, xloc);
|
||
|
r += extract_red (c);
|
||
|
g += extract_green (c);
|
||
|
b += extract_blue (c);
|
||
|
a += extract_alpha (c);
|
||
|
n += 1;
|
||
|
}
|
||
|
|
||
|
r /= n;
|
||
|
g /= n;
|
||
|
b /= n;
|
||
|
a /= n;
|
||
|
|
||
|
next_img (i, j) = rgba (r, g, b, a);
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
int blur_do_tile_default_nb (int x, int y, int width, int height)
|
||
|
{
|
||
|
for (int i = y + 1; i < y + height - 1; i++) {
|
||
|
for (int j = x + 1; j < x + width - 1; j++) {
|
||
|
unsigned r = 0, g = 0, b = 0, a = 0, n = 0;
|
||
|
for (int yloc = i - 1; yloc <= i + 1; yloc++) {
|
||
|
for (int xloc = j - 1; xloc <= j + 1; xloc++) {
|
||
|
unsigned c = cur_img (yloc, xloc);
|
||
|
r += extract_red (c);
|
||
|
g += extract_green (c);
|
||
|
b += extract_blue (c);
|
||
|
a += extract_alpha (c);
|
||
|
n += 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
r /= n;
|
||
|
g /= n;
|
||
|
b /= n;
|
||
|
a /= n;
|
||
|
|
||
|
next_img (i, j) = rgba (r, g, b, a);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for (int i = 0; i < width - 1; i++) {
|
||
|
next_img (i, 0) = cur_img(i, 0);
|
||
|
next_img (i, DIM - 1) = cur_img(i, DIM - 1);
|
||
|
next_img (0, i) = cur_img(0, i);
|
||
|
next_img (DIM - 1, i) = cur_img(DIM - 1, i);
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
// resultat optim1 Denver: 7349.435
|
||
|
// resultat optim1 Cortex: 23348.009
|
||
|
// resultat nb Denver: 13512.337
|
||
|
// resultat nb Cortex: 27133.248
|
||
|
int blur_do_tile_default_optim1 (int x, int y, int width, int height)
|
||
|
{
|
||
|
for (int i = y + 1; i < y + height - 1; i++) {
|
||
|
for (int j = x + 1; j < x + width - 1; j++) {
|
||
|
unsigned r = 0, g = 0, b = 0, a = 0, n = 0;
|
||
|
for (int yloc = i - 1; yloc <= i + 1; yloc++) {
|
||
|
unsigned c = cur_img (yloc, j - 1);
|
||
|
r += extract_red (c);
|
||
|
g += extract_green (c);
|
||
|
b += extract_blue (c);
|
||
|
a += extract_alpha (c);
|
||
|
c = cur_img (yloc, j);
|
||
|
r += extract_red (c);
|
||
|
g += extract_green (c);
|
||
|
b += extract_blue (c);
|
||
|
a += extract_alpha (c);
|
||
|
c = cur_img (yloc, j + 1);
|
||
|
r += extract_red (c);
|
||
|
g += extract_green (c);
|
||
|
b += extract_blue (c);
|
||
|
a += extract_alpha (c);
|
||
|
n += 3;
|
||
|
}
|
||
|
|
||
|
r /= n;
|
||
|
g /= n;
|
||
|
b /= n;
|
||
|
a /= n;
|
||
|
|
||
|
next_img (i, j) = rgba (r, g, b, a);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for (int i = 0; i < width - 1; i++) {
|
||
|
next_img (i, 0) = cur_img(i, 0);
|
||
|
next_img (i, DIM - 1) = cur_img(i, DIM - 1);
|
||
|
next_img (0, i) = cur_img(0, i);
|
||
|
next_img (DIM - 1, i) = cur_img(DIM - 1, i);
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
// resultat Denver: 6355.280
|
||
|
// resultat Cortex: 23297.872
|
||
|
int blur_do_tile_default_optim2 (int x, int y, int width, int height)
|
||
|
{
|
||
|
for (int i = y + 1; i < y + height - 1; i++) {
|
||
|
for (int j = x + 1; j < x + width - 1; j++) {
|
||
|
unsigned r = 0, g = 0, b = 0, a = 0, n = 0;
|
||
|
|
||
|
unsigned c = cur_img (i - 1, j - 1);
|
||
|
r += extract_red (c);
|
||
|
g += extract_green (c);
|
||
|
b += extract_blue (c);
|
||
|
a += extract_alpha (c);
|
||
|
c = cur_img (i - 1, j);
|
||
|
r += extract_red (c);
|
||
|
g += extract_green (c);
|
||
|
b += extract_blue (c);
|
||
|
a += extract_alpha (c);
|
||
|
c = cur_img (i - 1, j + 1);
|
||
|
r += extract_red (c);
|
||
|
g += extract_green (c);
|
||
|
b += extract_blue (c);
|
||
|
a += extract_alpha (c);
|
||
|
|
||
|
c = cur_img (i, j - 1);
|
||
|
r += extract_red (c);
|
||
|
g += extract_green (c);
|
||
|
b += extract_blue (c);
|
||
|
a += extract_alpha (c);
|
||
|
c = cur_img (i, j);
|
||
|
r += extract_red (c);
|
||
|
g += extract_green (c);
|
||
|
b += extract_blue (c);
|
||
|
a += extract_alpha (c);
|
||
|
c = cur_img (i, j + 1);
|
||
|
r += extract_red (c);
|
||
|
g += extract_green (c);
|
||
|
b += extract_blue (c);
|
||
|
a += extract_alpha (c);
|
||
|
|
||
|
c = cur_img (i + 1, j - 1);
|
||
|
r += extract_red (c);
|
||
|
g += extract_green (c);
|
||
|
b += extract_blue (c);
|
||
|
a += extract_alpha (c);
|
||
|
c = cur_img (i + 1, j);
|
||
|
r += extract_red (c);
|
||
|
g += extract_green (c);
|
||
|
b += extract_blue (c);
|
||
|
a += extract_alpha (c);
|
||
|
c = cur_img (i + 1, j + 1);
|
||
|
r += extract_red (c);
|
||
|
g += extract_green (c);
|
||
|
b += extract_blue (c);
|
||
|
a += extract_alpha (c);
|
||
|
|
||
|
n += 9;
|
||
|
|
||
|
r /= n;
|
||
|
g /= n;
|
||
|
b /= n;
|
||
|
a /= n;
|
||
|
|
||
|
next_img (i, j) = rgba (r, g, b, a);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for (int i = 0; i < width - 1; i++) {
|
||
|
next_img (i, 0) = cur_img(i, 0);
|
||
|
next_img (i, DIM - 1) = cur_img(i, DIM - 1);
|
||
|
next_img (0, i) = cur_img(0, i);
|
||
|
next_img (DIM - 1, i) = cur_img(DIM - 1, i);
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
// resultat Denver: 5189.197
|
||
|
// resultat Cortex: 16848.161
|
||
|
int blur_do_tile_default_optim3 (int x, int y, int width, int height)
|
||
|
{
|
||
|
for (int i = y + 1; i < y + height - 1; i++) {
|
||
|
for (int j = x + 1; j < x + width - 1; j++) {
|
||
|
unsigned r = 0, g = 0, b = 0, a = 0, n = 0;
|
||
|
|
||
|
unsigned c = cur_img (i - 1, j - 1);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
c = cur_img (i - 1, j);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
c = cur_img (i - 1, j + 1);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
|
||
|
c = cur_img (i, j - 1);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
c = cur_img (i, j);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
c = cur_img (i, j + 1);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
|
||
|
c = cur_img (i + 1, j - 1);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
c = cur_img (i + 1, j);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
c = cur_img (i + 1, j + 1);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
|
||
|
n += 9;
|
||
|
|
||
|
r /= n;
|
||
|
g /= n;
|
||
|
b /= n;
|
||
|
a /= n;
|
||
|
|
||
|
next_img (i, j) = rgba (r, g, b, a);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for (int i = 0; i < width - 1; i++) {
|
||
|
next_img (i, 0) = cur_img(i, 0);
|
||
|
next_img (i, DIM - 1) = cur_img(i, DIM - 1);
|
||
|
next_img (0, i) = cur_img(0, i);
|
||
|
next_img (DIM - 1, i) = cur_img(DIM - 1, i);
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
int blur_do_tile_default_optim4 (int x, int y, int width, int height)
|
||
|
{
|
||
|
for (int i = y + 1; i < y + height - 1; i++) {
|
||
|
for (int j = x + 1; j < x + width - 1; j++) {
|
||
|
unsigned r = 0, g = 0, b = 0, a = 0, n = 0;
|
||
|
|
||
|
unsigned c = cur_img (i - 1, j - 1);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
c = cur_img (i - 1, j);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
c = cur_img (i - 1, j + 1);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
|
||
|
c = cur_img (i, j - 1);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
c = cur_img (i, j);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
c = cur_img (i, j + 1);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
|
||
|
c = cur_img (i + 1, j - 1);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
c = cur_img (i + 1, j);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
c = cur_img (i + 1, j + 1);
|
||
|
r += c >> 24;
|
||
|
g += (c >> 16) & 255;
|
||
|
b += (c >> 8) & 255;
|
||
|
a += c & 255;
|
||
|
|
||
|
n += 9;
|
||
|
|
||
|
r /= n;
|
||
|
g /= n;
|
||
|
b /= n;
|
||
|
a /= n;
|
||
|
|
||
|
next_img (i, j) = rgba (r, g, b, a);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for (int i = 0; i < width - 1; i++) {
|
||
|
next_img (i, 0) = cur_img(i, 0);
|
||
|
next_img (i, DIM - 1) = cur_img(i, DIM - 1);
|
||
|
next_img (0, i) = cur_img(0, i);
|
||
|
next_img (DIM - 1, i) = cur_img(DIM - 1, i);
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
///////////////////////////// Sequential version (tiled)
|
||
|
// Suggested cmdline(s):
|
||
|
// ./run -l images/1024.png -k blur -v seq
|
||
|
//
|
||
|
unsigned blur_compute_seq (unsigned nb_iter)
|
||
|
{
|
||
|
for (unsigned it = 1; it <= nb_iter; it++) {
|
||
|
|
||
|
do_tile (0, 0, DIM, DIM, 0);
|
||
|
|
||
|
swap_images ();
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
///////////////////////////// Tiled sequential version
|
||
|
// Suggested cmdline(s):
|
||
|
// ./run -l images/1024.png -k blur -v tiled -ts 32 -m si
|
||
|
//
|
||
|
unsigned blur_compute_tiled (unsigned nb_iter)
|
||
|
{
|
||
|
for (unsigned it = 1; it <= nb_iter; it++) {
|
||
|
|
||
|
for (int y = 0; y < DIM; y += TILE_H)
|
||
|
for (int x = 0; x < DIM; x += TILE_W)
|
||
|
do_tile (x, y, TILE_W, TILE_H, 0);
|
||
|
|
||
|
swap_images ();
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|