Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Jon Gibson
omp-critical
Commits
d86ceb5e
Commit
d86ceb5e
authored
Mar 19, 2020
by
Xavier Teruel
Browse files
Initial commit
parents
Changes
3
Hide whitespace changes
Inline
Side-by-side
Makefile
0 → 100644
View file @
d86ceb5e
ifeq
($(strip $(CXX)),)
CXX
=
gcc
endif
ifeq
($(strip $(CXX)),icpc)
CXXFLAGS
=
-qopenmp
-std
=
c++14
-O2
-g
-Wall
LDFLAGS
=
-qopenmp
else
CXXFLAGS
=
-fopenmp
-std
=
c++14
-O2
-g
-Wall
LDFLAGS
=
-fopenmp
endif
omp-critical.exe
:
omp-critical.o
$(CXX)
$(LDFLAGS)
$^
-o
$@
omp-critical.o
:
omp-critical.cpp
$(CXX)
$(CXXFLAGS)
-c
-I
.
$<
.PHONY
:
clean
clean
:
rm
-f
*
.exe
*
.o
README.md
0 → 100644
View file @
d86ceb5e
# openmp-critical-section kernel
An oil & gas code had the openmp-critical-section pattern and the computational aspects of
the original code is recreated here. This application solves the 3D wave equation:
$$
\f
rac{
\p
artial^{2}u}{
\p
artial t^{2}} & = c^{2}
\n
abla^{2}u$$
using the pseudospectral method. However, for the WP7 kernel the finite difference method was
selected for the purpose of simplicity which re-created the computational profile of the
original code.
The POP1 assessment identified a critical section which contained a large block of code that
enclosed computation as well as I/O. This created a large amount of thread locking time that
reduced the performance and subsequently the parallel scalability.
## Building the code
The code is built with the Intel C++ compiler which is the compiler used to build thee code.
Simply use the provided Makefile to build by typing the
`CXX=icpc make`
command. Note that the
`-O2`
flag
is used to explicity enable optimisation in conjunction with the
`-g`
flag. If the
`-O2`
flag
is omitted, the compiler will default to
`-O0`
(level zero) optimisation when using
`-g`
.
## Executing the code
To control the problem size, the
`Ni`
,
`Nj`
, and
`Nk`
control the number of cells in the x-, y-
and z-direction, respectively. The number of time steps is controlled by the
`Nt`
variable. The
executable is called
`openmp-critical-section.exe`
.
\ No newline at end of file
omp-critical.cpp
0 → 100644
View file @
d86ceb5e
/*
SPDX-License-Identifier: BSD-3-Clause-Clear
Copyright (c) 2020, The Numerical Algorithms Group, Ltd. All rights reserved.
POP Co-Design kernel to replicate the computationally intensive kernel of an oil &
gas code.
The OpenMP parallel region contains a critical region which serialises the execution
and reduces the parallel scalability. In addition, the code spends a large amount of
time in thread lock.
Solves the 3D wave equation:
\frac{\partial^{2}u}{\partial t^{2}} & = c^{2}\nabla^{2}u
The number of cells in the x-, y- and z-direction are controlled by the Ni, Nj, and Nk
constants. The number of time steps is controlled by Nt.
*/
#include <iostream>
#include <fstream>
#include <memory>
#include <array>
#include <cmath>
#ifdef _OPENMP
#include <omp.h>
#endif
constexpr
double
xs
=
0.0
;
constexpr
double
xe
=
1.0
;
constexpr
double
ys
=
0.0
;
constexpr
double
ye
=
1.0
;
constexpr
double
zs
=
0.0
;
constexpr
double
ze
=
1.0
;
constexpr
double
ts
=
0.0
;
constexpr
double
te
=
1.0
;
constexpr
int
Ni
=
500
;
constexpr
int
Nj
=
500
;
constexpr
int
Nk
=
500
;
constexpr
int
Nt
=
20
;
constexpr
double
dx
=
(
xs
-
xe
)
/
(
double
)
Ni
;
constexpr
double
dy
=
(
ys
-
ye
)
/
(
double
)
Nj
;
constexpr
double
dz
=
(
zs
-
ze
)
/
(
double
)
Nk
;
constexpr
double
dt
=
(
ts
-
te
)
/
(
double
)
Nt
;
constexpr
double
c
=
0.01
;
double
g
(
int
i
,
int
j
,
int
k
);
double
h
(
int
i
,
int
j
,
int
k
);
template
<
size_t
I
,
size_t
J
,
size_t
K
,
class
T
>
using
array3d
=
std
::
array
<
std
::
array
<
std
::
array
<
T
,
K
>
,
J
>
,
I
>
;
// Helper typedef for chosen size
using
data_array
=
array3d
<
Ni
,
Nj
,
Nk
,
double
>
;
int
main
(
int
argc
,
char
*
argv
[]
)
{
// Um stores the solution at time step n - 1
auto
Um
=
std
::
make_unique
<
data_array
>
();
// Un stores the solution at time step n
auto
Un
=
std
::
make_unique
<
data_array
>
();
// Up stores the solution at time step n + 1
auto
Up
=
std
::
make_unique
<
data_array
>
();
double
temp
=
1.0
/
(
dx
*
dx
)
+
1.0
/
(
dy
*
dy
)
+
1.0
/
(
dz
*
dz
);
double
cx
,
cy
,
cz
,
zslice_total
=
0.0
,
t1
,
t2
,
total_time
=
0.0
;
std
::
cout
<<
"POP WP7 kernel"
<<
std
::
endl
;
std
::
cout
<<
"Version of code: original version with performance bottleneck"
<<
std
::
endl
;
std
::
cout
<<
"Implements pattern: openmp-critical-section"
<<
std
::
endl
;
std
::
cout
<<
"Problem size: Ni = "
<<
Ni
<<
", Nj = "
<<
Nj
<<
", Nk = "
<<
Nk
<<
", Nt = "
<<
Nt
<<
std
::
endl
;
#ifdef _OPENMP
std
::
cout
<<
"OMP_NUM_THREADS = "
<<
omp_get_max_threads
()
<<
std
::
endl
;
#endif
if
(
dt
>
1.0
/
c
*
(
sqrt
(
1.0
/
temp
)))
{
std
::
cerr
<<
"Error: stability criteria failed"
<<
std
::
endl
;
exit
(
1
);
}
#ifdef _OPENMP
t1
=
omp_get_wtime
();
#endif
cx
=
(
dt
*
c
)
/
dx
;
cy
=
(
dt
*
c
)
/
dy
;
cz
=
(
dt
*
c
)
/
dz
;
#ifdef _OPENMP
#pragma omp parallel for collapse(3)
#endif
for
(
int
i
=
0
;
i
<
Ni
;
i
++
)
{
// setting the initial/boundary conditions
for
(
int
j
=
0
;
j
<
Nj
;
j
++
)
{
for
(
int
k
=
0
;
k
<
Nk
;
k
++
)
{
(
*
Um
)[
i
][
j
][
k
]
=
g
(
i
,
j
,
k
);
(
*
Un
)[
i
][
j
][
k
]
=
dt
*
((
*
Um
)[
i
][
j
][
k
]
+
h
(
i
,
j
,
k
));
}
}
}
for
(
int
t
=
1
;
t
<=
Nt
;
t
++
)
{
// main time loop
#ifdef _OPENMP
#pragma omp parallel
{
#pragma omp for collapse(3)
#endif
for
(
int
i
=
1
;
i
<
Ni
-
1
;
i
++
)
{
for
(
int
j
=
1
;
j
<
Nj
-
1
;
j
++
)
{
for
(
int
k
=
1
;
k
<
Nk
-
1
;
k
++
)
{
(
*
Up
)[
i
][
j
][
k
]
=
2.0
*
(
*
Un
)[
i
][
j
][
k
]
-
(
*
Um
)[
i
][
j
][
k
]
+
cx
*
cx
*
(
(
*
Un
)[
i
+
1
][
j
][
k
]
-
2.0
*
(
*
Un
)[
i
][
j
][
k
]
+
(
*
Un
)[
i
-
1
][
j
][
k
]
)
+
cy
*
cy
*
(
(
*
Un
)[
i
][
j
+
1
][
k
]
-
2.0
*
(
*
Un
)[
i
][
j
][
k
]
+
(
*
Un
)[
i
][
j
-
1
][
k
]
)
+
cz
*
cz
*
(
(
*
Un
)[
i
][
j
][
k
+
1
]
-
2.0
*
(
*
Un
)[
i
][
j
][
k
]
+
(
*
Un
)[
i
][
j
][
k
-
1
]
);
#ifdef _OPENMP
#pragma omp critical
{
#endif
zslice_total
+=
(
*
Un
)[
i
][
j
][
k
];
#ifdef _OPENMP
}
#endif
}
// k loop
}
// j loop
}
// i loop
#ifdef _OPENMP
#pragma omp for collapse(3)
#endif
// copy arrays back again
for
(
int
i
=
0
;
i
<
Ni
;
i
++
)
{
for
(
int
j
=
0
;
j
<
Nj
;
j
++
)
{
for
(
int
k
=
0
;
k
<
Nk
;
k
++
)
{
(
*
Um
)[
i
][
j
][
k
]
=
(
*
Un
)[
i
][
j
][
k
];
(
*
Un
)[
i
][
j
][
k
]
=
(
*
Up
)[
i
][
j
][
k
];
}
}
}
#ifdef _OPENMP
}
// end openmp parallel region
#endif
std
::
cout
<<
"Time step t = "
<<
t
<<
" of "
<<
Nt
<<
std
::
endl
;
}
// main time loop
#ifdef _OPENMP
t2
=
omp_get_wtime
();
total_time
=
t2
-
t1
;
std
::
cout
<<
"Code wall time = "
<<
total_time
<<
std
::
endl
;
#endif
return
0
;
}
double
g
(
int
i
,
int
j
,
int
k
)
{
if
(
(
Ni
/
4
<=
i
&&
i
<=
Ni
/
2
+
Ni
/
4
)
&&
(
Nj
/
4
<=
j
&&
j
<=
Nj
/
2
+
Nj
/
4
)
&&
(
Nk
/
4
<=
k
&&
k
<=
Nk
/
2
+
Nk
/
4
)
)
{
return
1.0
;
}
else
{
return
0.0
;
}
}
double
h
(
int
i
,
int
j
,
int
k
)
{
return
0.0
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment