Commit d86ceb5e authored by Xavier Teruel's avatar Xavier Teruel
Browse files

Initial commit

parents
ifeq ($(strip $(CXX)),)
CXX = gcc
endif
ifeq ($(strip $(CXX)),icpc)
CXXFLAGS = -qopenmp -std=c++14 -O2 -g -Wall
LDFLAGS = -qopenmp
else
CXXFLAGS = -fopenmp -std=c++14 -O2 -g -Wall
LDFLAGS = -fopenmp
endif
omp-critical.exe: omp-critical.o
$(CXX) $(LDFLAGS) $^ -o $@
omp-critical.o: omp-critical.cpp
$(CXX) $(CXXFLAGS) -c -I. $<
.PHONY: clean
clean:
rm -f *.exe *.o
# openmp-critical-section kernel
An oil & gas code had the openmp-critical-section pattern and the computational aspects of
the original code is recreated here. This application solves the 3D wave equation:
$$\frac{\partial^{2}u}{\partial t^{2}} & = c^{2}\nabla^{2}u$$
using the pseudospectral method. However, for the WP7 kernel the finite difference method was
selected for the purpose of simplicity which re-created the computational profile of the
original code.
The POP1 assessment identified a critical section which contained a large block of code that
enclosed computation as well as I/O. This created a large amount of thread locking time that
reduced the performance and subsequently the parallel scalability.
## Building the code
The code is built with the Intel C++ compiler which is the compiler used to build thee code.
Simply use the provided Makefile to build by typing the `CXX=icpc make` command. Note that the `-O2` flag
is used to explicity enable optimisation in conjunction with the `-g` flag. If the `-O2` flag
is omitted, the compiler will default to `-O0` (level zero) optimisation when using `-g`.
## Executing the code
To control the problem size, the `Ni`, `Nj`, and `Nk` control the number of cells in the x-, y-
and z-direction, respectively. The number of time steps is controlled by the `Nt` variable. The
executable is called `openmp-critical-section.exe`.
\ No newline at end of file
/*
SPDX-License-Identifier: BSD-3-Clause-Clear
Copyright (c) 2020, The Numerical Algorithms Group, Ltd. All rights reserved.
POP Co-Design kernel to replicate the computationally intensive kernel of an oil &
gas code.
The OpenMP parallel region contains a critical region which serialises the execution
and reduces the parallel scalability. In addition, the code spends a large amount of
time in thread lock.
Solves the 3D wave equation:
\frac{\partial^{2}u}{\partial t^{2}} & = c^{2}\nabla^{2}u
The number of cells in the x-, y- and z-direction are controlled by the Ni, Nj, and Nk
constants. The number of time steps is controlled by Nt.
*/
#include <iostream>
#include <fstream>
#include <memory>
#include <array>
#include <cmath>
#ifdef _OPENMP
#include <omp.h>
#endif
constexpr double xs = 0.0;
constexpr double xe = 1.0;
constexpr double ys = 0.0;
constexpr double ye = 1.0;
constexpr double zs = 0.0;
constexpr double ze = 1.0;
constexpr double ts = 0.0;
constexpr double te = 1.0;
constexpr int Ni = 500;
constexpr int Nj = 500;
constexpr int Nk = 500;
constexpr int Nt = 20;
constexpr double dx = ( xs - xe ) / (double)Ni;
constexpr double dy = ( ys - ye ) / (double)Nj;
constexpr double dz = ( zs - ze ) / (double)Nk;
constexpr double dt = ( ts - te ) / (double)Nt;
constexpr double c = 0.01;
double g( int i, int j, int k );
double h( int i, int j, int k );
template <size_t I, size_t J, size_t K, class T>
using array3d = std::array<std::array<std::array<T, K>, J>, I>;
// Helper typedef for chosen size
using data_array = array3d<Ni, Nj, Nk, double>;
int main( int argc, char *argv[] ) {
// Um stores the solution at time step n - 1
auto Um = std::make_unique<data_array>();
// Un stores the solution at time step n
auto Un = std::make_unique<data_array>();
// Up stores the solution at time step n + 1
auto Up = std::make_unique<data_array>();
double temp = 1.0/(dx*dx) + 1.0/(dy*dy) + 1.0/(dz*dz);
double cx, cy, cz, zslice_total = 0.0, t1, t2, total_time = 0.0;
std::cout << "POP WP7 kernel" << std::endl;
std::cout << "Version of code: original version with performance bottleneck"
<< std::endl;
std::cout << "Implements pattern: openmp-critical-section" << std::endl;
std::cout << "Problem size: Ni = " << Ni << ", Nj = " << Nj << ", Nk = "
<< Nk << ", Nt = " << Nt << std::endl;
#ifdef _OPENMP
std::cout << "OMP_NUM_THREADS = " << omp_get_max_threads() << std::endl;
#endif
if ( dt > 1.0/c*(sqrt(1.0/temp))) {
std::cerr << "Error: stability criteria failed" << std::endl;
exit( 1 );
}
#ifdef _OPENMP
t1 = omp_get_wtime();
#endif
cx = ( dt * c ) / dx;
cy = ( dt * c ) / dy;
cz = ( dt * c ) / dz;
#ifdef _OPENMP
#pragma omp parallel for collapse(3)
#endif
for ( int i = 0; i < Ni; i++ ) { // setting the initial/boundary conditions
for ( int j = 0; j < Nj; j++ ) {
for ( int k = 0; k < Nk; k++ ) {
(*Um)[i][j][k] = g( i, j , k );
(*Un)[i][j][k] = dt*((*Um)[i][j][k] + h( i, j, k ));
}
}
}
for ( int t = 1; t <= Nt; t++ ) { // main time loop
#ifdef _OPENMP
#pragma omp parallel
{
#pragma omp for collapse(3)
#endif
for ( int i = 1; i < Ni - 1; i++ ) {
for ( int j = 1; j < Nj - 1; j++ ) {
for ( int k = 1; k < Nk - 1; k++ ) {
(*Up)[i][j][k] = 2.0*(*Un)[i][j][k] - (*Um)[i][j][k] +
cx*cx * ( (*Un)[i+1][j][k] - 2.0*(*Un)[i][j][k] + (*Un)[i-1][j][k] ) +
cy*cy * ( (*Un)[i][j+1][k] - 2.0*(*Un)[i][j][k] + (*Un)[i][j-1][k] ) +
cz*cz * ( (*Un)[i][j][k+1] - 2.0*(*Un)[i][j][k] + (*Un)[i][j][k-1] );
#ifdef _OPENMP
#pragma omp critical
{
#endif
zslice_total += (*Un)[i][j][k];
#ifdef _OPENMP
}
#endif
} // k loop
} // j loop
} // i loop
#ifdef _OPENMP
#pragma omp for collapse(3)
#endif
// copy arrays back again
for ( int i = 0; i < Ni; i++ ) {
for ( int j = 0; j < Nj; j++ ) {
for ( int k = 0; k < Nk; k++ ) {
(*Um)[i][j][k] = (*Un)[i][j][k];
(*Un)[i][j][k] = (*Up)[i][j][k];
}
}
}
#ifdef _OPENMP
} // end openmp parallel region
#endif
std::cout << "Time step t = " << t << " of " << Nt << std::endl;
} // main time loop
#ifdef _OPENMP
t2 = omp_get_wtime();
total_time = t2 - t1;
std::cout << "Code wall time = " << total_time << std::endl;
#endif
return 0;
}
double g( int i, int j, int k ) {
if ( ( Ni/4 <= i && i <= Ni/2 + Ni/4 ) &&
( Nj/4 <= j && j <= Nj/2 + Nj/4 ) &&
( Nk/4 <= k && k <= Nk/2 + Nk/4 ) ) {
return 1.0;
} else {
return 0.0;
}
}
double h( int i, int j, int k ) {
return 0.0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment