Commit acffd6bd authored by mal539's avatar mal539
Browse files

Commit with Version_0 master. Chunksize is 1.

parent c51a58cb
......@@ -1295,13 +1295,11 @@ void LocalMultiTraceOperator<LO, SC>::apply(
SC alpha,
SC beta
) {
// MPI_Barrier( MPI_COMM_WORLD );
// /* classic multiplication */
// this->apply_no_sync_optimized( x, y, transA, alpha, beta / this->nprocs );
// version 1 - master
this->apply_no_sync( x, y, transA, alpha, beta / this->nprocs );
double reft_ = MPI_Wtime();
// MPI_Barrier( MPI_COMM_WORLD );
double reft = MPI_Wtime();
MPI_Allreduce( MPI_IN_PLACE, y.getData( ), y.getLength( ), GetType<LO, SC>::MPI_SC( ), MPI_SUM, MPI_COMM_WORLD );
double time_to_gather = MPI_Wtime() - reft;
......@@ -1310,13 +1308,10 @@ void LocalMultiTraceOperator<LO, SC>::apply(
MeasurementExport::add_to_specific_value( time_to_gather, "vector [MPI Synchronization]", "s" );
// // non-blocking synchronization
// if( transA ){
// throw runtime_error( "Error: LocalMultiTraceOperator::apply( ... ), non-blocking synchronization scheme does not support transposed apply yet\n" );
// }
// else{
// this->apply_nonblocking( x, y, false, alpha, beta / this->nprocs );
// }
// version 2 - non-blocking + automatically correct chunk-size
// non-blocking synchronization
// this->apply_nonblocking( x, y, false, alpha, beta / this->nprocs );
}
template<class LO, class SC>
......@@ -2042,7 +2037,9 @@ void LocalMultiTraceOperator<LO, SC>::apply_no_sync(
LO n = this->global_row_indices_PI_GLOBAL.size();
LO ri, ci;
SC v;
// define chunk-size
CHUNKSIZE = (LO) (n / nts) / 1;
CHUNKSIZE = 1;
if( transA ){
#pragma omp for schedule( dynamic, CHUNKSIZE )
......@@ -2072,7 +2069,7 @@ void LocalMultiTraceOperator<LO, SC>::apply_no_sync(
SC v;
if( transA ){
#pragma omp for schedule( dynamic, 500 )
#pragma omp for schedule( dynamic, 1 )
for( LO i = 0; i < n; ++i ){
ri = this->global_row_indices_ID_GLOBAL[ i ];
ci = this->global_col_indices_ID_GLOBAL[ i ];
......@@ -2082,7 +2079,7 @@ void LocalMultiTraceOperator<LO, SC>::apply_no_sync(
}
}
else{
#pragma omp for schedule( dynamic, 500 )
#pragma omp for schedule( dynamic, 1 )
for( LO i = 0; i < n; ++i ){
ri = this->global_row_indices_ID_GLOBAL[ i ];
ci = this->global_col_indices_ID_GLOBAL[ i ];
......@@ -2106,13 +2103,6 @@ void LocalMultiTraceOperator<LO, SC>::apply_no_sync(
} // end of parallel region
MeasurementExport::add_to_specific_value( ProgressMonitor::getElapsedTime( ) - reft, "apply", "s", true );
// C - 0.5*PI + a*(I-PI)
// if(std::abs(partial_alpha_PI) > 0.0){
// this->PI_GLOBAL.apply( x, y, transA, partial_alpha_PI * alpha, 1.0 );
// }
// if(std::abs(partial_alpha_I) > 0.0){
// this->ID_GLOBAL.apply( x, y, transA, partial_alpha_I * alpha, 1.0 );
// }
}
template<class LO, class SC>
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment