Advanced Multi-Physics (AMP)
On-Line Documentation
Classes | Public Types | Public Member Functions | Static Public Member Functions | Static Public Attributes | Private Types | Private Member Functions | Private Attributes | Static Private Attributes | List of all members
AMP::AMP_MPI Class Reference (final)

Provides C++ wrapper around MPI routines. More...

#include <AMP_MPI.h>

Classes

class  Request
 

Public Types

typedef MPI_Comm Comm
 
typedef MPI_Datatype Datatype
 
typedef MPI_Request Request2
 
enum class  ThreadSupport : int { SINGLE , FUNNELED , SERIALIZED , MULTIPLE }
 

Public Member Functions

void abort () const
 
template<class type >
std::vector< type > allGather (const std::vector< type > &x) const
 
template<class type >
std::vector< type > allGather (const type &x) const
 
template<class type >
void allGather (const type &x_in, type *x_out) const
 
template<class type >
int allGather (const type *send_data, int send_cnt, type *recv_data, int *recv_cnt=nullptr, int *recv_disp=nullptr, bool known_recv=false) const
 
bool allReduce (const bool value) const
 Boolean all reduce.
 
void allReduce (std::vector< bool > &value) const
 Boolean all reduce.
 
template<class type >
std::vector< type > allToAll (const std::vector< type > &send) const
 
template<class type , class int_type >
std::vector< type > allToAll (const std::vector< type > &send_data, const std::vector< int_type > &send_cnt, const std::vector< int_type > &send_disp, const std::vector< int_type > &recv_cnt, const std::vector< int_type > &recv_disp) const
 
template<class type >
int allToAll (const type *send_data, const int send_cnt[], const int send_disp[], type *recv_data, int *recv_cnt=nullptr, int *recv_disp=nullptr, bool known_recv=false) const
 
template<class type >
void allToAll (int n, const type *send_data, type *recv_data) const
 
 AMP_MPI ()
 Empty constructor.
 
 AMP_MPI (AMP_MPI &&rhs)
 
 AMP_MPI (Comm comm, bool manage=false)
 Constructor from existing MPI communicator.
 
 AMP_MPI (const AMP_MPI &comm)
 Constructor from existing communicator.
 
bool anyReduce (const bool value) const
 Boolean any reduce.
 
void anyReduce (std::vector< bool > &value) const
 Boolean any reduce.
 
void barrier () const
 
template<class type >
type bcast (const type &value, int root) const
 Broadcast.
 
template<class type >
void bcast (type *value, int n, int root) const
 Broadcast.
 
int calcAllToAllDisp (const int *send_cnt, int *send_disp, int *recv_cnt, int *recv_disp) const
 
int calcAllToAllDisp (const std::vector< int > &send_cnt, std::vector< int > &send_disp, std::vector< int > &recv_cnt, std::vector< int > &recv_disp) const
 
std::vector< int > commRanks (const std::vector< int > &ranks) const
 Send a list of processor ids to communicate.
 
int compare (const AMP_MPI &) const
 Compare to another communicator.
 
AMP_MPI dup (bool manage=true) const
 Duplicate an existing communicator.
 
template<class type >
std::vector< type > gather (const std::vector< type > &x, int root) const
 
template<class type >
std::vector< type > gather (const type &x, int root) const
 
template<class type >
void gather (const type *send_data, int send_cnt, type *recv_data, const int *recv_cnt, const int *recv_disp, int root) const
 
const Comm & getCommunicator () const
 
int getRank () const
 
int getSize () const
 
std::vector< int > globalRanks () const
 Return the global ranks for the comm.
 
uint64_t hash () const
 Return a unique hash id for the comm.
 
uint64_t hashRanks () const
 Return a hash global ranks.
 
std::tuple< int, int, int > Iprobe (int source=-1, int tag=-1) const
 Nonblocking test for a message.
 
template<class type >
Request Irecv (type &data, int send_proc, int tag) const
 This function receives an MPI message with a data array from another processor using a non-blocking call.
 
template<class type >
Request Irecv (type *buf, int length, int send_proc, int tag) const
 This function receives an MPI message with a data array from another processor using a non-blocking call.
 
Request IrecvBytes (void *buf, int N_bytes, int send_proc, int tag) const
 This function receives an MPI message with an array of max size number_bytes (MPI_BYTES) from any processor.
 
template<class type >
Request Isend (const type &data, int recv_proc, int tag) const
 This function sends an MPI message with an array to another processor using a non-blocking call. The receiving processor must know the length of the array. This call must be paired with a matching call to Irecv.
 
template<class type >
Request Isend (const type *buf, int length, int recv_proc, int tag) const
 This function sends an MPI message with an array to another processor using a non-blocking call. The receiving processor must know the length of the array. This call must be paired with a matching call to Irecv.
 
Request IsendBytes (const void *buf, int N_bytes, int recv_proc, int tag) const
 This function sends an MPI message with an array of bytes (MPI_BYTES) to receiving_proc_number using a non-blocking call. The receiving processor must know the number of bytes to receive. This call must be paired with a matching call to IrecvBytes.
 
bool isNull () const
 
template<class KEY , class DATA >
void mapGather (std::map< KEY, DATA > &map) const
 
template<class type >
type maxReduce (const type &value) const
 Max Reduce.
 
template<class type >
void maxReduce (const type *x, type *y, int n) const
 Max Reduce.
 
template<class type >
void maxReduce (const type *x, type *y, int n, int *rank) const
 Max Reduce.
 
template<class type >
void maxReduce (type *x, int n) const
 Max Reduce.
 
template<class type >
void maxReduce (type *x, int n, int *rank) const
 Max Reduce.
 
template<class type >
type maxScan (const type &x) const
 Scan Max Reduce.
 
template<class type >
void maxScan (const type *x, type *y, int n) const
 Scan Max Reduce.
 
int maxTag () const
 
template<class type >
type minReduce (const type &value) const
 Min Reduce.
 
template<class type >
void minReduce (const type *x, type *y, int n) const
 Min Reduce.
 
template<class type >
void minReduce (const type *x, type *y, int n, int *rank) const
 Sum Reduce.
 
template<class type >
void minReduce (type *x, int n) const
 Min Reduce.
 
template<class type >
void minReduce (type *x, int n, int *rank) const
 Min Reduce.
 
template<class type >
type minScan (const type &x) const
 Scan Min Reduce.
 
template<class type >
void minScan (const type *x, type *y, int n) const
 Scan Min Reduce.
 
int newTag () const
 Return a new tag.
 
bool operator!= (const AMP_MPI &) const
 Overload operator !=.
 
bool operator< (const AMP_MPI &) const
 Overload operator <.
 
bool operator<= (const AMP_MPI &) const
 Overload operator <=.
 
AMP_MPI & operator= (AMP_MPI &&rhs)
 
AMP_MPI & operator= (const AMP_MPI &comm)
 Assignment operator.
 
bool operator== (const AMP_MPI &) const
 Overload operator ==.
 
bool operator> (const AMP_MPI &) const
 Overload operator >
 
bool operator>= (const AMP_MPI &) const
 Overload operator >=.
 
std::tuple< int, int, int > probe (int source=-1, int tag=-1) const
 Blocking test for a message.
 
size_t rand () const
 Generate a random number.
 
template<class type >
type recv (int send, int tag=0) const
 This function receives an MPI message with a data array from another processor. This call must be paired with a matching call to send.
 
template<class type >
void recv (type *buf, int &length, int send, bool get_length, int tag=0) const
 This function receives an MPI message with a data array from another processor.
 
template<class type >
void recv (type *buf, int length, int send, int tag=0) const
 This function receives an MPI message with a data array from another processor. This call must be paired with a matching call to send.
 
void recvBytes (void *buf, int N_bytes, int send, int tag=0) const
 This function receives an MPI message with an array of max size number_bytes (MPI_BYTES) from any processor.
 
void reset ()
 Reset the object.
 
template<class type >
void send (const type &data, int recv, int tag=0) const
 This function sends an MPI message with an array to another processor.
 
template<class type >
void send (const type *buf, int length, int recv, int tag=0) const
 This function sends an MPI message with an array to another processor.
 
void sendBytes (const void *buf, int N_bytes, int recv, int tag=0) const
 This function sends an MPI message with an array of bytes (MPI_BYTES) to receiving_proc_number.
 
template<class type >
void sendrecv (const type *sendbuf, int sendcount, int dest, int sendtag, type *recvbuf, int recvcount, int source, int recvtag) const
 This function sends and receives data using a blocking call.
 
void serializeStart () const
 Start a serial region.
 
void serializeStop () const
 Stop a serial region.
 
void setCallAbortInSerialInsteadOfExit (bool flag=true)
 
template<class type >
void setGather (std::set< type > &set) const
 
void sleepBarrier (int ms=10) const
 Perform a global barrier putting idle processors to sleep.
 
AMP_MPI split (int color, int key=-1, bool manage=true) const
 Split an existing communicator.
 
AMP_MPI splitByNode (int key=-1, bool manage=true) const
 Split an existing communicator by node.
 
template<class type >
type sumReduce (const type &value) const
 Sum Reduce.
 
template<class type >
void sumReduce (const type *x, type *y, int n=1) const
 Sum Reduce.
 
template<class type >
void sumReduce (type *x, int n=1) const
 Sum Reduce.
 
template<class type >
type sumScan (const type &x) const
 Scan Sum Reduce.
 
template<class type >
void sumScan (const type *x, type *y, int n) const
 Scan Sum Reduce.
 
 ~AMP_MPI ()
 Empty destructor.
 

Static Public Member Functions

static void balanceProcesses (const AMP_MPI &comm=AMP_MPI(AMP::AMP_MPI::commWorld), int method=1, const std::vector< int > &procs=std::vector< int >(), int N_min=1, int N_max=-1)
 Load balance the processes within a node.
 
static void changeProfileLevel (int level)
 Change the level of the internal timers.
 
static std::string getNodeName ()
 Get the node name.
 
static int getNumberOfProcessors ()
 Function to return the number of processors available.
 
static std::vector< int > getProcessAffinity ()
 Function to return the affinity of the current process.
 
static std::string info ()
 Return details about MPI.
 
static AMP_MPI intersect (const AMP_MPI &comm1, const AMP_MPI &comm2)
 Create a communicator from the intersection of two communicators.
 
static bool MPI_Active ()
 Check if MPI is active.
 
static size_t MPI_Comm_created ()
 Return the total number of MPI_Comm objects that have been created.
 
static size_t MPI_Comm_destroyed ()
 Return the total number of MPI_Comm objects that have been destroyed.
 
static ThreadSupport queryThreadSupport ()
 Query the level of thread support.
 
static void setProcessAffinity (const std::vector< int > &procs)
 Function to set the affinity of the current process.
 
static void start_MPI (int &argc, char *argv[], int profile_level=0)
 Start MPI.
 
static void stop_MPI ()
 Stop MPI.
 
static double tick ()
 Timer resolution.
 
static double time ()
 Elapsed time.
 
static std::array< int, 2 > version ()
 Return the MPI version number { major, minor }.
 
static void wait (const Request &request)
 Wait for a communication to finish.
 
static void wait (Request2 request)
 Wait for a communication to finish.
 
static void waitAll (int count, const Request *request)
 Wait for all communications to finish.
 
static void waitAll (int count, Request2 *request)
 Wait for all communications to finish.
 
static int waitAny (int count, const Request *request)
 Wait for any communication to finish.
 
static int waitAny (int count, Request2 *request)
 Wait for any communication to finish.
 
static std::vector< int > waitSome (int count, const Request *request)
 Wait for some communications to finish.
 
static std::vector< int > waitSome (int count, Request2 *request)
 Wait for some communications to finish.
 

Static Public Attributes

static Comm commNull
 
static Comm commSelf
 
static Comm commWorld
 
static constexpr bool has_MPI = true
 
static constexpr uint64_t hashMPI = 0x641118b35a0d87cd
 
static constexpr uint64_t hashNull = 0xcc6bc5507c132516
 
static constexpr uint64_t hashSelf = 0x070b9699a107fe57
 
static constexpr uint64_t hashWorld = 0x3d5fdf58e4df5a94
 

Private Types

using atomic_int = volatile std::atomic_int64_t
 
using atomic_ptr = std::atomic_int *volatile
 
using int_ptr = int *volatile
 
using rand_ptr = std::mt19937_64 *volatile
 

Private Member Functions

std::mt19937_64 * getRand () const
 

Private Attributes

bool d_call_abort = true
 Do we want to call MPI_abort instead of exit.
 
Comm d_comm = commNull
 The internal MPI communicator.
 
atomic_ptr d_count = nullptr
 How many objects share the communicator.
 
int_ptr d_currentTag = nullptr
 The current tag.
 
uint64_t d_hash = hashNull
 A unique hash for the comm (consistent across comm)
 
bool d_isNull = true
 Is the communicator NULL.
 
bool d_manage = false
 Do we want to manage this communicator.
 
rand_ptr d_rand = nullptr
 Internal random number generator.
 
int d_rank = 0
 The rank of the communicator.
 
int_ptr d_ranks = nullptr
 The ranks of the comm in the global comm.
 
int d_size = 1
 The size of the communicator.
 

Static Private Attributes

static int d_maxTag
 The maximum valid tag.
 
static atomic_int N_MPI_Comm_created
 Number of MPI_Comm objects created over time.
 
static atomic_int N_MPI_Comm_destroyed
 Number of MPI_Comm objects destroyed over time.
 
static short profile_level
 The level for the profiles of MPI.
 

Detailed Description

Provides C++ wrapper around MPI routines.

Class AMP_MPI groups common MPI routines into one globally-accessible location. It provides small, simple routines that are common in MPI code. In some cases, the calling syntax has been simplified for convenience. Moreover, there is no reason to include the preprocessor ifdef/endif guards around these calls, since the MPI libraries are not called in these routines if the MPI libraries are not being used (e.g., when writing serial code). Note: Many of the communication routines are templated on type. When using unknown types the reduce calls will fail, the send and gather calls should succeed provided that the size of the data type object is a fixed size on all processors. sizeof(type) must be the same for all elements and processors.

Definition at line 62 of file AMP_MPI.h.

Member Typedef Documentation

◆ atomic_int

using AMP::AMP_MPI::atomic_int = volatile std::atomic_int64_t
private

Definition at line 1469 of file AMP_MPI.h.

◆ atomic_ptr

using AMP::AMP_MPI::atomic_ptr = std::atomic_int *volatile
private

Definition at line 1468 of file AMP_MPI.h.

◆ Comm

typedef MPI_Comm AMP::AMP_MPI::Comm

Definition at line 68 of file AMP_MPI.h.

◆ Datatype

typedef MPI_Datatype AMP::AMP_MPI::Datatype

Definition at line 69 of file AMP_MPI.h.

◆ int_ptr

using AMP::AMP_MPI::int_ptr = int *volatile
private

Definition at line 1470 of file AMP_MPI.h.

◆ rand_ptr

using AMP::AMP_MPI::rand_ptr = std::mt19937_64 *volatile
private

Definition at line 1471 of file AMP_MPI.h.

◆ Request2

typedef MPI_Request AMP::AMP_MPI::Request2

Definition at line 70 of file AMP_MPI.h.

Member Enumeration Documentation

◆ ThreadSupport

enum class AMP::AMP_MPI::ThreadSupport : int
strong
Enumerator
SINGLE 
FUNNELED 
SERIALIZED 
MULTIPLE 

Definition at line 65 of file AMP_MPI.h.

Constructor & Destructor Documentation

◆ AMP_MPI() [1/4]

AMP::AMP_MPI::AMP_MPI ( )

Empty constructor.

This creates an empty object that does not contain an MPI communicator.

◆ ~AMP_MPI()

AMP::AMP_MPI::~AMP_MPI ( )

Empty destructor.

◆ AMP_MPI() [2/4]

AMP::AMP_MPI::AMP_MPI ( Comm  comm,
bool  manage = false 
)

Constructor from existing MPI communicator.

This constructor creates a new communicator from an existing MPI communicator. This does not create a new internal MPI_Comm, but uses the existing comm. Note that by default, this will not free the MPI_Comm object and the user is responsible for free'ing the MPI_Comm when it is no longer used. This behavior is controlled by the optional manage argument.

Parameters
[in]commExisting MPI communicator
[in]manageDo we want to manage the comm (free the MPI_Comm when this object leaves scope)

◆ AMP_MPI() [3/4]

AMP::AMP_MPI::AMP_MPI ( const AMP_MPI comm)

Constructor from existing communicator.

This constructor creates a new communicator from an existing communicator. This does not create a new internal Comm, but uses the existing comm.

Parameters
[in]commExisting communicator

◆ AMP_MPI() [4/4]

AMP::AMP_MPI::AMP_MPI ( AMP_MPI &&  rhs)

Move constructor

Parameters
[in]rhsCommunicator to copy

Member Function Documentation

◆ abort()

void AMP::AMP_MPI::abort ( ) const

Call MPI_Abort or exit depending on whether running with one or more processes and value set by function above, if called. The default is to call exit(-1) if running with one processor and to call MPI_Abort() otherwise. This function avoids having to guard abort calls in application code.

◆ allGather() [1/4]

template<class type >
std::vector< type > AMP::AMP_MPI::allGather ( const std::vector< type > &  x) const

Each processor sends every other processor an array

Parameters
[in]xInput array for allGather
Returns
Output array for allGather

◆ allGather() [2/4]

template<class type >
std::vector< type > AMP::AMP_MPI::allGather ( const type &  x) const

Each processor sends every other processor a single value.

Parameters
[in]xInput value for allGather
Returns
Output array for allGather

◆ allGather() [3/4]

template<class type >
void AMP::AMP_MPI::allGather ( const type &  x_in,
type *  x_out 
) const

Each processor sends every other processor a single value. The x_out array should be preallocated to a length equal to the number of processors.

Parameters
[in]x_inInput value for allGather
[out]x_outOutput array for allGather (must be preallocated to the size of the communicator)

◆ allGather() [4/4]

template<class type >
int AMP::AMP_MPI::allGather ( const type *  send_data,
int  send_cnt,
type *  recv_data,
int *  recv_cnt = nullptr,
int *  recv_disp = nullptr,
bool  known_recv = false 
) const

Each processor sends an array of data to all other processors. Each processor receives the values from all processors and gathers them to a single array. If successful, the total number of received elements will be returned.

Parameters
[in]send_dataInput array
[in]send_cntThe number of values to send
[in]recv_dataOutput array of received values
[in]recv_cntThe number of values to receive from each processor (N). If known, this should be provided as an input. Otherwise it is an optional output that will return the number of received values from each processor.
[in]recv_dispThe displacement (relative to the start of the array) from which to store the data received from processor i. If known, this should be provided as an input. Otherwise it is an optional output that will return the starting location (relative to the start of the array) for the received data from processor i.
[in]known_recvAre the received counts and displacements known. If the received sizes are known, then they must be provided, and an extra communication step is not necessary. If the received sizes are not known, then an extra communication step will occur and the sizes and displacements will be returned (if desired).

◆ allReduce() [1/2]

bool AMP::AMP_MPI::allReduce ( const bool  value) const

Boolean all reduce.

This function performs a boolean all reduce across all processors. It returns true iff all processors are true.

Parameters
[in]valueThe input value for the all reduce

◆ allReduce() [2/2]

void AMP::AMP_MPI::allReduce ( std::vector< bool > &  value) const

Boolean all reduce.

This function performs a boolean all reduce across all processors. It returns true iff all processors are true.

Parameters
[in]valueThe input value for the all reduce

◆ allToAll() [1/4]

template<class type >
std::vector< type > AMP::AMP_MPI::allToAll ( const std::vector< type > &  send) const

Each processor sends a single value to each processor. Each processor sends a single value to each processor. Note that this is a blocking global communication.

Parameters
[in]sendInput array (nxN)
Returns
Output array of received values (nxN)

◆ allToAll() [2/4]

template<class type , class int_type >
std::vector< type > AMP::AMP_MPI::allToAll ( const std::vector< type > &  send_data,
const std::vector< int_type > &  send_cnt,
const std::vector< int_type > &  send_disp,
const std::vector< int_type > &  recv_cnt,
const std::vector< int_type > &  recv_disp 
) const

Each processor sends an array of data to the different processors. Each processor may send any size array to any processor. In the variable description, N is the size of the communicator. Note that this is a blocking global communication.

Parameters
[in]send_dataInput array
[in]send_cntThe number of values to send to each processor (N)
[in]send_dispThe displacement (relative to the start of the array) from which to send to processor i
[in]recv_cntThe number of values to receive from each processor (N). If known, this should be provided as an input. Otherwise it is an optional output that will return the number of received values from each processor.
[in]recv_dispThe displacement (relative to the start of the array) from which to send to processor i. If known, this should be provided as an input. Otherwise it is an optional output that will return the starting location (relative to the start of the array) for the received data from processor i.

◆ allToAll() [3/4]

template<class type >
int AMP::AMP_MPI::allToAll ( const type *  send_data,
const int  send_cnt[],
const int  send_disp[],
type *  recv_data,
int *  recv_cnt = nullptr,
int *  recv_disp = nullptr,
bool  known_recv = false 
) const

Each processor sends an array of data to the different processors. Each processor may send any size array to any processor. In the variable description, N is the size of the communicator. Note that this is a blocking global communication. If successful, the total number of received elements will be returned.

Parameters
[in]send_dataInput array
[in]send_cntThe number of values to send to each processor (N)
[in]send_dispThe displacement (relative to the start of the array) from which to send to processor i
[in]recv_dataOutput array of received values
[in]recv_cntThe number of values to receive from each processor (N). If known, this should be provided as an input. Otherwise it is an optional output that will return the number of received values from each processor.
[in]recv_dispThe displacement (relative to the start of the array) from which to send to processor i. If known, this should be provided as an input. Otherwise it is an optional output that will return the starting location (relative to the start of the array) for the received data from processor i.
[in]known_recvAre the received counts and displacements known. If the received sizes are known, then they must be provided, and an extra communication step is not necessary. If the received sizes are not known, then an extra communication step will occur internally and the sizes and displacements will be returned (if desired).

◆ allToAll() [4/4]

template<class type >
void AMP::AMP_MPI::allToAll ( int  n,
const type *  send_data,
type *  recv_data 
) const

Each processor sends an array of n values to each processor. Each processor sends an array of n values to each processor. The jth block of data is sent from processor i to processor j and placed in the ith block on the receiving processor. In the variable description, N is the size of the communicator. Note that this is a blocking global communication.

Parameters
[in]nThe number of elements in each data block to send.
[in]send_dataInput array (nxN)
[in]recv_dataOutput array of received values (nxN)

◆ anyReduce() [1/2]

bool AMP::AMP_MPI::anyReduce ( const bool  value) const

Boolean any reduce.

This function performs a boolean any reduce across all processors. It returns true if any processor is true.

Parameters
[in]valueThe input value for the all reduce

◆ anyReduce() [2/2]

void AMP::AMP_MPI::anyReduce ( std::vector< bool > &  value) const

Boolean any reduce.

This function performs a boolean any reduce across all processors. It returns true if any processor is true.

Parameters
[in]valueThe input value for the all reduce

◆ balanceProcesses()

static void AMP::AMP_MPI::balanceProcesses ( const AMP_MPI comm = AMP_MPI(AMP::AMP_MPI::commWorld),
int  method = 1,
const std::vector< int > &  procs = std::vector< int >(),
int  N_min = 1,
int  N_max = -1 
)
static

Load balance the processes within a node.

This function will redistribute the processes within a node using the process affinities to achieve the desired load balance. Note: this is a global operation on the given comm, and it is STRONGLY recommended to use COMM_WORLD.

Parameters
[in]commThe communicator to use (Default is COMM_WORLD)
[in]methodThe desired load balance method to use: 1: Adjust the affinities so all processes share the given processors. This effectively allows the OS to handle the load balancing by migrating the processes as necessary. This is recommended for most users and use cases. (default) 2: Adjust the affinities so that the fewest number of processes overlap. This will try to give each process a unique set of processors while ensuring that each process has at least N_min processes.
[in]procsAn optional list of processors to use. By default, setting this to an empty vector will use all available processors on the given node.
[in]N_minThe minimum number of processors for any process (-1 indicates all available processors).
[in]N_maxThe maximum number of processors for any process (-1 indicates all available processors).

◆ barrier()

void AMP::AMP_MPI::barrier ( ) const

Perform a global barrier across all processors.

◆ bcast() [1/2]

template<class type >
type AMP::AMP_MPI::bcast ( const type &  value,
int  root 
) const

Broadcast.

This function broadcasts a value from root to all processors

Parameters
[in]valueThe input value for the broadcast.
[in]rootThe processor performing the broadcast

Referenced by AMP::Operator::WeldOperator::apply().

◆ bcast() [2/2]

template<class type >
void AMP::AMP_MPI::bcast ( type *  value,
int  n,
int  root 
) const

Broadcast.

This function broadcasts an array from root to all processors

Parameters
[in]valueThe input/output array for the broadcast
[in]nThe number of values in the array (must match on all nodes)
[in]rootThe processor performing the broadcast

◆ calcAllToAllDisp() [1/2]

int AMP::AMP_MPI::calcAllToAllDisp ( const int *  send_cnt,
int *  send_disp,
int *  recv_cnt,
int *  recv_disp 
) const

Compute the number of values to receive and the appropriate send/recv offsets for AllToAll

Parameters
[in]send_cntThe number of values to send to each processor
[out]send_dispThe displacement (relative to the start of the array) from which to send to processor i
[out]recv_cntThe number of values to receive from each processor. If known, this should be provided as an input. Otherwise it is an optional output that will return the number of received values from each processor.
[out]recv_dispThe displacement (relative to the start of the array) from which to send to processor i.

◆ calcAllToAllDisp() [2/2]

int AMP::AMP_MPI::calcAllToAllDisp ( const std::vector< int > &  send_cnt,
std::vector< int > &  send_disp,
std::vector< int > &  recv_cnt,
std::vector< int > &  recv_disp 
) const

Compute the number of values to receive and the appropriate send/recv offsets for AllToAll

Parameters
[in]send_cntThe number of values to send to each processor
[out]send_dispThe displacement (relative to the start of the array) from which to send to processor i
[out]recv_cntThe number of values to receive from each processor. If known, this should be provided as an input. Otherwise it is an optional output that will return the number of received values from each processor.
[out]recv_dispThe displacement (relative to the start of the array) from which to send to processor i.

◆ changeProfileLevel()

static void AMP::AMP_MPI::changeProfileLevel ( int  level)
inlinestatic

Change the level of the internal timers.

This function changes the level of the timers used to profile MPI

Parameters
[in]levelNew level of the timers

Definition at line 1429 of file AMP_MPI.h.

References profile_level.

◆ commRanks()

std::vector< int > AMP::AMP_MPI::commRanks ( const std::vector< int > &  ranks) const

Send a list of proccesor ids to communicate.

This function communicates a list of processors to communicate with. Given a list of ranks that we want to send/receive data to/from, this routine will communicate that set to the other ranks, returning the list of processors that want to communicate with the current rank. Note: this routine involves global communication

Parameters
[in]ranksList of ranks that the current rank wants to communicate with
Returns
List of ranks that want to communicate with the current processor

◆ compare()

int AMP::AMP_MPI::compare ( const AMP_MPI ) const

Compare to another communicator.

This compares the current communicator to another communicator. This returns 1 if the two communicators are equal (they share the same MPI communicator), 2 if the contexts and groups are the same, 3 if different contexts but identical groups, 4 if different contexts but similar groups, and 0 otherwise. Note: this is a local operation.

◆ dup()

AMP_MPI AMP::AMP_MPI::dup ( bool  manage = true) const

Duplicate an existing communicator.

This creates a new communicator by duplicating an existing communicator. The resulting communicator will exist over the same processes, but have a different context. Note: the underlying MPI_Comm object will be free'd automatically when it is no longer used by any MPI objects.

Parameters
[in]manageDo we want to manage the comm (free the MPI_Comm when this object leaves scope)

◆ gather() [1/3]

template<class type >
std::vector< type > AMP::AMP_MPI::gather ( const std::vector< type > &  x,
int  root 
) const

Each processor sends every other processor an array

Parameters
[in]xInput array to send
[in]rootThe processor receiving the data
Returns
Output array for gather (empty if not root)

◆ gather() [2/3]

template<class type >
std::vector< type > AMP::AMP_MPI::gather ( const type &  x,
int  root 
) const

Each processor sends a value to root

Parameters
[in]xInput value to send
[in]rootThe processor receiving the data
Returns
Output array for gather (empty if not root)

◆ gather() [3/3]

template<class type >
void AMP::AMP_MPI::gather ( const type *  send_data,
int  send_cnt,
type *  recv_data,
const int *  recv_cnt,
const int *  recv_disp,
int  root 
) const

Each processor sends multiple values to root

Parameters
[in]send_dataInput array
[in]send_cntThe number of values to send
[out]recv_dataArray of received values
[in]recv_cntThe number of values to receive from each processor (N). If known, this should be provided as an input.
[in]recv_dispThe displacement (relative to the start of the array) from which to store the data received from processor i. If known, this should be provided as an input.
[in]rootThe root processor

◆ getCommunicator()

const Comm & AMP::AMP_MPI::getCommunicator ( ) const

Get the current MPI communicator. Note: The underlying MPI_Comm object may be free'd by the object when it is no longer used by any communicators. If the user has made a copy using the getCommunicator routine, then it may be free'd without user knowledge. The user is responsible for checking if the communicator is valid, or keeping a copy of the communicator that provided the MPI_Communicator.

Referenced by AMP::LinearAlgebra::NativePetscVectorFactory::getVector().

◆ getNodeName()

static std::string AMP::AMP_MPI::getNodeName ( )
static

Get the node name.

This function returns a unique name for each node. It is a wrapper for MPI_Get_processor_name.

◆ getNumberOfProcessors()

static int AMP::AMP_MPI::getNumberOfProcessors ( )
static

Function to return the number of processors available.

◆ getProcessAffinity()

static std::vector< int > AMP::AMP_MPI::getProcessAffinity ( )
static

Function to return the affinity of the current process.

◆ getRand()

std::mt19937_64 * AMP::AMP_MPI::getRand ( ) const
private

◆ getRank()

int AMP::AMP_MPI::getRank ( ) const

Return the processor rank (identifier) from 0 through the number of processors minus one.

Referenced by AMP::LinearAlgebra::ArrayVectorFactory< TYPE >::getVector().

◆ getSize()

int AMP::AMP_MPI::getSize ( ) const

◆ globalRanks()

std::vector< int > AMP::AMP_MPI::globalRanks ( ) const

Return the global ranks for the comm.

This returns a vector which contains the global ranks for each member of the communicator. The global ranks are defined according to WORLD comm.

◆ hash()

uint64_t AMP::AMP_MPI::hash ( ) const

Return a unique hash id for the comm.

This returns a hash which is unique for the comm. Two AMP_MPI objects that share the same underlying MPI_Comm object will have the same hash. Two objects that have the same ranks but different MPI_Comm objects will have different hash values.

◆ hashRanks()

uint64_t AMP::AMP_MPI::hashRanks ( ) const

Return a hash global ranks.

This returns a hash which is unique based on the global ranks. It will also be different for null and self comms. Two comms that share the same ranks (e.g. dup) will have the same hash

◆ info()

static std::string AMP::AMP_MPI::info ( )
static

Return details about MPI.

◆ intersect()

static AMP_MPI AMP::AMP_MPI::intersect ( const AMP_MPI comm1,
const AMP_MPI comm2 
)
static

Create a communicator from the intersection of two communicators.

This creates a new communicator by intersecting two existing communicators. Any processors that are not contained in both communicators will receive a NULL communicator. There are 3 possible cases: The communicators are disjoint (a null communicator will be returned on all processors). One communicator is a sub communicator of another. This will require communication on the smaller communicator only. The communicators partially overlap. This will require communication on the first communicator.

Parameters
[in]comm1First communicator
[in]comm2Second communicator

◆ Iprobe()

std::tuple< int, int, int > AMP::AMP_MPI::Iprobe ( int  source = -1,
int  tag = -1 
) const

Nonblocking test for a message.

This function performs a non-blocking test for a message. It will return the number of bytes in the message if a message with the specified source and tag (on the current communicator) is available. Otherwise it will return -1.

Parameters
[in]sourcesource rank (-1: any source)
[in]tagtag (-1: any tag)
Returns
Tuple of three ints: matched source, matched tag, number of bytes

◆ Irecv() [1/2]

template<class type >
Request AMP::AMP_MPI::Irecv ( type &  data,
int  send_proc,
int  tag 
) const

This function receives an MPI message with a data array from another processor using a non-blocking call.

Parameters
[in]dataData to receive
[in]send_procProcessor number of sender.
[in]tagOptional integer argument specifying a tag which must be matched by the tag of the incoming message.

◆ Irecv() [2/2]

template<class type >
Request AMP::AMP_MPI::Irecv ( type *  buf,
int  length,
int  send_proc,
int  tag 
) const

This function receives an MPI message with a data array from another processor using a non-blocking call.

Parameters
[in]bufReceive buffer
[in]lengthMaximum number of values that can be stored in buf.
[in]send_procProcessor number of sender.
[in]tagOptional integer argument specifying a tag which must be matched by the tag of the incoming message.

◆ IrecvBytes()

Request AMP::AMP_MPI::IrecvBytes ( void *  buf,
int  N_bytes,
int  send_proc,
int  tag 
) const

This function receives an MPI message with an array of max size number_bytes (MPI_BYTES) from any processor.

This call must be paired with a matching call to sendBytes.

Parameters
[in]bufVoid pointer to a buffer of size number_bytes bytes.
[in]N_bytesInteger number specifying size of buf in bytes.
[in]send_procProcessor number of sender.
[in]tagInteger argument specifying a tag which must be matched by the tag of the incoming message.

◆ Isend() [1/2]

template<class type >
Request AMP::AMP_MPI::Isend ( const type &  data,
int  recv_proc,
int  tag 
) const

This function sends an MPI message with an array to another processor using a non-blocking call. The receiving processor must know the length of the array. This call must be paired with a matching call to Irecv.

Parameters
[in]dataData to send
[in]recv_procReceiving processor number.
[in]tagInteger argument specifying an integer tag to be sent with this message.

◆ Isend() [2/2]

template<class type >
Request AMP::AMP_MPI::Isend ( const type *  buf,
int  length,
int  recv_proc,
int  tag 
) const

This function sends an MPI message with an array to another processor using a non-blocking call. The receiving processor must know the length of the array. This call must be paired with a matching call to Irecv.

Parameters
[in]bufPointer to array buffer with length integers.
[in]lengthNumber of integers in buf that we want to send.
[in]recv_procReceiving processor number.
[in]tagInteger argument specifying an integer tag to be sent with this message.

◆ IsendBytes()

Request AMP::AMP_MPI::IsendBytes ( const void *  buf,
int  N_bytes,
int  recv_proc,
int  tag 
) const

This function sends an MPI message with an array of bytes (MPI_BYTES) to receiving_proc_number using a non-blocking call. The receiving processor must know the number of bytes to receive. This call must be paired with a matching call to IrecvBytes.

Parameters
[in]bufVoid pointer to an array of number_bytes bytes to send.
[in]N_bytesInteger number of bytes to send.
[in]recv_procReceiving processor number.
[in]tagInteger argument specifying an integer tag to be sent with this message.

◆ isNull()

bool AMP::AMP_MPI::isNull ( ) const

Check if the current communicator is NULL

◆ mapGather()

template<class KEY , class DATA >
void AMP::AMP_MPI::mapGather ( std::map< KEY, DATA > &  map) const

This function combines std::maps from different processors to create a single master std::map If two or more ranks share the same key, the lowest rank will be used

Parameters
[in]mapInput/Output std::map for the gather.

◆ maxReduce() [1/5]

template<class type >
type AMP::AMP_MPI::maxReduce ( const type &  value) const

Max Reduce.

This function performs a max all reduce across all processors. It returns the maximum value across all processors.

Parameters
[in]valueThe input value for the all reduce

◆ maxReduce() [2/5]

template<class type >
void AMP::AMP_MPI::maxReduce ( const type *  x,
type *  y,
int  n 
) const

Max Reduce.

Perform an array max Reduce across all nodes. Each processor contributes an array of values, and the element-wise maximum is returned in the same array.

If a 'rank_of_max' argument is provided, it will set the array to the rank of the process holding the maximum value. Like the double argument, the size of the supplied 'rank_of_max' array should be n.

Parameters
[in]xThe input array for the reduce
[in]yThe output array for the reduce
[in]nThe number of values in the array (must match on all nodes)

◆ maxReduce() [3/5]

template<class type >
void AMP::AMP_MPI::maxReduce ( const type *  x,
type *  y,
int  n,
int *  rank 
) const

Max Reduce.

Perform an array max Reduce across all nodes. Each processor contributes an array of values, and the element-wise maximum is returned in the same array.

If a 'rank_of_max' argument is provided, it will set the array to the rank of the process holding the maximum value. Like the double argument, the size of the supplied 'rank_of_max' array should be n.

Parameters
[in]xThe input array for the reduce
[in]yThe output array for the reduce
[in]nThe number of values in the array (must match on all nodes)
[out]rankOutput array indicating the rank of the processor containing the maximum value

◆ maxReduce() [4/5]

template<class type >
void AMP::AMP_MPI::maxReduce ( type *  x,
int  n 
) const

Max Reduce.

Perform an array max Reduce across all nodes. Each processor contributes an array of values, and the element-wise maximum is returned in the same array.

If a 'rank_of_max' argument is provided, it will set the array to the rank of the process holding the maximum value. Like the double argument, the size of the supplied 'rank_of_max' array should be n.

Parameters
[in]xThe input/output array for the reduce
[in]nThe number of values in the array (must match on all nodes)

◆ maxReduce() [5/5]

template<class type >
void AMP::AMP_MPI::maxReduce ( type *  x,
int  n,
int *  rank 
) const

Max Reduce.

Perform an array max Reduce across all nodes. Each processor contributes an array of values, and the element-wise maximum is returned in the same array.

If a 'rank_of_max' argument is provided, it will set the array to the rank of the process holding the maximum value. Like the double argument, the size of the supplied 'rank_of_max' array should be n.

Parameters
[in]xThe input/output array for the reduce
[in]nThe number of values in the array (must match on all nodes)
[out]rankOutput array indicating the rank of the processor containing the maximum value

◆ maxScan() [1/2]

template<class type >
type AMP::AMP_MPI::maxScan ( const type &  x) const

Scan Max Reduce.

Computes the max scan (partial reductions) of data on a collection of processes. See MPI_Scan for more information.

Parameters
[in]xThe input value for the scan

◆ maxScan() [2/2]

template<class type >
void AMP::AMP_MPI::maxScan ( const type *  x,
type *  y,
int  n 
) const

Scan Max Reduce.

Computes the max scan (partial reductions) of data on a collection of processes. See MPI_Scan for more information.

Parameters
[in]xThe input array for the scan
[in]yThe output array for the scan
[in]nThe number of values in the array (must match on all nodes)

◆ maxTag()

int AMP::AMP_MPI::maxTag ( ) const

Return the maximum tag

◆ minReduce() [1/5]

template<class type >
type AMP::AMP_MPI::minReduce ( const type &  value) const

Min Reduce.

This function performs a min all reduce across all processors. It returns the minimum value across all processors.

Parameters
[in]valueThe input value for the all reduce

◆ minReduce() [2/5]

template<class type >
void AMP::AMP_MPI::minReduce ( const type *  x,
type *  y,
int  n 
) const

Min Reduce.

Perform an array min Reduce across all nodes. Each processor contributes an array of values, and the element-wise minimum is returned in the same array.

Parameters
[in]xThe input array for the reduce
[in]yThe output array for the reduce
[in]nThe number of values in the array (must match on all nodes)

◆ minReduce() [3/5]

template<class type >
void AMP::AMP_MPI::minReduce ( const type *  x,
type *  y,
int  n,
int *  rank 
) const

Min Reduce.

Perform an array min Reduce across all nodes. Each processor contributes an array of values, and the element-wise minimum is returned in the same array.

Parameters
[in]xThe input array for the reduce
[in]yThe output array for the reduce
[in]nThe number of values in the array (must match on all nodes)
[out]rankOutput array indicating the rank of the processor containing the minimum value

◆ minReduce() [4/5]

template<class type >
void AMP::AMP_MPI::minReduce ( type *  x,
int  n 
) const

Min Reduce.

Perform an array min Reduce across all nodes. Each processor contributes an array of values, and the element-wise minimum is returned in the same array.

Parameters
[in]xThe input/output array for the reduce
[in]nThe number of values in the array (must match on all nodes)

◆ minReduce() [5/5]

template<class type >
void AMP::AMP_MPI::minReduce ( type *  x,
int  n,
int *  rank 
) const

Min Reduce.

Perform an array min Reduce across all nodes. Each processor contributes an array of values, and the element-wise minimum is returned in the same array.

Parameters
[in]xThe input/output array for the reduce
[in]nThe number of values in the array (must match on all nodes)
[out]rankOutput array indicating the rank of the processor containing the minimum value

◆ minScan() [1/2]

template<class type >
type AMP::AMP_MPI::minScan ( const type &  x) const

Scan Min Reduce.

Computes the min scan (partial reductions) of data on a collection of processes. See MPI_Scan for more information.

Parameters
[in]xThe input value for the scan

◆ minScan() [2/2]

template<class type >
void AMP::AMP_MPI::minScan ( const type *  x,
type *  y,
int  n 
) const

Scan Min Reduce.

Computes the min scan (partial reductions) of data on a collection of processes. See MPI_Scan for more information.

Parameters
[in]xThe input array for the scan
[in]yThe output array for the scan
[in]nThe number of values in the array (must match on all nodes)

◆ MPI_Active()

static bool AMP::AMP_MPI::MPI_Active ( )
static

Check if MPI is active.

◆ MPI_Comm_created()

static size_t AMP::AMP_MPI::MPI_Comm_created ( )
static

Return the total number of MPI_Comm objects that have been created.

◆ MPI_Comm_destroyed()

static size_t AMP::AMP_MPI::MPI_Comm_destroyed ( )
static

Return the total number of MPI_Comm objects that have been destroyed.

◆ newTag()

int AMP::AMP_MPI::newTag ( ) const

Return a new tag.

This routine will return an unused tag for communication. Note that this tag may match a user tag, but this function will not return two duplicate tags. This is a global operation.

◆ operator!=()

bool AMP::AMP_MPI::operator!= ( const AMP_MPI ) const

Overload operator !=.

Overload operator comm1 != comm2. Two MPI objects are != if they do not share the same communicator. Note: this is a local operation.

◆ operator<()

bool AMP::AMP_MPI::operator< ( const AMP_MPI ) const

Overload operator <.

Overload operator comm1 < comm2. One MPI object is < another iff all the processors in the first object are also in the second. Additionally, the second object must contain at least one processor that is not in the first object. This is a collective operation, based on the first communicator. As a result all processors on the first communicator will return the same value, while any processors that are not on the first communicator will return an unknown value. Additionally, all processors on the first object MUST call this routine and will be synchronized through this call (there is an internal allReduce).

◆ operator<=()

bool AMP::AMP_MPI::operator<= ( const AMP_MPI ) const

Overload operator <=.

Overload operator comm1 <= comm2. One MPI object is <= another iff all the processors in the first object are also in the second. This is a collective operation, based on the first communicator. As a result all processors on the first communicator will return the same value, while any processors that are not on the first communicator will return an unknown value. Additionally, all processors on the first object MUST call this routine and will be synchronized through this call (there is an internal allReduce).

◆ operator=() [1/2]

AMP_MPI & AMP::AMP_MPI::operator= ( AMP_MPI &&  rhs)

Move assignment operator

Parameters
[in]rhsCommunicator to copy

◆ operator=() [2/2]

AMP_MPI & AMP::AMP_MPI::operator= ( const AMP_MPI comm)

Assignment operator.

This operator overloads the assignment to correctly copy an communicator

Parameters
[in]commExisting MPI object

◆ operator==()

bool AMP::AMP_MPI::operator== ( const AMP_MPI ) const

Overload operator ==.

Overload operator comm1 == comm2. Two MPI objects are == if they share the same communicator. Note: this is a local operation.

◆ operator>()

bool AMP::AMP_MPI::operator> ( const AMP_MPI ) const

Overload operator >

Overload operator comm1 > comm2. One MPI object is > another iff all the processors in the second object are also in the first. Additionally, the first object must contain at least one processor that is not in the second object. This is a collective operation, based on the first communicator. As a result all processors on the first communicator will return the same value, while any processors that are not on the first communicator will return an unknown value. Additionally, all processors on the first object MUST call this routine and will be synchronized through this call (there is an internal allReduce).

◆ operator>=()

bool AMP::AMP_MPI::operator>= ( const AMP_MPI ) const

Overload operator >=.

Overload operator comm1 >= comm2. One MPI object is >= another iff all the processors in the second object are also in the first. This is a collective operation, based on the first communicator. As a result all processors on the first communicator will return the same value, while any processors that are not on the first communicator will return an unknown value. Additionally, all processors on the first object MUST call this routine and will be synchronized through this call (there is an internal allReduce).

◆ probe()

std::tuple< int, int, int > AMP::AMP_MPI::probe ( int  source = -1,
int  tag = -1 
) const

Blocking test for a message.

This function performs a blocking test for a message. It will return the number of bytes in the message when a message with the specified source and tag (on the current communicator) is available

Parameters
[in]sourcesource rank (-1: any source)
[in]tagtag (-1: any tag)
Returns
Tuple of three ints: matched source, matched tag, number of bytes

◆ queryThreadSupport()

static ThreadSupport AMP::AMP_MPI::queryThreadSupport ( )
static

Query the level of thread support.

◆ rand()

size_t AMP::AMP_MPI::rand ( ) const

Generate a random number.

This generates a random number that is consistent across the comm

◆ recv() [1/3]

template<class type >
type AMP::AMP_MPI::recv ( int  send,
int  tag = 0 
) const

This function receives an MPI message with a data array from another processor. This call must be paired with a matching call to send.

Parameters
[in]sendProcessor number of sender.
[in]tagOptional integer argument specifying a tag which must be matched by the tag of the incoming message. Default tag is 0.

◆ recv() [2/3]

template<class type >
void AMP::AMP_MPI::recv ( type *  buf,
int &  length,
int  send,
bool  get_length,
int  tag = 0 
) const

This function receives an MPI message with a data array from another processor.

If this processor knows in advance the length of the array, use "get_length = false;" otherwise we will get the return size. This call must be paired with a matching call to send.

Parameters
[in]bufPointer to integer array buffer with capacity of length integers.
[in]lengthIf get_length==true: The number of elements to be received, otherwise the maximum number of values that can be stored in buf. On output the number of received elements.
[in]sendProcessor number of sender.
[in]get_lengthOptional boolean argument specifying if we first need to check the message size to get the size of the array. Default value is false.
[in]tagOptional integer argument specifying a tag which must be matched by the tag of the incoming message. Default tag is 0.

◆ recv() [3/3]

template<class type >
void AMP::AMP_MPI::recv ( type *  buf,
int  length,
int  send,
int  tag = 0 
) const

This function receives an MPI message with a data array from another processor. This call must be paired with a matching call to send.

Parameters
[in]bufPointer to integer array buffer with capacity of length integers.
[in]lengthThe number of elements to be received.
[in]sendProcessor number of sender.
[in]tagOptional integer argument specifying a tag which must be matched by the tag of the incoming message. Default tag is 0.

◆ recvBytes()

void AMP::AMP_MPI::recvBytes ( void *  buf,
int  N_bytes,
int  send,
int  tag = 0 
) const

This function receives an MPI message with an array of max size number_bytes (MPI_BYTES) from any processor.

This call must be paired with a matching call to sendBytes.

Parameters
[in]bufVoid pointer to a buffer of size number_bytes bytes.
[in]N_bytesInteger number specifying size of buf in bytes.
[in]sendProcessor number of sender.
[in]tagOptional integer argument specifying a tag which must be matched by the tag of the incoming message. Default tag is 0.

◆ reset()

void AMP::AMP_MPI::reset ( )

Reset the object.

This resets the object to the empty state without an MPI_Comm

◆ send() [1/2]

template<class type >
void AMP::AMP_MPI::send ( const type &  data,
int  recv,
int  tag = 0 
) const

This function sends an MPI message with an array to another processor.

If the receiving processor knows in advance the length of the array, use "send_length = false;" otherwise, this processor will first send the length of the array, then send the data. This call must be paired with a matching call to recv.

Parameters
[in]dataData to send
[in]recvReceiving processor number.
[in]tagOptional integer argument specifying an integer tag to be sent with this message. Default tag is 0. The matching recv must share this tag.

◆ send() [2/2]

template<class type >
void AMP::AMP_MPI::send ( const type *  buf,
int  length,
int  recv,
int  tag = 0 
) const

This function sends an MPI message with an array to another processor.

If the receiving processor knows in advance the length of the array, use "send_length = false;" otherwise, this processor will first send the length of the array, then send the data. This call must be paired with a matching call to recv.

Parameters
[in]bufPointer to array buffer with length integers.
[in]lengthNumber of integers in buf that we want to send.
[in]recvReceiving processor number.
[in]tagOptional integer argument specifying an integer tag to be sent with this message. Default tag is 0. The matching recv must share this tag.

◆ sendBytes()

void AMP::AMP_MPI::sendBytes ( const void *  buf,
int  N_bytes,
int  recv,
int  tag = 0 
) const

This function sends an MPI message with an array of bytes (MPI_BYTES) to receiving_proc_number.

This call must be paired with a matching call to recvBytes.

Parameters
[in]bufVoid pointer to an array of number_bytes bytes to send.
[in]N_bytesInteger number of bytes to send.
[in]recvReceiving processor number.
[in]tagOptional integer argument specifying an integer tag to be sent with this message. Default tag is 0. The matching recv must share this tag.

◆ sendrecv()

template<class type >
void AMP::AMP_MPI::sendrecv ( const type *  sendbuf,
int  sendcount,
int  dest,
int  sendtag,
type *  recvbuf,
int  recvcount,
int  source,
int  recvtag 
) const

This function sends and receives data using a blocking call.

Parameters
[in]sendbufInitial address of send buffer (choice).
[in]sendcountNumber of elements to send (integer).
[in]destRank of destination (integer).
[in]sendtagSend tag (integer).
[out]recvbufInitial address of recv buffer (choice).
[in]recvcountMaximum number of elements to receive (integer).
[in]sourceRank of source (integer).
[in]recvtagReceive tag (integer).

◆ serializeStart()

void AMP::AMP_MPI::serializeStart ( ) const

Start a serial region.

This function will serialize MPI processes so that they run one at a time. A call to serializeStart must be followed by a call to serializeStop after the commands to be executed. Note: the ranks will be run in order.

◆ serializeStop()

void AMP::AMP_MPI::serializeStop ( ) const

Stop a serial region.

Stop a serial region. See serializeStart for more information.

◆ setCallAbortInSerialInsteadOfExit()

void AMP::AMP_MPI::setCallAbortInSerialInsteadOfExit ( bool  flag = true)

Set boolean flag indicating whether exit or abort is called when running with one processor. Calling this function influences the behavior of calls to abort(). By default, the flag is true meaning that abort() will be called. Passing false means exit(-1) will be called.

◆ setGather()

template<class type >
void AMP::AMP_MPI::setGather ( std::set< type > &  set) const

This function combines sets from different processors to create a single master set

Parameters
[in]setInput/Output std::set for the gather.

◆ setProcessAffinity()

static void AMP::AMP_MPI::setProcessAffinity ( const std::vector< int > &  procs)
static

Function to set the affinity of the current process.

◆ sleepBarrier()

void AMP::AMP_MPI::sleepBarrier ( int  ms = 10) const

Perform a global barrier putting idle processors to sleep.

This function uses an MPI_Ibarrier to start the barrier then waits for completion using sleep.

Parameters
[in]msNumber of ms to sleep between checks for the barrier. Larger waits will allow the CPU to go idle if unused for a long time.

◆ split()

AMP_MPI AMP::AMP_MPI::split ( int  color,
int  key = -1,
bool  manage = true 
) const

Split an existing communicator.

This creates a new communicator by splitting an existing communicator. See MPI_Comm_split for information on how the underlying split will occur. Note: the underlying MPI_Comm object will be free'd automatically when it is no longer used by any MPI objects.

Parameters
[in]colorControl of subset assignment (nonnegative integer). Processes with the same color are in the same new communicator. -1: processor will not be a member of any object (NULL object will be returned)
[in]keyControl of rank assignment (integer). Note that, for a fixed color, the keys need not be unique. The processes will be sorted in ascending order according to this key, then all the processes in a given color will have the relative rank order as they did in their parent group. (See MPI_Comm_split)
[in]manageDo we want to manage the comm (free the MPI_Comm when this object leaves scope)

◆ splitByNode()

AMP_MPI AMP::AMP_MPI::splitByNode ( int  key = -1,
bool  manage = true 
) const

Split an existing communicator by node.

This creates a new communicator by splitting an existing communicator by the node. This will result in a separate MPI_Comm for each physical node. Internally this will use MPI_Get_processor_name to identify the nodes. Note: the underlying MPI_Comm object will be free'd automatically when it is no longer used by any MPI objects)

Parameters
[in]keyControl of rank assignment (integer). Note that, for a fixed color, the keys need not be unique. The processes will be sorted in ascending order according to this key, then all the processes in a given color will have the relative rank order as they did in their parent group. (See MPI_Comm_split)
[in]manageDo we want to manage the comm (free the MPI_Comm when this object leaves scope)

◆ start_MPI()

static void AMP::AMP_MPI::start_MPI ( int &  argc,
char *  argv[],
int  profile_level = 0 
)
static

Start MPI.

◆ stop_MPI()

static void AMP::AMP_MPI::stop_MPI ( )
static

Stop MPI.

◆ sumReduce() [1/3]

template<class type >
type AMP::AMP_MPI::sumReduce ( const type &  value) const

Sum Reduce.

This function performs a sum all reduce across all processors. It returns the sum across all processors.

Parameters
[in]valueThe input value for the all reduce

◆ sumReduce() [2/3]

template<class type >
void AMP::AMP_MPI::sumReduce ( const type *  x,
type *  y,
int  n = 1 
) const

Sum Reduce.

Perform an array sum Reduce across all nodes. Each processor contributes an array of values, and the element-wise sum is returned in the same array.

Parameters
[in]xThe input array for the reduce
[in]yThe output array for the reduce
[in]nThe number of values in the array (must match on all nodes)

◆ sumReduce() [3/3]

template<class type >
void AMP::AMP_MPI::sumReduce ( type *  x,
int  n = 1 
) const

Sum Reduce.

Perform an array sum Reduce across all nodes. Each processor contributes an array of values, and the element-wise sum is returned in the same array.

Parameters
[in]xThe input/output array for the reduce
[in]nThe number of values in the array (must match on all nodes)

◆ sumScan() [1/2]

template<class type >
type AMP::AMP_MPI::sumScan ( const type &  x) const

Scan Sum Reduce.

Computes the sum scan (partial reductions) of data on a collection of processes. See MPI_Scan for more information.

Parameters
[in]xThe input value for the scan

◆ sumScan() [2/2]

template<class type >
void AMP::AMP_MPI::sumScan ( const type *  x,
type *  y,
int  n 
) const

Scan Sum Reduce.

Computes the sum scan (partial reductions) of data on a collection of processes. See MPI_Scan for more information.

Parameters
[in]xThe input array for the scan
[in]yThe output array for the scan
[in]nThe number of values in the array (must match on all nodes)

◆ tick()

static double AMP::AMP_MPI::tick ( )
static

Timer resolution.

This function returns the timer resolution used by "time"

◆ time()

static double AMP::AMP_MPI::time ( )
static

Elapsed time.

This function returns the elapsed time on the calling processor since an arbitrary point in the past (seconds). It is a wrapper to MPI_Wtime. See "tick" for the timer resolution in seconds. The time may or may not be synchronized across processors depending on the MPI implementation. Refer to MPI documentation for the desired platform for more information.

◆ version()

static std::array< int, 2 > AMP::AMP_MPI::version ( )
static

Return the MPI version number { major, minor }.

◆ wait() [1/2]

static void AMP::AMP_MPI::wait ( const Request request)
static

Wait for a communication to finish.

Wait for a communication to finish. Note: this does not require a communicator.

Parameters
[in]requestCommunication request to wait for (returned for Isend or Irecv)

◆ wait() [2/2]

static void AMP::AMP_MPI::wait ( Request2  request)
static

Wait for a communication to finish.

Wait for a communication to finish. Note: this does not require a communicator.

Parameters
[in]requestCommunication request to wait for (returned for Isend or Irecv)

◆ waitAll() [1/2]

static void AMP::AMP_MPI::waitAll ( int  count,
const Request request 
)
static

Wait for all communications to finish.

This function waits for all of the given communication requests to finish. Note: this does not require a communicator.

Parameters
[in]countNumber of communications to check
[in]requestArray of communication requests to wait for (returned for Isend or Irecv)

◆ waitAll() [2/2]

static void AMP::AMP_MPI::waitAll ( int  count,
Request2 request 
)
static

Wait for all communications to finish.

This function waits for all of the given communication requests to finish. Note: this does not require a communicator.

Parameters
[in]countNumber of communications to check
[in]requestArray of communication requests to wait for

◆ waitAny() [1/2]

static int AMP::AMP_MPI::waitAny ( int  count,
const Request request 
)
static

Wait for any communication to finish.

This function waits for any of the given communication requests to finish. It returns the index of the communication request that finished. Note: this does not require a communicator.

Parameters
[in]countNumber of communications to check
[in]requestArray of communication requests to wait for (returned for Isend or Irecv)

◆ waitAny() [2/2]

static int AMP::AMP_MPI::waitAny ( int  count,
Request2 request 
)
static

Wait for any communication to finish.

This function waits for any of the given communication requests to finish. It returns the index of the communication request that finished. Note: this does not require a communicator.

Parameters
[in]countNumber of communications to check
[in]requestArray of communication requests to wait for (returned for Isend or Irecv)

◆ waitSome() [1/2]

static std::vector< int > AMP::AMP_MPI::waitSome ( int  count,
const Request request 
)
static

Wait for some communications to finish.

This function waits for one (or more) communications to finish. It returns an array of the indices that have finished. Note: this does not require a communicator.

Parameters
[in]countNumber of communications to check
[in]requestArray of communication requests to wait for

◆ waitSome() [2/2]

static std::vector< int > AMP::AMP_MPI::waitSome ( int  count,
Request2 request 
)
static

Wait for some communications to finish.

This function waits for one (or more) communications to finish. It returns an array of the indices that have finished. Note: this does not require a communicator.

Parameters
[in]countNumber of communications to check
[in]requestArray of communication requests to wait for (returned for Isend or Irecv)

Member Data Documentation

◆ commNull

Comm AMP::AMP_MPI::commNull
static

Definition at line 78 of file AMP_MPI.h.

◆ commSelf

Comm AMP::AMP_MPI::commSelf
static

Definition at line 79 of file AMP_MPI.h.

◆ commWorld

Comm AMP::AMP_MPI::commWorld
static

Definition at line 80 of file AMP_MPI.h.

◆ d_call_abort

bool AMP::AMP_MPI::d_call_abort = true
private

Do we want to call MPI_abort instead of exit.

Definition at line 1476 of file AMP_MPI.h.

◆ d_comm

Comm AMP::AMP_MPI::d_comm = commNull
private

The internal MPI communicator.

Definition at line 1473 of file AMP_MPI.h.

◆ d_count

atomic_ptr AMP::AMP_MPI::d_count = nullptr
private

How many objects share the communicator.

Definition at line 1482 of file AMP_MPI.h.

◆ d_currentTag

int_ptr AMP::AMP_MPI::d_currentTag = nullptr
private

The current tag.

Definition at line 1480 of file AMP_MPI.h.

◆ d_hash

uint64_t AMP::AMP_MPI::d_hash = hashNull
private

A unique hash for the comm (consistent across comm)

Definition at line 1479 of file AMP_MPI.h.

◆ d_isNull

bool AMP::AMP_MPI::d_isNull = true
private

Is the communicator NULL.

Definition at line 1474 of file AMP_MPI.h.

◆ d_manage

bool AMP::AMP_MPI::d_manage = false
private

Do we want to manage this communicator.

Definition at line 1475 of file AMP_MPI.h.

◆ d_maxTag

int AMP::AMP_MPI::d_maxTag
staticprivate

The maximum valid tag.

Definition at line 1485 of file AMP_MPI.h.

◆ d_rand

rand_ptr AMP::AMP_MPI::d_rand = nullptr
mutableprivate

Internal random number generator.

Definition at line 1483 of file AMP_MPI.h.

◆ d_rank

int AMP::AMP_MPI::d_rank = 0
private

The rank of the communicator.

Definition at line 1477 of file AMP_MPI.h.

◆ d_ranks

int_ptr AMP::AMP_MPI::d_ranks = nullptr
mutableprivate

The ranks of the comm in the global comm.

Definition at line 1481 of file AMP_MPI.h.

◆ d_size

int AMP::AMP_MPI::d_size = 1
private

The size of the communicator.

Definition at line 1478 of file AMP_MPI.h.

◆ has_MPI

constexpr bool AMP::AMP_MPI::has_MPI = true
staticconstexpr

Definition at line 71 of file AMP_MPI.h.

◆ hashMPI

constexpr uint64_t AMP::AMP_MPI::hashMPI = 0x641118b35a0d87cd
staticconstexpr

Definition at line 85 of file AMP_MPI.h.

◆ hashNull

constexpr uint64_t AMP::AMP_MPI::hashNull = 0xcc6bc5507c132516
staticconstexpr

Definition at line 82 of file AMP_MPI.h.

◆ hashSelf

constexpr uint64_t AMP::AMP_MPI::hashSelf = 0x070b9699a107fe57
staticconstexpr

Definition at line 83 of file AMP_MPI.h.

◆ hashWorld

constexpr uint64_t AMP::AMP_MPI::hashWorld = 0x3d5fdf58e4df5a94
staticconstexpr

Definition at line 84 of file AMP_MPI.h.

◆ N_MPI_Comm_created

atomic_int AMP::AMP_MPI::N_MPI_Comm_created
staticprivate

Number of MPI_Comm objects created over time.

Definition at line 1486 of file AMP_MPI.h.

◆ N_MPI_Comm_destroyed

atomic_int AMP::AMP_MPI::N_MPI_Comm_destroyed
staticprivate

Number of MPI_Comm objects destroyed over time.

Definition at line 1487 of file AMP_MPI.h.

◆ profile_level

short AMP::AMP_MPI::profile_level
staticprivate

The level for the profiles of MPI.

Definition at line 1484 of file AMP_MPI.h.

Referenced by changeProfileLevel().


The documentation for this class was generated from the following file:



Advanced Multi-Physics (AMP)
Oak Ridge National Laboratory
Idaho National Laboratory
Los Alamos National Laboratory
This page automatically produced from the
source code by doxygen
Last updated: Tue Mar 10 2026 13:06:41.
Comments on this page