ParallelAnalysis.hpp
// -*-Mode: C++;-*-

// * BeginRiceCopyright *****************************************************
//
// $HeadURL$
// $Id$
//
// --------------------------------------------------------------------------
// Part of HPCToolkit (hpctoolkit.org)
//
// Information about sources of support for research and development of
// HPCToolkit is at 'hpctoolkit.org' and in 'README.Acknowledgments'.
// --------------------------------------------------------------------------
//
// Copyright ((c)) 2002-2019, Rice University
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
//   notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
//   notice, this list of conditions and the following disclaimer in the
//   documentation and/or other materials provided with the distribution.
//
// * Neither the name of Rice University (RICE) nor the names of its
//   contributors may be used to endorse or promote products derived from
//   this software without specific prior written permission.
//
// This software is provided by RICE and contributors "as is" and any
// express or implied warranties, including, but not limited to, the
// implied warranties of merchantability and fitness for a particular
// purpose are disclaimed. In no event shall RICE or contributors be
// liable for any direct, indirect, incidental, special, exemplary, or
// consequential damages (including, but not limited to, procurement of
// substitute goods or services; loss of use, data, or profits; or
// business interruption) however caused and on any theory of liability,
// whether in contract, strict liability, or tort (including negligence
// or otherwise) arising in any way out of the use of this software, even
// if advised of the possibility of such damage.
//
// ******************************************************* EndRiceCopyright *

//***************************************************************************
//
// File:
//   $HeadURL$
//
// Purpose:
//   Declares ParallelAnalysis::PackedMetrics, a packable metric matrix,
//   and MPI-based helpers for reducing and broadcasting CallPath profiles
//   across ranks.
//
// Description:
//   PackedMetrics flattens a node-by-metric matrix into a contiguous
//   buffer of doubles suitable for MPI transfer. packSend/recvMerge,
//   reduce, and broadcast combine per-rank profiles using a binary-tree
//   communication pattern.
//
//***************************************************************************

#ifndef ParallelAnalysis_hpp
#define ParallelAnalysis_hpp

//**************************** MPI Include Files ****************************

#include <mpi.h>

//************************* System Include Files ****************************

#include <iostream>
#include <string>
#include <vector>

#include <cstring> // for memset()

#include <stdint.h>

//*************************** User Include Files ****************************

#include <include/uint.h>

#include <lib/prof/CallPath-Profile.hpp>

#include <lib/support/Unique.hpp>

//*************************** Forward Declarations **************************

//***************************************************************************
// PackedMetrics: a packable matrix
//***************************************************************************

namespace ParallelAnalysis {

class PackedMetrics
  : public Unique // prevent copying
{
public:
  // [mBegId, mEndId)
  PackedMetrics(uint numNodes, uint mBegId, uint mEndId,
                uint mDrvdBegId, uint mDrvdEndId)
    : m_numNodes(numNodes), m_numMetrics(mEndId - mBegId),
      m_mBegId(mBegId), m_mEndId(mEndId),
      m_mDrvdBegId(mDrvdBegId), m_mDrvdEndId(mDrvdEndId)
  {
    size_t sz = dataSize();
    m_packedData = new double[sz];

    // initialize first (unused) row to avoid bogus valgrind warnings
    memset(m_packedData, 0, (m_numHdr + m_numMetrics) * sizeof(double));

    m_packedData[m_numNodesIdx] = (double)m_numNodes;
    m_packedData[m_mBegIdIdx] = (double)m_mBegId;
    m_packedData[m_mEndIdIdx] = (double)m_mEndId;
  }

  // PackedMetrics(double* packedMatrix) { }

  ~PackedMetrics()
  { delete[] m_packedData; }


  // 0-based indexing (row-major layout)
  double
  idx(uint idxNodes, uint idxMetrics) const
  { return m_packedData[m_numHdr + (m_numMetrics * idxNodes) + idxMetrics]; }


  double&
  idx(uint idxNodes, uint idxMetrics)
  { return m_packedData[m_numHdr + (m_numMetrics * idxNodes) + idxMetrics]; }


  uint
  numNodes() const
  { return m_numNodes; }

  uint
  numMetrics() const
  { return m_numMetrics; }


  uint
  mBegId() const
  { return m_mBegId; }

  uint
  mEndId() const
  { return m_mEndId; }


  uint
  mDrvdBegId() const
  { return m_mDrvdBegId; }

  uint
  mDrvdEndId() const
  { return m_mDrvdEndId; }


  // verify: check that the header written by the constructor survived
  // the transfer intact
  bool
  verify() const
  {
    return (m_numNodes == (uint)m_packedData[m_numNodesIdx]
            && m_mBegId == (uint)m_packedData[m_mBegIdIdx]
            && m_mEndId == (uint)m_packedData[m_mEndIdIdx]);
  }


  double*
  data() const
  { return m_packedData; }

  // dataSize: size in terms of elements
  uint
  dataSize() const
  { return (m_numNodes * m_numMetrics) + m_numHdr; }

private:
  static const uint m_numHdr = 4;
  static const uint m_numNodesIdx = 0;
  static const uint m_mBegIdIdx = 1;
  static const uint m_mEndIdIdx = 2;

  uint m_numNodes;   // rows
  uint m_numMetrics; // columns

  uint m_mBegId, m_mEndId;         // [mBegId, mEndId)

  uint m_mDrvdBegId, m_mDrvdEndId; // [mDrvdBegId, mDrvdEndId)

  double* m_packedData; // use row-major layout

};

} // namespace ParallelAnalysis
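

//***************************************************************************
// Example (editor's sketch, not part of the original header): the packed
// buffer holds an m_numHdr-double header (numNodes, mBegId, mEndId, one
// unused slot) followed by numNodes x numMetrics values in row-major
// order. A hypothetical round trip over MPI_DOUBLE might look like:
//
//   ParallelAnalysis::PackedMetrics pm(numNodes, mBegId, mEndId,
//                                      mDrvdBegId, mDrvdEndId);
//   pm.idx(0, 0) = 3.14; // node 0, first metric
//   MPI_Send(pm.data(), (int)pm.dataSize(), MPI_DOUBLE, dest, 0, comm);
//   // The receiver constructs a PackedMetrics with the same shape,
//   // MPI_Recv's into data(), then checks verify() before unpacking.
//***************************************************************************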


//***************************************************************************
// reduce/broadcast
//***************************************************************************

namespace ParallelAnalysis {

// ------------------------------------------------------------------------
// packSend/recvMerge: pack a profile and send it to rank 'dest';
// receive a packed profile from rank 'src' and merge it into 'profile'
// ------------------------------------------------------------------------

void
packSend(Prof::CallPath::Profile* profile,
         int dest, int myRank, MPI_Comm comm = MPI_COMM_WORLD);
void
recvMerge(Prof::CallPath::Profile* profile,
          int src, int myRank, MPI_Comm comm = MPI_COMM_WORLD);

void
packSend(std::pair<Prof::CallPath::Profile*,
                   ParallelAnalysis::PackedMetrics*> data,
         int dest, int myRank, MPI_Comm comm = MPI_COMM_WORLD);
void
recvMerge(std::pair<Prof::CallPath::Profile*,
                    ParallelAnalysis::PackedMetrics*> data,
          int src, int myRank, MPI_Comm comm = MPI_COMM_WORLD);

void
packSend(StringSet* stringSet,
         int dest, int myRank, MPI_Comm comm = MPI_COMM_WORLD);
void
recvMerge(StringSet* stringSet,
          int src, int myRank, MPI_Comm comm = MPI_COMM_WORLD);

// ------------------------------------------------------------------------
// reduce: Uses a tree-based reduction to reduce the profile at every
// rank into a canonical profile at the tree's root, rank 0. Assumes
// 0-based ranks.
//
// T: Prof::CallPath::Profile*
// T: std::pair<Prof::CallPath::Profile*, ParallelAnalysis::PackedMetrics*>
// ------------------------------------------------------------------------

template<typename T>
void
reduce(T object, int myRank, int numRanks, MPI_Comm comm = MPI_COMM_WORLD)
{
  // implicit binary tree: rank r's children are 2r+1 and 2r+2;
  // its parent is (r-1)/2
  int lchild = 2 * myRank + 1;
  if (lchild < numRanks) {
    recvMerge(object, lchild, myRank, comm);
    int rchild = 2 * myRank + 2;
    if (rchild < numRanks) {
      recvMerge(object, rchild, myRank, comm);
    }
  }
  if (myRank > 0) {
    int parent = (myRank - 1) / 2;
    packSend(object, parent, myRank, comm);
  }
}
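
// Example (editor's sketch): for numRanks = 7 the implicit reduction tree is
//
//            0
//          /   \
//         1     2
//        / \   / \
//       3   4 5   6
//
// Interior ranks first merge both children's profiles into their own,
// then send the merged result to their parent; rank 0 ends up with the
// canonical profile:
//
//   Prof::CallPath::Profile* profile = /* this rank's local profile */;
//   ParallelAnalysis::reduce(profile, myRank, numRanks);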


// ------------------------------------------------------------------------
// broadcast: Broadcast the profile at the tree's root (rank 0) to every
// other rank. Assumes 0-based ranks.
// ------------------------------------------------------------------------
void
broadcast(Prof::CallPath::Profile*& profile, int myRank,
          MPI_Comm comm = MPI_COMM_WORLD);
void
broadcast(StringSet& stringSet, int myRank,
          MPI_Comm comm = MPI_COMM_WORLD);
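
// Example (editor's sketch): a reduce followed by a broadcast leaves
// every rank holding the same canonical profile:
//
//   ParallelAnalysis::reduce(profile, myRank, numRanks);
//   ParallelAnalysis::broadcast(profile, myRank);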

// ------------------------------------------------------------------------
// pack/unpack a profile to/from a buffer
// ------------------------------------------------------------------------

// packProfile: serialize 'profile' into a byte buffer, returned through
// 'buffer' along with its size in 'bufferSz'
void
packProfile(const Prof::CallPath::Profile& profile,
            uint8_t** buffer, size_t* bufferSz);

// unpackProfile: deserialize a profile from 'buffer'
Prof::CallPath::Profile*
unpackProfile(uint8_t* buffer, size_t bufferSz);

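// Example (editor's sketch, one plausible way to pair these primitives
// with raw MPI; not necessarily how packSend()/recvMerge() are
// implemented internally):
//
//   uint8_t* buf = NULL;
//   size_t bufSz = 0;
//   packProfile(*profile, &buf, &bufSz);
//   MPI_Send(buf, (int)bufSz, MPI_BYTE, dest, /*tag=*/0, comm);
//   delete[] buf; // assumption: the caller owns the returned buffer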

// ------------------------------------------------------------------------
// pack/unpack metrics from/to a profile
// ------------------------------------------------------------------------

// packMetrics: pack the given metric values from 'profile' into
// 'packedMetrics'
void
packMetrics(const Prof::CallPath::Profile& profile,
            ParallelAnalysis::PackedMetrics& packedMetrics);

// unpackMetrics: unpack 'packedMetrics' into profile and apply metric update
void
unpackMetrics(Prof::CallPath::Profile& profile,
              const ParallelAnalysis::PackedMetrics& packedMetrics);

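// Example (editor's sketch): a round trip for metric ids [mBegId, mEndId),
// assuming the PackedMetrics is constructed with the profile's CCT node
// count and metric-id ranges:
//
//   ParallelAnalysis::PackedMetrics pm(numCCTNodes, mBegId, mEndId,
//                                      mDrvdBegId, mDrvdEndId);
//   packMetrics(*profile, pm);    // profile -> packed matrix
//   // ... ship pm.data() (pm.dataSize() doubles) between ranks ...
//   unpackMetrics(*profile, pm);  // packed matrix -> profile
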
} // namespace ParallelAnalysis


//***************************************************************************

#endif // ParallelAnalysis_hpp