xref: /petsc/src/sys/objects/tagm.c (revision 834855d6effb0d027771461c8e947ee1ce5a1e17)
1 #include <petsc/private/petscimpl.h> /*I    "petscsys.h"   I*/
2 #include <petsc/private/hashmapobj.h>
3 #include <petsc/private/garbagecollector.h>
4 
5 /*
6    A simple way to manage tags inside a communicator.
7 
8    It uses the attributes to determine if a new communicator
9       is needed and to store the available tags.
10 
11 */
12 
13 /*@
14   PetscObjectGetNewTag - Gets a unique new tag from a PETSc object. All
15   processors that share the object MUST call this routine EXACTLY the same
16   number of times.  This tag should only be used with the current objects
17   communicator; do NOT use it with any other MPI communicator.
18 
19   Collective
20 
21   Input Parameter:
22 . obj - the PETSc object; this must be cast with a (`PetscObject`), for example,
23          `PetscObjectGetNewTag`((`PetscObject`)mat,&tag);
24 
25   Output Parameter:
26 . tag - the new tag
27 
28   Level: developer
29 
30   Note:
31   This tag is needed if one is writing MPI communication code involving message passing and needs unique MPI tags to ensure the messages are connected to this specific
32   object.
33 
34 .seealso: `PetscCommGetNewTag()`
35 @*/
PetscObjectGetNewTag(PetscObject obj,PetscMPIInt * tag)36 PetscErrorCode PetscObjectGetNewTag(PetscObject obj, PetscMPIInt *tag)
37 {
38   PetscFunctionBegin;
39   PetscCall(PetscCommGetNewTag(obj->comm, tag));
40   PetscFunctionReturn(PETSC_SUCCESS);
41 }
42 
43 /*@
44   PetscCommGetNewTag - Gets a unique new tag from a PETSc communicator
45 
46   Collective
47 
48   Input Parameter:
49 . comm - the MPI communicator
50 
51   Output Parameter:
52 . tag - the new tag
53 
54   Level: developer
55 
56   Notes:
57   All processors that share the communicator MUST call this routine EXACTLY the same number of
58   times. This tag should only be used with the current objects communicator; do NOT use it
59   with any other MPI communicator.
60 
61 .seealso: `PetscObjectGetNewTag()`, `PetscCommDuplicate()`
62 @*/
PetscCommGetNewTag(MPI_Comm comm,PetscMPIInt * tag)63 PetscErrorCode PetscCommGetNewTag(MPI_Comm comm, PetscMPIInt *tag)
64 {
65   PetscCommCounter *counter;
66   PetscMPIInt      *maxval, flg;
67 
68   PetscFunctionBegin;
69   PetscAssertPointer(tag, 2);
70 
71   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg));
72   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Bad MPI communicator supplied; must be a PETSc communicator");
73 
74   if (counter->tag < 1) {
75     PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount));
76     PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
77     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
78     counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
79   }
80 
81   *tag = counter->tag--;
82   if (PetscDefined(USE_DEBUG)) {
83     /*
84      Hanging here means that some processes have called PetscCommGetNewTag() and others have not.
85      */
86     PetscCallMPI(MPI_Barrier(comm));
87   }
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
91 /*@C
92   PetscCommGetComm - get a new MPI communicator from a PETSc communicator that can be passed off to another package
93 
94   Collective
95 
96   Input Parameter:
97 . comm_in - Input communicator
98 
99   Output Parameter:
100 . comm_out - Output communicator
101 
102   Level: developer
103 
104   Notes:
105   Use `PetscCommRestoreComm()` to return the communicator when the external package no longer needs it
106 
107   Certain MPI implementations have `MPI_Comm_free()` that do not work, thus one can run out of available MPI communicators causing
108   mysterious crashes in the code after running a long time. This routine allows reusing previously obtained MPI communicators that
109   are no longer needed.
110 
111 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`, `PetscCommRestoreComm()`
112 @*/
PetscCommGetComm(MPI_Comm comm_in,MPI_Comm * comm_out)113 PetscErrorCode PetscCommGetComm(MPI_Comm comm_in, MPI_Comm *comm_out)
114 {
115   PetscCommCounter *counter;
116   PetscMPIInt       flg;
117 
118   PetscFunctionBegin;
119   PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
120   PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
121   PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");
122 
123   if (counter->comms) {
124     struct PetscCommStash *pcomms = counter->comms;
125 
126     *comm_out      = pcomms->comm;
127     counter->comms = pcomms->next;
128     PetscCall(PetscFree(pcomms));
129     PetscCall(PetscInfo(NULL, "Reusing a communicator %ld %ld\n", (long)comm_in, (long)*comm_out));
130   } else {
131     PetscCallMPI(MPI_Comm_dup(comm_in, comm_out));
132   }
133   PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
134   PetscFunctionReturn(PETSC_SUCCESS);
135 }
136 
137 /*@C
138   PetscCommRestoreComm - restores an MPI communicator that was obtained with `PetscCommGetComm()`
139 
140   Collective
141 
142   Input Parameters:
143 + comm_in  - Input communicator
144 - comm_out - returned communicator
145 
146   Level: developer
147 
148 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`
149 @*/
PetscCommRestoreComm(MPI_Comm comm_in,MPI_Comm * comm_out)150 PetscErrorCode PetscCommRestoreComm(MPI_Comm comm_in, MPI_Comm *comm_out)
151 {
152   PetscCommCounter      *counter;
153   PetscMPIInt            flg;
154   struct PetscCommStash *pcomms, *ncomm;
155 
156   PetscFunctionBegin;
157   PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
158   PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
159   PetscCheck(flg, comm_in, PETSC_ERR_ARG_WRONGSTATE, "Requires a PETSc communicator as input, do not use something like MPI_COMM_WORLD");
160 
161   PetscCall(PetscMalloc(sizeof(struct PetscCommStash), &ncomm));
162   ncomm->comm = *comm_out;
163   ncomm->next = NULL;
164   pcomms      = counter->comms;
165   while (pcomms && pcomms->next) pcomms = pcomms->next;
166   if (pcomms) {
167     pcomms->next = ncomm;
168   } else {
169     counter->comms = ncomm;
170   }
171   *comm_out = 0;
172   PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
173   PetscFunctionReturn(PETSC_SUCCESS);
174 }
175 
176 /*@C
177   PetscCommDuplicate - Duplicates the communicator only if it is not already a PETSc communicator.
178 
179   Collective
180 
181   Input Parameter:
182 . comm_in - Input communicator
183 
184   Output Parameters:
185 + comm_out  - Output communicator.  May be `comm_in`.
186 - first_tag - Tag available that has not already been used with this communicator (you may pass in `NULL` if you do not need a tag)
187 
188   Level: developer
189 
190   Note:
191   PETSc communicators are just regular MPI communicators that keep track of which
192   tags have been used to prevent tag conflict. If you pass a non-PETSc communicator into
193   a PETSc creation routine it will attach a private communicator for use in the objects communications.
194   The internal `MPI_Comm` is used to perform all the MPI calls for PETSc, the outer `MPI_Comm` is a user
195   and is not used by PETSc.
196 
197 .seealso: `PetscObjectGetNewTag()`, `PetscCommGetNewTag()`, `PetscCommDestroy()`
198 @*/
PetscCommDuplicate(MPI_Comm comm_in,MPI_Comm * comm_out,PetscMPIInt * first_tag)199 PetscErrorCode PetscCommDuplicate(MPI_Comm comm_in, MPI_Comm *comm_out, PetscMPIInt *first_tag)
200 {
201   PetscInt64       *cidx;
202   PetscCommCounter *counter;
203   PetscMPIInt      *maxval, flg;
204 
205   PetscFunctionBegin;
206   PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
207   PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_Counter_keyval, &counter, &flg));
208 
209   if (!flg) { /* this is NOT a PETSc comm */
210     union
211     {
212       MPI_Comm comm;
213       void    *ptr;
214     } ucomm;
215     /* check if this communicator has a PETSc communicator embedded in it */
216     PetscCallMPI(MPI_Comm_get_attr(comm_in, Petsc_InnerComm_keyval, &ucomm, &flg));
217     if (!flg) {
218       /* This communicator is not yet known to this system, so we duplicate it and make an internal communicator */
219       PetscCallMPI(MPI_Comm_dup(comm_in, comm_out));
220       PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
221       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
222       PetscCall(PetscNew(&counter)); /* all fields of counter are zero'ed */
223       counter->tag = *maxval;
224       PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_Counter_keyval, counter));
225       /* Add an object creation index to the communicator */
226       PetscCall(PetscNew(&cidx));
227       PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_CreationIdx_keyval, cidx));
228       PetscCall(PetscInfo(NULL, "Duplicating a communicator %ld %ld max tags = %d\n", (long)comm_in, (long)*comm_out, *maxval));
229 
230       /* save PETSc communicator inside user communicator, so we can get it next time */
231       ucomm.comm = *comm_out; /* ONLY the comm part of the union is significant. */
232       PetscCallMPI(MPI_Comm_set_attr(comm_in, Petsc_InnerComm_keyval, ucomm.ptr));
233       ucomm.comm = comm_in;
234       PetscCallMPI(MPI_Comm_set_attr(*comm_out, Petsc_OuterComm_keyval, ucomm.ptr));
235     } else {
236       *comm_out = ucomm.comm;
237       /* pull out the inner MPI_Comm and hand it back to the caller */
238       PetscCallMPI(MPI_Comm_get_attr(*comm_out, Petsc_Counter_keyval, &counter, &flg));
239       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set");
240       PetscCall(PetscInfo(NULL, "Using internal PETSc communicator %ld %ld\n", (long)comm_in, (long)*comm_out));
241     }
242   } else *comm_out = comm_in;
243 
244   if (PetscDefined(USE_DEBUG)) {
245     /*
246      Hanging here means that some processes have called PetscCommDuplicate() and others have not.
247      This likely means that a subset of processes in a MPI_Comm have attempted to create a PetscObject!
248      ALL processes that share a communicator MUST shared objects created from that communicator.
249      */
250     PetscCallMPI(MPI_Barrier(comm_in));
251   }
252 
253   if (counter->tag < 1) {
254     PetscCall(PetscInfo(NULL, "Out of tags for object, starting to recycle. Comm reference count %" PetscInt_FMT "\n", counter->refcount));
255     PetscCallMPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &maxval, &flg));
256     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_LIB, "MPI error: MPI_Comm_get_attr() is not returning a MPI_TAG_UB");
257     counter->tag = *maxval - 128; /* hope that any still active tags were issued right at the beginning of the run */
258   }
259 
260   if (first_tag) *first_tag = counter->tag--;
261 
262   counter->refcount++; /* number of references to this comm */
263   PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
264   PetscFunctionReturn(PETSC_SUCCESS);
265 }
266 
267 /*@C
268   PetscCommDestroy - Frees communicator obtained with `PetscCommDuplicate()`.
269 
270   Collective
271 
272   Input Parameter:
273 . comm - the communicator to free
274 
275   Level: developer
276 
277   Notes:
278   Sets `comm` to `NULL`
279 
280   The communicator is reference counted so it is only truly removed from the system when its reference count drops to zero
281 
282 .seealso: `PetscCommDuplicate()`
283 @*/
PetscCommDestroy(MPI_Comm * comm)284 PetscErrorCode PetscCommDestroy(MPI_Comm *comm)
285 {
286   PetscInt64       *cidx;
287   PetscCommCounter *counter;
288   PetscMPIInt       flg;
289   PetscGarbage      garbage;
290   MPI_Comm          icomm = *comm, ocomm;
291   union
292   {
293     MPI_Comm comm;
294     void    *ptr;
295   } ucomm;
296 
297   PetscFunctionBegin;
298   if (*comm == MPI_COMM_NULL) PetscFunctionReturn(PETSC_SUCCESS);
299   PetscCall(PetscSpinlockLock(&PetscCommSpinLock));
300   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg));
301   if (!flg) { /* not a PETSc comm, check if it has an inner comm */
302     PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_InnerComm_keyval, &ucomm, &flg));
303     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "MPI_Comm does not have tag/name counter nor does it have inner MPI_Comm");
304     icomm = ucomm.comm;
305     PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Counter_keyval, &counter, &flg));
306     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Inner MPI_Comm does not have expected tag/name counter, problem with corrupted memory");
307   }
308   counter->refcount--;
309   if (!counter->refcount) {
310     /* if MPI_Comm has outer comm then remove reference to inner MPI_Comm from outer MPI_Comm */
311     PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_OuterComm_keyval, &ucomm, &flg));
312     if (flg) {
313       ocomm = ucomm.comm;
314       PetscCallMPI(MPI_Comm_get_attr(ocomm, Petsc_InnerComm_keyval, &ucomm, &flg));
315       PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Outer MPI_Comm %ld does not have expected reference to inner comm %ld, problem with corrupted memory", (long int)ocomm, (long int)icomm);
316       PetscCallMPI(MPI_Comm_delete_attr(ocomm, Petsc_InnerComm_keyval));
317     }
318 
319     /* Remove the object creation index on the communicator */
320     PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_CreationIdx_keyval, &cidx, &flg));
321     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "MPI_Comm does not have object creation index");
322     PetscCall(PetscFree(cidx));
323 
324     /* Remove garbage hashmap set up by garbage collection */
325     PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Garbage_HMap_keyval, &garbage, &flg));
326     if (flg) {
327       PetscInt entries = 0;
328       PetscCall(PetscHMapObjGetSize(garbage.map, &entries));
329       if (entries > 0) PetscCall(PetscGarbageCleanup(icomm));
330       PetscCall(PetscHMapObjDestroy(&garbage.map));
331     }
332 
333     PetscCall(PetscInfo(NULL, "Deleting PETSc MPI_Comm %ld\n", (long)icomm));
334     PetscCallMPI(MPI_Comm_free(&icomm));
335   }
336   *comm = MPI_COMM_NULL;
337   PetscCall(PetscSpinlockUnlock(&PetscCommSpinLock));
338   PetscFunctionReturn(PETSC_SUCCESS);
339 }
340 
341 /*@C
342   PetscObjectsListGetGlobalNumbering - computes a global numbering
343   of `PetscObject`s living on subcommunicators of a given communicator.
344 
345   Collective.
346 
347   Input Parameters:
348 + comm    - the `MPI_Comm`
349 . len     - local length of `objlist`
350 - objlist - a list of PETSc objects living on subcomms of comm and containing this comm rank
351               (subcomm ordering is assumed to be deadlock-free)
352 
353   Output Parameters:
354 + count     - global number of distinct subcommunicators on objlist (may be > `len`)
355 - numbering - global numbers of objlist entries (allocated by user)
356 
357   Level: developer
358 
359   Note:
360   This is needed when PETSc is used with certain languages that do garbage collection to manage object life cycles.
361 
362 .seealso: `PetscCommDuplicate()`, `PetscObjectDestroy()`
363 @*/
PetscObjectsListGetGlobalNumbering(MPI_Comm comm,PetscInt len,PetscObject objlist[],PetscInt * count,PetscInt * numbering)364 PetscErrorCode PetscObjectsListGetGlobalNumbering(MPI_Comm comm, PetscInt len, PetscObject objlist[], PetscInt *count, PetscInt *numbering)
365 {
366   PetscInt    i, roots, offset;
367   PetscMPIInt size, rank;
368 
369   PetscFunctionBegin;
370   PetscAssertPointer(objlist, 3);
371   if (!count && !numbering) PetscFunctionReturn(PETSC_SUCCESS);
372 
373   PetscCallMPI(MPI_Comm_size(comm, &size));
374   PetscCallMPI(MPI_Comm_rank(comm, &rank));
375   roots = 0;
376   for (i = 0; i < len; ++i) {
377     PetscMPIInt srank;
378     PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
379     /* Am I the root of the i-th subcomm? */
380     if (!srank) ++roots;
381   }
382   if (count) {
383     /* Obtain the sum of all roots -- the global number of distinct subcomms. */
384     PetscCallMPI(MPIU_Allreduce(&roots, count, 1, MPIU_INT, MPI_SUM, comm));
385   }
386   if (numbering) {
387     /* Introduce a global numbering for subcomms, initially known only by subcomm roots. */
388     /*
389       At each subcomm root number all of the subcomms it owns locally
390       and make it global by calculating the shift among all of the roots.
391       The roots are ordered using the comm ordering.
392     */
393     PetscCallMPI(MPI_Scan(&roots, &offset, 1, MPIU_INT, MPI_SUM, comm));
394     offset -= roots;
395     /* Now we are ready to broadcast global subcomm numbers within each subcomm.*/
396     /*
397       This is where the assumption of a deadlock-free ordering of the subcomms is assumed:
398       broadcast is collective on the subcomm.
399     */
400     roots = 0;
401     for (i = 0; i < len; ++i) {
402       PetscMPIInt srank;
403       numbering[i] = offset + roots; /* only meaningful if !srank. */
404 
405       PetscCallMPI(MPI_Comm_rank(objlist[i]->comm, &srank));
406       PetscCallMPI(MPI_Bcast(numbering + i, 1, MPIU_INT, 0, objlist[i]->comm));
407       if (!srank) ++roots;
408     }
409   }
410   PetscFunctionReturn(PETSC_SUCCESS);
411 }
412