11447629fSBarry Smith /*
21447629fSBarry Smith The memory scalable AO application ordering routines. These store the
3d38ec673SBarry Smith orderings on each process for that process' range of values, this is more memory-efficient than `AOBASIC`
41447629fSBarry Smith */
51447629fSBarry Smith
61447629fSBarry Smith #include <../src/vec/is/ao/aoimpl.h> /*I "petscao.h" I*/
71447629fSBarry Smith
81447629fSBarry Smith typedef struct {
91447629fSBarry Smith PetscInt *app_loc; /* app_loc[i] is the partner for the ith local PETSc slot */
101447629fSBarry Smith PetscInt *petsc_loc; /* petsc_loc[j] is the partner for the jth local app slot */
111447629fSBarry Smith PetscLayout map; /* determines the local sizes of ao */
121447629fSBarry Smith } AO_MemoryScalable;
131447629fSBarry Smith
141447629fSBarry Smith /*
156bd6ae52SBarry Smith All processors ship the data to process 0 to be printed; note that this is not scalable because
166bd6ae52SBarry Smith process 0 allocates space for all the orderings entry across all the processes
171447629fSBarry Smith */
AOView_MemoryScalable(AO ao,PetscViewer viewer)18da8c939bSJacob Faibussowitsch static PetscErrorCode AOView_MemoryScalable(AO ao, PetscViewer viewer)
19d71ae5a4SJacob Faibussowitsch {
201447629fSBarry Smith PetscMPIInt rank, size;
211447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
22*9f196a02SMartin Diehl PetscBool isascii;
231447629fSBarry Smith PetscMPIInt tag_app, tag_petsc;
241447629fSBarry Smith PetscLayout map = aomems->map;
256497c311SBarry Smith PetscInt *app, *app_loc, *petsc, *petsc_loc, len, j;
261447629fSBarry Smith MPI_Status status;
271447629fSBarry Smith
281447629fSBarry Smith PetscFunctionBegin;
29*9f196a02SMartin Diehl PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
30*9f196a02SMartin Diehl PetscCheck(isascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not supported for AO MemoryScalable", ((PetscObject)viewer)->type_name);
311447629fSBarry Smith
329566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)ao), &rank));
339566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ao), &size));
341447629fSBarry Smith
359566063dSJacob Faibussowitsch PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag_app));
369566063dSJacob Faibussowitsch PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag_petsc));
371447629fSBarry Smith
38dd400576SPatrick Sanan if (rank == 0) {
399566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Number of elements in ordering %" PetscInt_FMT "\n", ao->N));
409566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "PETSc->App App->PETSc\n"));
411447629fSBarry Smith
429566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(map->N, &app, map->N, &petsc));
431447629fSBarry Smith len = map->n;
441447629fSBarry Smith /* print local AO */
459566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%d]\n", rank));
466497c311SBarry Smith for (PetscInt i = 0; i < len; i++) PetscCall(PetscViewerASCIIPrintf(viewer, "%3" PetscInt_FMT " %3" PetscInt_FMT " %3" PetscInt_FMT " %3" PetscInt_FMT "\n", i, aomems->app_loc[i], i, aomems->petsc_loc[i]));
471447629fSBarry Smith
481447629fSBarry Smith /* recv and print off-processor's AO */
496497c311SBarry Smith for (PetscMPIInt i = 1; i < size; i++) {
501447629fSBarry Smith len = map->range[i + 1] - map->range[i];
511447629fSBarry Smith app_loc = app + map->range[i];
521447629fSBarry Smith petsc_loc = petsc + map->range[i];
536497c311SBarry Smith PetscCallMPI(MPIU_Recv(app_loc, len, MPIU_INT, i, tag_app, PetscObjectComm((PetscObject)ao), &status));
546497c311SBarry Smith PetscCallMPI(MPIU_Recv(petsc_loc, len, MPIU_INT, i, tag_petsc, PetscObjectComm((PetscObject)ao), &status));
556497c311SBarry Smith PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%d]\n", i));
5648a46eb9SPierre Jolivet for (j = 0; j < len; j++) PetscCall(PetscViewerASCIIPrintf(viewer, "%3" PetscInt_FMT " %3" PetscInt_FMT " %3" PetscInt_FMT " %3" PetscInt_FMT "\n", map->range[i] + j, app_loc[j], map->range[i] + j, petsc_loc[j]));
571447629fSBarry Smith }
589566063dSJacob Faibussowitsch PetscCall(PetscFree2(app, petsc));
591447629fSBarry Smith
601447629fSBarry Smith } else {
611447629fSBarry Smith /* send values */
626497c311SBarry Smith PetscCallMPI(MPIU_Send((void *)aomems->app_loc, map->n, MPIU_INT, 0, tag_app, PetscObjectComm((PetscObject)ao)));
636497c311SBarry Smith PetscCallMPI(MPIU_Send((void *)aomems->petsc_loc, map->n, MPIU_INT, 0, tag_petsc, PetscObjectComm((PetscObject)ao)));
641447629fSBarry Smith }
659566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer));
663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
671447629fSBarry Smith }
681447629fSBarry Smith
AODestroy_MemoryScalable(AO ao)69da8c939bSJacob Faibussowitsch static PetscErrorCode AODestroy_MemoryScalable(AO ao)
70d71ae5a4SJacob Faibussowitsch {
711447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
721447629fSBarry Smith
731447629fSBarry Smith PetscFunctionBegin;
749566063dSJacob Faibussowitsch PetscCall(PetscFree2(aomems->app_loc, aomems->petsc_loc));
759566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&aomems->map));
769566063dSJacob Faibussowitsch PetscCall(PetscFree(aomems));
773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
781447629fSBarry Smith }
791447629fSBarry Smith
801447629fSBarry Smith /*
811447629fSBarry Smith Input Parameters:
821447629fSBarry Smith + ao - the application ordering context
831447629fSBarry Smith . n - the number of integers in ia[]
841447629fSBarry Smith . ia - the integers; these are replaced with their mapped value
851447629fSBarry Smith - maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"
861447629fSBarry Smith
871447629fSBarry Smith Output Parameter:
881447629fSBarry Smith . ia - the mapped interges
891447629fSBarry Smith */
AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt * ia,const PetscInt * maploc)90da8c939bSJacob Faibussowitsch static PetscErrorCode AOMap_MemoryScalable_private(AO ao, PetscInt n, PetscInt *ia, const PetscInt *maploc)
91d71ae5a4SJacob Faibussowitsch {
921447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
931447629fSBarry Smith MPI_Comm comm;
946497c311SBarry Smith PetscMPIInt rank, size, tag1, tag2, count;
956497c311SBarry Smith PetscMPIInt *owner, j;
966497c311SBarry Smith PetscInt nmax, *sindices, *rindices, idx, lastidx, *sindices2, *rindices2, *sizes, *start;
976497c311SBarry Smith PetscMPIInt nreceives, nsends;
986bd6ae52SBarry Smith const PetscInt *owners = aomems->map->range;
991447629fSBarry Smith MPI_Request *send_waits, *recv_waits, *send_waits2, *recv_waits2;
1001447629fSBarry Smith MPI_Status recv_status;
1016497c311SBarry Smith PetscMPIInt source, widx;
1026497c311SBarry Smith PetscCount nindices;
1036497c311SBarry Smith PetscInt *rbuf, *sbuf, inreceives;
1041447629fSBarry Smith MPI_Status *send_status, *send_status2;
1051447629fSBarry Smith
1061447629fSBarry Smith PetscFunctionBegin;
1079566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)ao, &comm));
1089566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank));
1099566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size));
1101447629fSBarry Smith
1111447629fSBarry Smith /* first count number of contributors to each processor */
1129566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size, &start));
1139566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(2 * size, &sizes, n, &owner));
1141447629fSBarry Smith
1151447629fSBarry Smith j = 0;
1161447629fSBarry Smith lastidx = -1;
1176497c311SBarry Smith for (PetscInt i = 0; i < n; i++) {
1186bd6ae52SBarry Smith if (ia[i] < 0) owner[i] = -1; /* mark negative entries (which are not to be mapped) with a special negative value */
1196bd6ae52SBarry Smith if (ia[i] >= ao->N) owner[i] = -2; /* mark out of range entries with special negative value */
1206bd6ae52SBarry Smith else {
1211447629fSBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */
1221447629fSBarry Smith if (lastidx > (idx = ia[i])) j = 0;
1231447629fSBarry Smith lastidx = idx;
1241447629fSBarry Smith for (; j < size; j++) {
1251447629fSBarry Smith if (idx >= owners[j] && idx < owners[j + 1]) {
12676ec1555SBarry Smith sizes[2 * j]++; /* num of indices to be sent */
12776ec1555SBarry Smith sizes[2 * j + 1] = 1; /* send to proc[j] */
1281447629fSBarry Smith owner[i] = j;
1291447629fSBarry Smith break;
1301447629fSBarry Smith }
1311447629fSBarry Smith }
1321447629fSBarry Smith }
1336bd6ae52SBarry Smith }
13476ec1555SBarry Smith sizes[2 * rank] = sizes[2 * rank + 1] = 0; /* do not receive from self! */
1351447629fSBarry Smith nsends = 0;
1366497c311SBarry Smith for (PetscMPIInt i = 0; i < size; i++) nsends += sizes[2 * i + 1];
1371447629fSBarry Smith
1381447629fSBarry Smith /* inform other processors of number of messages and max length*/
1396497c311SBarry Smith PetscCall(PetscMaxSum(comm, sizes, &nmax, &inreceives));
1406497c311SBarry Smith PetscCall(PetscMPIIntCast(inreceives, &nreceives));
1411447629fSBarry Smith
1421447629fSBarry Smith /* allocate arrays */
1439566063dSJacob Faibussowitsch PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag1));
1449566063dSJacob Faibussowitsch PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag2));
1451447629fSBarry Smith
1469566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nreceives * nmax, &rindices, nreceives, &recv_waits));
1479566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nsends * nmax, &rindices2, nsends, &recv_waits2));
1481447629fSBarry Smith
1499566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(n, &sindices, nsends, &send_waits, nsends, &send_status));
1509566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(n, &sindices2, nreceives, &send_waits2, nreceives, &send_status2));
1511447629fSBarry Smith
1521447629fSBarry Smith /* post 1st receives: receive others requests
1531447629fSBarry Smith since we don't know how long each individual message is we
1541447629fSBarry Smith allocate the largest needed buffer for each receive. Potentially
1551447629fSBarry Smith this is a lot of wasted space.
1561447629fSBarry Smith */
1576497c311SBarry Smith for (PetscMPIInt i = 0; i < nreceives; i++) PetscCallMPI(MPIU_Irecv(rindices + nmax * i, nmax, MPIU_INT, MPI_ANY_SOURCE, tag1, comm, recv_waits + i));
1581447629fSBarry Smith
1591447629fSBarry Smith /* do 1st sends:
1601447629fSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to
1611447629fSBarry Smith the ith processor
1621447629fSBarry Smith */
1631447629fSBarry Smith start[0] = 0;
1646497c311SBarry Smith for (PetscMPIInt i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
1656497c311SBarry Smith for (PetscInt i = 0; i < n; i++) {
1661447629fSBarry Smith j = owner[i];
1676bd6ae52SBarry Smith if (j == -1) continue; /* do not remap negative entries in ia[] */
1689371c9d4SSatish Balay else if (j == -2) { /* out of range entries get mapped to -1 */ ia[i] = -1;
1696bd6ae52SBarry Smith continue;
1706bd6ae52SBarry Smith } else if (j != rank) {
1711447629fSBarry Smith sindices[start[j]++] = ia[i];
1721447629fSBarry Smith } else { /* compute my own map */
1731447629fSBarry Smith ia[i] = maploc[ia[i] - owners[rank]];
1741447629fSBarry Smith }
1751447629fSBarry Smith }
1761447629fSBarry Smith
1771447629fSBarry Smith start[0] = 0;
1786497c311SBarry Smith for (PetscMPIInt i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
1796497c311SBarry Smith count = 0;
1806497c311SBarry Smith for (PetscMPIInt i = 0; i < size; i++) {
18176ec1555SBarry Smith if (sizes[2 * i + 1]) {
1821447629fSBarry Smith /* send my request to others */
1836497c311SBarry Smith PetscCallMPI(MPIU_Isend(sindices + start[i], sizes[2 * i], MPIU_INT, i, tag1, comm, send_waits + count));
1841447629fSBarry Smith /* post receive for the answer of my request */
1856497c311SBarry Smith PetscCallMPI(MPIU_Irecv(sindices2 + start[i], sizes[2 * i], MPIU_INT, i, tag2, comm, recv_waits2 + count));
1861447629fSBarry Smith count++;
1871447629fSBarry Smith }
1881447629fSBarry Smith }
1896497c311SBarry Smith PetscCheck(nsends == count, comm, PETSC_ERR_SUP, "nsends %d != count %d", nsends, count);
1901447629fSBarry Smith
1911447629fSBarry Smith /* wait on 1st sends */
192835f2295SStefano Zampini if (nsends) PetscCallMPI(MPI_Waitall(nsends, send_waits, send_status));
1931447629fSBarry Smith
1941447629fSBarry Smith /* 1st recvs: other's requests */
1951447629fSBarry Smith for (j = 0; j < nreceives; j++) {
196835f2295SStefano Zampini PetscCallMPI(MPI_Waitany(nreceives, recv_waits, &widx, &recv_status)); /* idx: index of handle for operation that completed */
1976497c311SBarry Smith PetscCallMPI(MPIU_Get_count(&recv_status, MPIU_INT, &nindices));
1981447629fSBarry Smith rbuf = rindices + nmax * widx; /* global index */
1991447629fSBarry Smith source = recv_status.MPI_SOURCE;
2001447629fSBarry Smith
2011447629fSBarry Smith /* compute mapping */
2021447629fSBarry Smith sbuf = rbuf;
2036497c311SBarry Smith for (PetscCount i = 0; i < nindices; i++) sbuf[i] = maploc[rbuf[i] - owners[rank]];
2041447629fSBarry Smith
2051447629fSBarry Smith /* send mapping back to the sender */
2066497c311SBarry Smith PetscCallMPI(MPIU_Isend(sbuf, nindices, MPIU_INT, source, tag2, comm, send_waits2 + widx));
2071447629fSBarry Smith }
2081447629fSBarry Smith
2091447629fSBarry Smith /* wait on 2nd sends */
210835f2295SStefano Zampini if (nreceives) PetscCallMPI(MPI_Waitall(nreceives, send_waits2, send_status2));
2111447629fSBarry Smith
2121447629fSBarry Smith /* 2nd recvs: for the answer of my request */
2131447629fSBarry Smith for (j = 0; j < nsends; j++) {
214835f2295SStefano Zampini PetscCallMPI(MPI_Waitany(nsends, recv_waits2, &widx, &recv_status));
2156497c311SBarry Smith PetscCallMPI(MPIU_Get_count(&recv_status, MPIU_INT, &nindices));
2161447629fSBarry Smith source = recv_status.MPI_SOURCE;
2171447629fSBarry Smith /* pack output ia[] */
2181447629fSBarry Smith rbuf = sindices2 + start[source];
2191447629fSBarry Smith count = 0;
2206497c311SBarry Smith for (PetscCount i = 0; i < n; i++) {
2211447629fSBarry Smith if (source == owner[i]) ia[i] = rbuf[count++];
2221447629fSBarry Smith }
2231447629fSBarry Smith }
2241447629fSBarry Smith
2251447629fSBarry Smith /* free arrays */
2269566063dSJacob Faibussowitsch PetscCall(PetscFree(start));
2279566063dSJacob Faibussowitsch PetscCall(PetscFree2(sizes, owner));
2289566063dSJacob Faibussowitsch PetscCall(PetscFree2(rindices, recv_waits));
2299566063dSJacob Faibussowitsch PetscCall(PetscFree2(rindices2, recv_waits2));
2309566063dSJacob Faibussowitsch PetscCall(PetscFree3(sindices, send_waits, send_status));
2319566063dSJacob Faibussowitsch PetscCall(PetscFree3(sindices2, send_waits2, send_status2));
2323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2331447629fSBarry Smith }
2341447629fSBarry Smith
AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt * ia)235da8c939bSJacob Faibussowitsch static PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao, PetscInt n, PetscInt *ia)
236d71ae5a4SJacob Faibussowitsch {
2371447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
2381447629fSBarry Smith PetscInt *app_loc = aomems->app_loc;
2391447629fSBarry Smith
2401447629fSBarry Smith PetscFunctionBegin;
2419566063dSJacob Faibussowitsch PetscCall(AOMap_MemoryScalable_private(ao, n, ia, app_loc));
2423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2431447629fSBarry Smith }
2441447629fSBarry Smith
AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt * ia)245da8c939bSJacob Faibussowitsch static PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao, PetscInt n, PetscInt *ia)
246d71ae5a4SJacob Faibussowitsch {
2471447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
2481447629fSBarry Smith PetscInt *petsc_loc = aomems->petsc_loc;
2491447629fSBarry Smith
2501447629fSBarry Smith PetscFunctionBegin;
2519566063dSJacob Faibussowitsch PetscCall(AOMap_MemoryScalable_private(ao, n, ia, petsc_loc));
2523ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2531447629fSBarry Smith }
2541447629fSBarry Smith
255da8c939bSJacob Faibussowitsch static const struct _AOOps AOOps_MemoryScalable = {
256267267bdSJacob Faibussowitsch PetscDesignatedInitializer(view, AOView_MemoryScalable),
257267267bdSJacob Faibussowitsch PetscDesignatedInitializer(destroy, AODestroy_MemoryScalable),
258267267bdSJacob Faibussowitsch PetscDesignatedInitializer(petsctoapplication, AOPetscToApplication_MemoryScalable),
259267267bdSJacob Faibussowitsch PetscDesignatedInitializer(applicationtopetsc, AOApplicationToPetsc_MemoryScalable),
2608a403008SStefano Zampini PetscDesignatedInitializer(petsctoapplicationpermuteint, NULL),
2618a403008SStefano Zampini PetscDesignatedInitializer(applicationtopetscpermuteint, NULL),
2628a403008SStefano Zampini PetscDesignatedInitializer(petsctoapplicationpermutereal, NULL),
2638a403008SStefano Zampini PetscDesignatedInitializer(applicationtopetscpermutereal, NULL),
2641447629fSBarry Smith };
2651447629fSBarry Smith
AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao,PetscInt * aomap_loc)266da8c939bSJacob Faibussowitsch static PetscErrorCode AOCreateMemoryScalable_private(MPI_Comm comm, PetscInt napp, const PetscInt from_array[], const PetscInt to_array[], AO ao, PetscInt *aomap_loc)
267d71ae5a4SJacob Faibussowitsch {
2681447629fSBarry Smith AO_MemoryScalable *aomems = (AO_MemoryScalable *)ao->data;
2691447629fSBarry Smith PetscLayout map = aomems->map;
2701447629fSBarry Smith PetscInt n_local = map->n, i, j;
2711447629fSBarry Smith PetscMPIInt rank, size, tag;
27276ec1555SBarry Smith PetscInt *owner, *start, *sizes, nsends, nreceives;
2731447629fSBarry Smith PetscInt nmax, count, *sindices, *rindices, idx, lastidx;
2741447629fSBarry Smith PetscInt *owners = aomems->map->range;
2751447629fSBarry Smith MPI_Request *send_waits, *recv_waits;
2761447629fSBarry Smith MPI_Status recv_status;
2776497c311SBarry Smith PetscMPIInt widx;
2781447629fSBarry Smith PetscInt *rbuf;
2791447629fSBarry Smith PetscInt n = napp, ip, ia;
2801447629fSBarry Smith MPI_Status *send_status;
2816497c311SBarry Smith PetscCount nindices;
282835f2295SStefano Zampini PetscMPIInt nsends_i, nreceives_i;
2831447629fSBarry Smith
2841447629fSBarry Smith PetscFunctionBegin;
2859566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(aomap_loc, n_local));
2861447629fSBarry Smith
2879566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank));
2889566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size));
2891447629fSBarry Smith
2901447629fSBarry Smith /* first count number of contributors (of from_array[]) to each processor */
2919566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(2 * size, &sizes));
2929566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &owner));
2931447629fSBarry Smith
2941447629fSBarry Smith j = 0;
2951447629fSBarry Smith lastidx = -1;
2961447629fSBarry Smith for (i = 0; i < n; i++) {
2971447629fSBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */
2981447629fSBarry Smith if (lastidx > (idx = from_array[i])) j = 0;
2991447629fSBarry Smith lastidx = idx;
3001447629fSBarry Smith for (; j < size; j++) {
3011447629fSBarry Smith if (idx >= owners[j] && idx < owners[j + 1]) {
30276ec1555SBarry Smith sizes[2 * j] += 2; /* num of indices to be sent - in pairs (ip,ia) */
30376ec1555SBarry Smith sizes[2 * j + 1] = 1; /* send to proc[j] */
3041447629fSBarry Smith owner[i] = j;
3051447629fSBarry Smith break;
3061447629fSBarry Smith }
3071447629fSBarry Smith }
3081447629fSBarry Smith }
30976ec1555SBarry Smith sizes[2 * rank] = sizes[2 * rank + 1] = 0; /* do not receive from self! */
3101447629fSBarry Smith nsends = 0;
31176ec1555SBarry Smith for (i = 0; i < size; i++) nsends += sizes[2 * i + 1];
3121447629fSBarry Smith
3131447629fSBarry Smith /* inform other processors of number of messages and max length*/
3149566063dSJacob Faibussowitsch PetscCall(PetscMaxSum(comm, sizes, &nmax, &nreceives));
3151447629fSBarry Smith
3161447629fSBarry Smith /* allocate arrays */
3179566063dSJacob Faibussowitsch PetscCall(PetscObjectGetNewTag((PetscObject)ao, &tag));
3189566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nreceives * nmax, &rindices, nreceives, &recv_waits));
3199566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(2 * n, &sindices, nsends, &send_waits, nsends, &send_status));
3209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(size, &start));
3211447629fSBarry Smith
3221447629fSBarry Smith /* post receives: */
3236497c311SBarry Smith for (i = 0; i < nreceives; i++) PetscCallMPI(MPIU_Irecv(rindices + nmax * i, nmax, MPIU_INT, MPI_ANY_SOURCE, tag, comm, recv_waits + i));
3241447629fSBarry Smith
3251447629fSBarry Smith /* do sends:
3261447629fSBarry Smith 1) starts[i] gives the starting index in svalues for stuff going to
3271447629fSBarry Smith the ith processor
3281447629fSBarry Smith */
3291447629fSBarry Smith start[0] = 0;
33076ec1555SBarry Smith for (i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
3311447629fSBarry Smith for (i = 0; i < n; i++) {
3321447629fSBarry Smith j = owner[i];
3331447629fSBarry Smith if (j != rank) {
3341447629fSBarry Smith ip = from_array[i];
3351447629fSBarry Smith ia = to_array[i];
3361447629fSBarry Smith sindices[start[j]++] = ip;
3371447629fSBarry Smith sindices[start[j]++] = ia;
3381447629fSBarry Smith } else { /* compute my own map */
3391447629fSBarry Smith ip = from_array[i] - owners[rank];
3401447629fSBarry Smith ia = to_array[i];
3411447629fSBarry Smith aomap_loc[ip] = ia;
3421447629fSBarry Smith }
3431447629fSBarry Smith }
3441447629fSBarry Smith
3451447629fSBarry Smith start[0] = 0;
3466497c311SBarry Smith for (PetscMPIInt i = 1; i < size; i++) start[i] = start[i - 1] + sizes[2 * i - 2];
3476497c311SBarry Smith count = 0;
3486497c311SBarry Smith for (PetscMPIInt i = 0; i < size; i++) {
34976ec1555SBarry Smith if (sizes[2 * i + 1]) {
3506497c311SBarry Smith PetscCallMPI(MPIU_Isend(sindices + start[i], sizes[2 * i], MPIU_INT, i, tag, comm, send_waits + count));
3511447629fSBarry Smith count++;
3521447629fSBarry Smith }
3531447629fSBarry Smith }
35408401ef6SPierre Jolivet PetscCheck(nsends == count, comm, PETSC_ERR_SUP, "nsends %" PetscInt_FMT " != count %" PetscInt_FMT, nsends, count);
355835f2295SStefano Zampini PetscCall(PetscMPIIntCast(nsends, &nsends_i));
356835f2295SStefano Zampini PetscCall(PetscMPIIntCast(nreceives, &nreceives_i));
3571447629fSBarry Smith
3581447629fSBarry Smith /* wait on sends */
359835f2295SStefano Zampini if (nsends) PetscCallMPI(MPI_Waitall(nsends_i, send_waits, send_status));
3601447629fSBarry Smith
3611447629fSBarry Smith /* recvs */
3621447629fSBarry Smith count = 0;
3631447629fSBarry Smith for (j = nreceives; j > 0; j--) {
364835f2295SStefano Zampini PetscCallMPI(MPI_Waitany(nreceives_i, recv_waits, &widx, &recv_status));
3656497c311SBarry Smith PetscCallMPI(MPIU_Get_count(&recv_status, MPIU_INT, &nindices));
3661447629fSBarry Smith rbuf = rindices + nmax * widx; /* global index */
3671447629fSBarry Smith
3681447629fSBarry Smith /* compute local mapping */
3691447629fSBarry Smith for (i = 0; i < nindices; i += 2) { /* pack aomap_loc */
3701447629fSBarry Smith ip = rbuf[i] - owners[rank]; /* local index */
3711447629fSBarry Smith ia = rbuf[i + 1];
3721447629fSBarry Smith aomap_loc[ip] = ia;
3731447629fSBarry Smith }
3741447629fSBarry Smith count++;
3751447629fSBarry Smith }
3761447629fSBarry Smith
3779566063dSJacob Faibussowitsch PetscCall(PetscFree(start));
3789566063dSJacob Faibussowitsch PetscCall(PetscFree3(sindices, send_waits, send_status));
3799566063dSJacob Faibussowitsch PetscCall(PetscFree2(rindices, recv_waits));
3809566063dSJacob Faibussowitsch PetscCall(PetscFree(owner));
3819566063dSJacob Faibussowitsch PetscCall(PetscFree(sizes));
3823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3831447629fSBarry Smith }
3841447629fSBarry Smith
AOCreate_MemoryScalable(AO ao)385da8c939bSJacob Faibussowitsch PETSC_INTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
386d71ae5a4SJacob Faibussowitsch {
3871447629fSBarry Smith IS isapp = ao->isapp, ispetsc = ao->ispetsc;
3881447629fSBarry Smith const PetscInt *mypetsc, *myapp;
3891447629fSBarry Smith PetscInt napp, n_local, N, i, start, *petsc, *lens, *disp;
3901447629fSBarry Smith MPI_Comm comm;
3911447629fSBarry Smith AO_MemoryScalable *aomems;
3921447629fSBarry Smith PetscLayout map;
3931447629fSBarry Smith PetscMPIInt size, rank;
3941447629fSBarry Smith
3951447629fSBarry Smith PetscFunctionBegin;
39628b400f6SJacob Faibussowitsch PetscCheck(isapp, PetscObjectComm((PetscObject)ao), PETSC_ERR_ARG_WRONGSTATE, "AOSetIS() must be called before AOSetType()");
3971447629fSBarry Smith /* create special struct aomems */
3984dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&aomems));
3991447629fSBarry Smith ao->data = (void *)aomems;
400aea10558SJacob Faibussowitsch ao->ops[0] = AOOps_MemoryScalable;
4019566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)ao, AOMEMORYSCALABLE));
4021447629fSBarry Smith
4031447629fSBarry Smith /* transmit all local lengths of isapp to all processors */
4049566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)isapp, &comm));
4059566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size));
4069566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank));
4079566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(size, &lens, size, &disp));
4089566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(isapp, &napp));
4099566063dSJacob Faibussowitsch PetscCallMPI(MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm));
4101447629fSBarry Smith
4111447629fSBarry Smith N = 0;
4121447629fSBarry Smith for (i = 0; i < size; i++) {
4131447629fSBarry Smith disp[i] = N;
4141447629fSBarry Smith N += lens[i];
4151447629fSBarry Smith }
4161447629fSBarry Smith
4171447629fSBarry Smith /* If ispetsc is 0 then use "natural" numbering */
4181447629fSBarry Smith if (napp) {
4191447629fSBarry Smith if (!ispetsc) {
4201447629fSBarry Smith start = disp[rank];
4219566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(napp + 1, &petsc));
4221447629fSBarry Smith for (i = 0; i < napp; i++) petsc[i] = start + i;
4231447629fSBarry Smith } else {
4249566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ispetsc, &mypetsc));
4251447629fSBarry Smith petsc = (PetscInt *)mypetsc;
4261447629fSBarry Smith }
4274a2f8832SBarry Smith } else {
4284a2f8832SBarry Smith petsc = NULL;
4291447629fSBarry Smith }
4301447629fSBarry Smith
4311447629fSBarry Smith /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
4329566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreate(comm, &map));
4331447629fSBarry Smith map->bs = 1;
4341447629fSBarry Smith map->N = N;
4359566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(map));
4361447629fSBarry Smith
4371447629fSBarry Smith ao->N = N;
4381447629fSBarry Smith ao->n = map->n;
4391447629fSBarry Smith aomems->map = map;
4401447629fSBarry Smith
4411447629fSBarry Smith /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
4421447629fSBarry Smith n_local = map->n;
4439566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(n_local, &aomems->app_loc, n_local, &aomems->petsc_loc));
4449566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isapp, &myapp));
4451447629fSBarry Smith
4469566063dSJacob Faibussowitsch PetscCall(AOCreateMemoryScalable_private(comm, napp, petsc, myapp, ao, aomems->app_loc));
4479566063dSJacob Faibussowitsch PetscCall(AOCreateMemoryScalable_private(comm, napp, myapp, petsc, ao, aomems->petsc_loc));
4481447629fSBarry Smith
4499566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isapp, &myapp));
4501447629fSBarry Smith if (napp) {
4511447629fSBarry Smith if (ispetsc) {
4529566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ispetsc, &mypetsc));
4531447629fSBarry Smith } else {
4549566063dSJacob Faibussowitsch PetscCall(PetscFree(petsc));
4551447629fSBarry Smith }
4561447629fSBarry Smith }
4579566063dSJacob Faibussowitsch PetscCall(PetscFree2(lens, disp));
4583ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
4591447629fSBarry Smith }
4601447629fSBarry Smith
461cc4c1da9SBarry Smith /*@
4621447629fSBarry Smith AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.
4631447629fSBarry Smith
464d083f849SBarry Smith Collective
4651447629fSBarry Smith
4661447629fSBarry Smith Input Parameters:
467cab54364SBarry Smith + comm - MPI communicator that is to share the `AO`
468d38ec673SBarry Smith . napp - size of `myapp` and `mypetsc`
4691447629fSBarry Smith . myapp - integer array that defines an ordering
470b6971eaeSBarry Smith - mypetsc - integer array that defines another ordering (may be `NULL` to indicate the natural ordering, that is 0,1,2,3,...)
4711447629fSBarry Smith
4721447629fSBarry Smith Output Parameter:
4731447629fSBarry Smith . aoout - the new application ordering
4741447629fSBarry Smith
4751447629fSBarry Smith Level: beginner
4761447629fSBarry Smith
477cab54364SBarry Smith Note:
478b6971eaeSBarry Smith The arrays `myapp` and `mypetsc` must contain the all the integers 0 to `napp`-1 with no duplicates; that is there cannot be any "holes"
479cab54364SBarry Smith in the indices. Use `AOCreateMapping()` or `AOCreateMappingIS()` if you wish to have "holes" in the indices.
480cab54364SBarry Smith Comparing with `AOCreateBasic()`, this routine trades memory with message communication.
4811447629fSBarry Smith
482cab54364SBarry Smith .seealso: [](sec_ao), [](sec_scatter), `AO`, `AOCreateMemoryScalableIS()`, `AODestroy()`, `AOPetscToApplication()`, `AOApplicationToPetsc()`
4831447629fSBarry Smith @*/
AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO * aoout)484d71ae5a4SJacob Faibussowitsch PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm, PetscInt napp, const PetscInt myapp[], const PetscInt mypetsc[], AO *aoout)
485d71ae5a4SJacob Faibussowitsch {
4861447629fSBarry Smith IS isapp, ispetsc;
4871447629fSBarry Smith const PetscInt *app = myapp, *petsc = mypetsc;
4881447629fSBarry Smith
4891447629fSBarry Smith PetscFunctionBegin;
4909566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, napp, app, PETSC_USE_POINTER, &isapp));
4911447629fSBarry Smith if (mypetsc) {
4929566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, napp, petsc, PETSC_USE_POINTER, &ispetsc));
4931447629fSBarry Smith } else {
4941447629fSBarry Smith ispetsc = NULL;
4951447629fSBarry Smith }
4969566063dSJacob Faibussowitsch PetscCall(AOCreateMemoryScalableIS(isapp, ispetsc, aoout));
4979566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isapp));
49848a46eb9SPierre Jolivet if (mypetsc) PetscCall(ISDestroy(&ispetsc));
4993ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
5001447629fSBarry Smith }
5011447629fSBarry Smith
5025d83a8b1SBarry Smith /*@
5031447629fSBarry Smith AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.
5041447629fSBarry Smith
505c3339decSBarry Smith Collective
5061447629fSBarry Smith
5071447629fSBarry Smith Input Parameters:
5081447629fSBarry Smith + isapp - index set that defines an ordering
509b6971eaeSBarry Smith - ispetsc - index set that defines another ordering (may be `NULL` to use the natural ordering)
5101447629fSBarry Smith
5111447629fSBarry Smith Output Parameter:
5121447629fSBarry Smith . aoout - the new application ordering
5131447629fSBarry Smith
5141447629fSBarry Smith Level: beginner
5151447629fSBarry Smith
51695452b02SPatrick Sanan Notes:
517b6971eaeSBarry Smith The index sets `isapp` and `ispetsc` must contain the all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
5181447629fSBarry Smith that is there cannot be any "holes".
519cab54364SBarry Smith
520cab54364SBarry Smith Comparing with `AOCreateBasicIS()`, this routine trades memory with message communication.
521cab54364SBarry Smith
522cab54364SBarry Smith .seealso: [](sec_ao), [](sec_scatter), `AO`, `AOCreateBasicIS()`, `AOCreateMemoryScalable()`, `AODestroy()`
5231447629fSBarry Smith @*/
AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO * aoout)524d71ae5a4SJacob Faibussowitsch PetscErrorCode AOCreateMemoryScalableIS(IS isapp, IS ispetsc, AO *aoout)
525d71ae5a4SJacob Faibussowitsch {
5261447629fSBarry Smith MPI_Comm comm;
5271447629fSBarry Smith AO ao;
5281447629fSBarry Smith
5291447629fSBarry Smith PetscFunctionBegin;
5309566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)isapp, &comm));
5319566063dSJacob Faibussowitsch PetscCall(AOCreate(comm, &ao));
5329566063dSJacob Faibussowitsch PetscCall(AOSetIS(ao, isapp, ispetsc));
5339566063dSJacob Faibussowitsch PetscCall(AOSetType(ao, AOMEMORYSCALABLE));
5349566063dSJacob Faibussowitsch PetscCall(AOViewFromOptions(ao, NULL, "-ao_view"));
5351447629fSBarry Smith *aoout = ao;
5363ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
5371447629fSBarry Smith }
538