xref: /petsc/src/vec/is/ao/impls/memscalable/aomemscalable.c (revision ccb4e88a40f0b86eaeca07ff64c64e4de2fae686)
1 
2 /*
3     The memory scalable AO application ordering routines. These store the
4   orderings on each processor for that processor's range of values
5 */
6 
7 #include <../src/vec/is/ao/aoimpl.h>          /*I  "petscao.h"   I*/
8 
9 typedef struct {
10   PetscInt    *app_loc;    /* app_loc[i] is the partner for the ith local PETSc slot */
11   PetscInt    *petsc_loc;  /* petsc_loc[j] is the partner for the jth local app slot */
12   PetscLayout map;         /* determines the local sizes of ao */
13 } AO_MemoryScalable;
14 
15 /*
16        All processors ship the data to process 0 to be printed; note that this is not scalable because
17        process 0 allocates space for all the orderings entry across all the processes
18 */
19 PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer)
20 {
21   PetscErrorCode    ierr;
22   PetscMPIInt       rank,size;
23   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
24   PetscBool         iascii;
25   PetscMPIInt       tag_app,tag_petsc;
26   PetscLayout       map = aomems->map;
27   PetscInt          *app,*app_loc,*petsc,*petsc_loc,len,i,j;
28   MPI_Status        status;
29 
30   PetscFunctionBegin;
31   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
32   if (!iascii) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name);
33 
34   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);CHKERRMPI(ierr);
35   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);CHKERRMPI(ierr);
36 
37   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_app);CHKERRQ(ierr);
38   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);CHKERRQ(ierr);
39 
40   if (rank == 0) {
41     ierr = PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %D\n",ao->N);CHKERRQ(ierr);
42     ierr = PetscViewerASCIIPrintf(viewer,  "PETSc->App  App->PETSc\n");CHKERRQ(ierr);
43 
44     ierr = PetscMalloc2(map->N,&app,map->N,&petsc);CHKERRQ(ierr);
45     len  = map->n;
46     /* print local AO */
47     ierr = PetscViewerASCIIPrintf(viewer,"Process [%d]\n",rank);CHKERRQ(ierr);
48     for (i=0; i<len; i++) {
49       ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);CHKERRQ(ierr);
50     }
51 
52     /* recv and print off-processor's AO */
53     for (i=1; i<size; i++) {
54       len       = map->range[i+1] - map->range[i];
55       app_loc   = app  + map->range[i];
56       petsc_loc = petsc+ map->range[i];
57       ierr      = MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);CHKERRMPI(ierr);
58       ierr      = MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);CHKERRMPI(ierr);
59       ierr      = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",i);CHKERRQ(ierr);
60       for (j=0; j<len; j++) {
61         ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);CHKERRQ(ierr);
62       }
63     }
64     ierr = PetscFree2(app,petsc);CHKERRQ(ierr);
65 
66   } else {
67     /* send values */
68     ierr = MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));CHKERRMPI(ierr);
69     ierr = MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));CHKERRMPI(ierr);
70   }
71   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
72   PetscFunctionReturn(0);
73 }
74 
75 PetscErrorCode AODestroy_MemoryScalable(AO ao)
76 {
77   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
78   PetscErrorCode    ierr;
79 
80   PetscFunctionBegin;
81   ierr = PetscFree2(aomems->app_loc,aomems->petsc_loc);CHKERRQ(ierr);
82   ierr = PetscLayoutDestroy(&aomems->map);CHKERRQ(ierr);
83   ierr = PetscFree(aomems);CHKERRQ(ierr);
84   PetscFunctionReturn(0);
85 }
86 
87 /*
88    Input Parameters:
89 +   ao - the application ordering context
90 .   n  - the number of integers in ia[]
91 .   ia - the integers; these are replaced with their mapped value
92 -   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"
93 
94    Output Parameter:
95 .   ia - the mapped interges
96  */
97 PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,const PetscInt *maploc)
98 {
99   PetscErrorCode    ierr;
100   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
101   MPI_Comm          comm;
102   PetscMPIInt       rank,size,tag1,tag2;
103   PetscInt          *owner,*start,*sizes,nsends,nreceives;
104   PetscInt          nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2;
105   const PetscInt    *owners = aomems->map->range;
106   MPI_Request       *send_waits,*recv_waits,*send_waits2,*recv_waits2;
107   MPI_Status        recv_status;
108   PetscMPIInt       nindices,source,widx;
109   PetscInt          *rbuf,*sbuf;
110   MPI_Status        *send_status,*send_status2;
111 
112   PetscFunctionBegin;
113   ierr = PetscObjectGetComm((PetscObject)ao,&comm);CHKERRQ(ierr);
114   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
115   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
116 
117   /*  first count number of contributors to each processor */
118   ierr = PetscMalloc1(size,&start);CHKERRQ(ierr);
119   ierr = PetscCalloc2(2*size,&sizes,n,&owner);CHKERRQ(ierr);
120 
121   j       = 0;
122   lastidx = -1;
123   for (i=0; i<n; i++) {
124     if (ia[i] < 0) owner[i] = -1; /* mark negative entries (which are not to be mapped) with a special negative value */
125     if (ia[i] >= ao->N) owner[i] = -2; /* mark out of range entries with special negative value */
126     else {
127       /* if indices are NOT locally sorted, need to start search at the beginning */
128       if (lastidx > (idx = ia[i])) j = 0;
129       lastidx = idx;
130       for (; j<size; j++) {
131         if (idx >= owners[j] && idx < owners[j+1]) {
132           sizes[2*j]++;     /* num of indices to be sent */
133           sizes[2*j+1] = 1; /* send to proc[j] */
134           owner[i]     = j;
135           break;
136         }
137       }
138     }
139   }
140   sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
141   nsends        = 0;
142   for (i=0; i<size; i++) nsends += sizes[2*i+1];
143 
144   /* inform other processors of number of messages and max length*/
145   ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr);
146 
147   /* allocate arrays */
148   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag1);CHKERRQ(ierr);
149   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag2);CHKERRQ(ierr);
150 
151   ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr);
152   ierr = PetscMalloc2(nsends*nmax,&rindices2,nsends,&recv_waits2);CHKERRQ(ierr);
153 
154   ierr = PetscMalloc3(n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr);
155   ierr = PetscMalloc3(n,&sindices2,nreceives,&send_waits2,nreceives,&send_status2);CHKERRQ(ierr);
156 
157   /* post 1st receives: receive others requests
158      since we don't know how long each individual message is we
159      allocate the largest needed buffer for each receive. Potentially
160      this is a lot of wasted space.
161   */
162   for (i=0,count=0; i<nreceives; i++) {
163     ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRMPI(ierr);
164   }
165 
166   /* do 1st sends:
167       1) starts[i] gives the starting index in svalues for stuff going to
168          the ith processor
169   */
170   start[0] = 0;
171   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
172   for (i=0; i<n; i++) {
173     j = owner[i];
174     if (j == -1) continue; /* do not remap negative entries in ia[] */
175     else if (j == -2) { /* out of range entries get mapped to -1 */
176       ia[i] = -1;
177       continue;
178     } else if (j != rank) {
179       sindices[start[j]++]  = ia[i];
180     } else { /* compute my own map */
181       ia[i] = maploc[ia[i]-owners[rank]];
182     }
183   }
184 
185   start[0] = 0;
186   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
187   for (i=0,count=0; i<size; i++) {
188     if (sizes[2*i+1]) {
189       /* send my request to others */
190       ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag1,comm,send_waits+count);CHKERRMPI(ierr);
191       /* post receive for the answer of my request */
192       ierr = MPI_Irecv(sindices2+start[i],sizes[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);CHKERRMPI(ierr);
193       count++;
194     }
195   }
196   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);
197 
198   /* wait on 1st sends */
199   if (nsends) {
200     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRMPI(ierr);
201   }
202 
203   /* 1st recvs: other's requests */
204   for (j=0; j< nreceives; j++) {
205     ierr   = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRMPI(ierr); /* idx: index of handle for operation that completed */
206     ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRMPI(ierr);
207     rbuf   = rindices+nmax*widx; /* global index */
208     source = recv_status.MPI_SOURCE;
209 
210     /* compute mapping */
211     sbuf = rbuf;
212     for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]];
213 
214     /* send mapping back to the sender */
215     ierr = MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);CHKERRMPI(ierr);
216   }
217 
218   /* wait on 2nd sends */
219   if (nreceives) {
220     ierr = MPI_Waitall(nreceives,send_waits2,send_status2);CHKERRMPI(ierr);
221   }
222 
223   /* 2nd recvs: for the answer of my request */
224   for (j=0; j< nsends; j++) {
225     ierr   = MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);CHKERRMPI(ierr);
226     ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRMPI(ierr);
227     source = recv_status.MPI_SOURCE;
228     /* pack output ia[] */
229     rbuf  = sindices2+start[source];
230     count = 0;
231     for (i=0; i<n; i++) {
232       if (source == owner[i]) ia[i] = rbuf[count++];
233     }
234   }
235 
236   /* free arrays */
237   ierr = PetscFree(start);CHKERRQ(ierr);
238   ierr = PetscFree2(sizes,owner);CHKERRQ(ierr);
239   ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
240   ierr = PetscFree2(rindices2,recv_waits2);CHKERRQ(ierr);
241   ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
242   ierr = PetscFree3(sindices2,send_waits2,send_status2);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
247 {
248   PetscErrorCode    ierr;
249   AO_MemoryScalable *aomems  = (AO_MemoryScalable*)ao->data;
250   PetscInt          *app_loc = aomems->app_loc;
251 
252   PetscFunctionBegin;
253   ierr = AOMap_MemoryScalable_private(ao,n,ia,app_loc);CHKERRQ(ierr);
254   PetscFunctionReturn(0);
255 }
256 
257 PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
258 {
259   PetscErrorCode    ierr;
260   AO_MemoryScalable *aomems    = (AO_MemoryScalable*)ao->data;
261   PetscInt          *petsc_loc = aomems->petsc_loc;
262 
263   PetscFunctionBegin;
264   ierr = AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);CHKERRQ(ierr);
265   PetscFunctionReturn(0);
266 }
267 
268 static struct _AOOps AOOps_MemoryScalable = {
269   AOView_MemoryScalable,
270   AODestroy_MemoryScalable,
271   AOPetscToApplication_MemoryScalable,
272   AOApplicationToPetsc_MemoryScalable,
273   NULL,
274   NULL,
275   NULL,
276   NULL
277 };
278 
279 PetscErrorCode  AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc)
280 {
281   PetscErrorCode    ierr;
282   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
283   PetscLayout       map     = aomems->map;
284   PetscInt          n_local = map->n,i,j;
285   PetscMPIInt       rank,size,tag;
286   PetscInt          *owner,*start,*sizes,nsends,nreceives;
287   PetscInt          nmax,count,*sindices,*rindices,idx,lastidx;
288   PetscInt          *owners = aomems->map->range;
289   MPI_Request       *send_waits,*recv_waits;
290   MPI_Status        recv_status;
291   PetscMPIInt       nindices,widx;
292   PetscInt          *rbuf;
293   PetscInt          n=napp,ip,ia;
294   MPI_Status        *send_status;
295 
296   PetscFunctionBegin;
297   ierr = PetscArrayzero(aomap_loc,n_local);CHKERRQ(ierr);
298 
299   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
300   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
301 
302   /*  first count number of contributors (of from_array[]) to each processor */
303   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
304   ierr = PetscMalloc1(n,&owner);CHKERRQ(ierr);
305 
306   j       = 0;
307   lastidx = -1;
308   for (i=0; i<n; i++) {
309     /* if indices are NOT locally sorted, need to start search at the beginning */
310     if (lastidx > (idx = from_array[i])) j = 0;
311     lastidx = idx;
312     for (; j<size; j++) {
313       if (idx >= owners[j] && idx < owners[j+1]) {
314         sizes[2*j]  += 2; /* num of indices to be sent - in pairs (ip,ia) */
315         sizes[2*j+1] = 1; /* send to proc[j] */
316         owner[i]     = j;
317         break;
318       }
319     }
320   }
321   sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
322   nsends        = 0;
323   for (i=0; i<size; i++) nsends += sizes[2*i+1];
324 
325   /* inform other processors of number of messages and max length*/
326   ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr);
327 
328   /* allocate arrays */
329   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag);CHKERRQ(ierr);
330   ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr);
331   ierr = PetscMalloc3(2*n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr);
332   ierr = PetscMalloc1(size,&start);CHKERRQ(ierr);
333 
334   /* post receives: */
335   for (i=0; i<nreceives; i++) {
336     ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRMPI(ierr);
337   }
338 
339   /* do sends:
340       1) starts[i] gives the starting index in svalues for stuff going to
341          the ith processor
342   */
343   start[0] = 0;
344   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
345   for (i=0; i<n; i++) {
346     j = owner[i];
347     if (j != rank) {
348       ip                   = from_array[i];
349       ia                   = to_array[i];
350       sindices[start[j]++] = ip;
351       sindices[start[j]++] = ia;
352     } else { /* compute my own map */
353       ip            = from_array[i] - owners[rank];
354       ia            = to_array[i];
355       aomap_loc[ip] = ia;
356     }
357   }
358 
359   start[0] = 0;
360   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
361   for (i=0,count=0; i<size; i++) {
362     if (sizes[2*i+1]) {
363       ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count);CHKERRMPI(ierr);
364       count++;
365     }
366   }
367   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);
368 
369   /* wait on sends */
370   if (nsends) {
371     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRMPI(ierr);
372   }
373 
374   /* recvs */
375   count=0;
376   for (j= nreceives; j>0; j--) {
377     ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRMPI(ierr);
378     ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRMPI(ierr);
379     rbuf = rindices+nmax*widx; /* global index */
380 
381     /* compute local mapping */
382     for (i=0; i<nindices; i+=2) { /* pack aomap_loc */
383       ip            = rbuf[i] - owners[rank]; /* local index */
384       ia            = rbuf[i+1];
385       aomap_loc[ip] = ia;
386     }
387     count++;
388   }
389 
390   ierr = PetscFree(start);CHKERRQ(ierr);
391   ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
392   ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
393   ierr = PetscFree(owner);CHKERRQ(ierr);
394   ierr = PetscFree(sizes);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
399 {
400   PetscErrorCode    ierr;
401   IS                isapp=ao->isapp,ispetsc=ao->ispetsc;
402   const PetscInt    *mypetsc,*myapp;
403   PetscInt          napp,n_local,N,i,start,*petsc,*lens,*disp;
404   MPI_Comm          comm;
405   AO_MemoryScalable *aomems;
406   PetscLayout       map;
407   PetscMPIInt       size,rank;
408 
409   PetscFunctionBegin;
410   if (!isapp) SETERRQ(PetscObjectComm((PetscObject)ao),PETSC_ERR_ARG_WRONGSTATE,"AOSetIS() must be called before AOSetType()");
411   /* create special struct aomems */
412   ierr     = PetscNewLog(ao,&aomems);CHKERRQ(ierr);
413   ao->data = (void*) aomems;
414   ierr     = PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));CHKERRQ(ierr);
415   ierr     = PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);CHKERRQ(ierr);
416 
417   /* transmit all local lengths of isapp to all processors */
418   ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
419   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
420   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
421   ierr = PetscMalloc2(size,&lens,size,&disp);CHKERRQ(ierr);
422   ierr = ISGetLocalSize(isapp,&napp);CHKERRQ(ierr);
423   ierr = MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);CHKERRMPI(ierr);
424 
425   N = 0;
426   for (i = 0; i < size; i++) {
427     disp[i] = N;
428     N      += lens[i];
429   }
430 
431   /* If ispetsc is 0 then use "natural" numbering */
432   if (napp) {
433     if (!ispetsc) {
434       start = disp[rank];
435       ierr  = PetscMalloc1(napp+1, &petsc);CHKERRQ(ierr);
436       for (i=0; i<napp; i++) petsc[i] = start + i;
437     } else {
438       ierr  = ISGetIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
439       petsc = (PetscInt*)mypetsc;
440     }
441   } else {
442     petsc = NULL;
443   }
444 
445   /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
446   ierr    = PetscLayoutCreate(comm,&map);CHKERRQ(ierr);
447   map->bs = 1;
448   map->N  = N;
449   ierr    = PetscLayoutSetUp(map);CHKERRQ(ierr);
450 
451   ao->N       = N;
452   ao->n       = map->n;
453   aomems->map = map;
454 
455   /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
456   n_local = map->n;
457   ierr    = PetscCalloc2(n_local, &aomems->app_loc,n_local,&aomems->petsc_loc);CHKERRQ(ierr);
458   ierr    = PetscLogObjectMemory((PetscObject)ao,2*n_local*sizeof(PetscInt));CHKERRQ(ierr);
459   ierr    = ISGetIndices(isapp,&myapp);CHKERRQ(ierr);
460 
461   ierr = AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);CHKERRQ(ierr);
462   ierr = AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);CHKERRQ(ierr);
463 
464   ierr = ISRestoreIndices(isapp,&myapp);CHKERRQ(ierr);
465   if (napp) {
466     if (ispetsc) {
467       ierr = ISRestoreIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
468     } else {
469       ierr = PetscFree(petsc);CHKERRQ(ierr);
470     }
471   }
472   ierr = PetscFree2(lens,disp);CHKERRQ(ierr);
473   PetscFunctionReturn(0);
474 }
475 
476 /*@C
477    AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.
478 
479    Collective
480 
481    Input Parameters:
482 +  comm - MPI communicator that is to share AO
483 .  napp - size of integer arrays
484 .  myapp - integer array that defines an ordering
485 -  mypetsc - integer array that defines another ordering (may be NULL to
486              indicate the natural ordering, that is 0,1,2,3,...)
487 
488    Output Parameter:
489 .  aoout - the new application ordering
490 
491    Level: beginner
492 
493     Notes:
494     The arrays myapp and mypetsc must contain the all the integers 0 to napp-1 with no duplicates; that is there cannot be any "holes"
495            in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices.
496            Comparing with AOCreateBasic(), this routine trades memory with message communication.
497 
498 .seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc()
499 @*/
500 PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout)
501 {
502   PetscErrorCode ierr;
503   IS             isapp,ispetsc;
504   const PetscInt *app=myapp,*petsc=mypetsc;
505 
506   PetscFunctionBegin;
507   ierr = ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);CHKERRQ(ierr);
508   if (mypetsc) {
509     ierr = ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);CHKERRQ(ierr);
510   } else {
511     ispetsc = NULL;
512   }
513   ierr = AOCreateMemoryScalableIS(isapp,ispetsc,aoout);CHKERRQ(ierr);
514   ierr = ISDestroy(&isapp);CHKERRQ(ierr);
515   if (mypetsc) {
516     ierr = ISDestroy(&ispetsc);CHKERRQ(ierr);
517   }
518   PetscFunctionReturn(0);
519 }
520 
521 /*@C
522    AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.
523 
524    Collective on IS
525 
526    Input Parameters:
527 +  isapp - index set that defines an ordering
528 -  ispetsc - index set that defines another ordering (may be NULL to use the
529              natural ordering)
530 
531    Output Parameter:
532 .  aoout - the new application ordering
533 
534    Level: beginner
535 
536     Notes:
537     The index sets isapp and ispetsc must contain the all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
538            that is there cannot be any "holes".
539            Comparing with AOCreateBasicIS(), this routine trades memory with message communication.
540 .seealso: AOCreateMemoryScalable(),  AODestroy()
541 @*/
542 PetscErrorCode  AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout)
543 {
544   PetscErrorCode ierr;
545   MPI_Comm       comm;
546   AO             ao;
547 
548   PetscFunctionBegin;
549   ierr   = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
550   ierr   = AOCreate(comm,&ao);CHKERRQ(ierr);
551   ierr   = AOSetIS(ao,isapp,ispetsc);CHKERRQ(ierr);
552   ierr   = AOSetType(ao,AOMEMORYSCALABLE);CHKERRQ(ierr);
553   ierr   = AOViewFromOptions(ao,NULL,"-ao_view");CHKERRQ(ierr);
554   *aoout = ao;
555   PetscFunctionReturn(0);
556 }
557