xref: /petsc/src/vec/is/ao/impls/memscalable/aomemscalable.c (revision 8cc058d9cd56c1ccb3be12a47760ddfc446aaffc)
1 
2 /*
3     The memory scalable AO application ordering routines. These store the
4   local orderings on each processor.
5 */
6 
7 #include <../src/vec/is/ao/aoimpl.h>          /*I  "petscao.h"   I*/
8 
typedef struct {
  PetscInt    *app_loc;    /* app_loc[i] is the application index paired with the ith local PETSc index */
  PetscInt    *petsc_loc;  /* petsc_loc[j] is the PETSc index paired with the jth local application index */
  PetscLayout map;         /* parallel layout that determines the local sizes (and ownership ranges) of the AO */
} AO_MemoryScalable;
14 
15 /*
       The ordering is stored distributed across the processes, so process 0 receives and prints every process's portion
17 */
#undef __FUNCT__
#define __FUNCT__ "AOView_MemoryScalable"
/*
  AOView_MemoryScalable - prints a memory-scalable AO to an ASCII viewer.

  Each rank owns only its slice of the two orderings (aomems->app_loc and
  aomems->petsc_loc).  Rank 0 prints its own slice, then receives and prints
  every other rank's slice in rank order; every other rank simply sends its
  two local arrays to rank 0.
*/
PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size;
  AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
  PetscBool         iascii;
  PetscMPIInt       tag_app,tag_petsc;
  PetscLayout       map = aomems->map;
  PetscInt          *app,*app_loc,*petsc,*petsc_loc,len,i,j;
  MPI_Status        status;

  PetscFunctionBegin;
  /* only ASCII viewers are supported for this AO type */
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (!iascii) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name);

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);CHKERRQ(ierr);

  /* distinct tags so the app and petsc messages from the same sender cannot be confused */
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_app);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %D\n",ao->N);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,  "PETSc->App  App->PETSc\n");CHKERRQ(ierr);

    /* scratch buffers large enough to hold any rank's slice, addressed by map->range offsets */
    ierr = PetscMalloc2(map->N,PetscInt,&app,map->N,PetscInt,&petsc);CHKERRQ(ierr);
    len  = map->n;
    /* print local AO */
    ierr = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",rank);CHKERRQ(ierr);
    for (i=0; i<len; i++) {
      ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);CHKERRQ(ierr);
    }

    /* recv and print off-processor's AO */
    for (i=1; i<size; i++) {
      len       = map->range[i+1] - map->range[i];  /* local size owned by rank i */
      app_loc   = app  + map->range[i];
      petsc_loc = petsc+ map->range[i];
      ierr      = MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr);
      ierr      = MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr);
      ierr      = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",i);CHKERRQ(ierr);
      for (j=0; j<len; j++) {
        ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree2(app,petsc);CHKERRQ(ierr);

  } else {
    /* send values */
    ierr = MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr);
    ierr = MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr);
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "AODestroy_MemoryScalable"
78 PetscErrorCode AODestroy_MemoryScalable(AO ao)
79 {
80   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
81   PetscErrorCode    ierr;
82 
83   PetscFunctionBegin;
84   ierr = PetscFree2(aomems->app_loc,aomems->petsc_loc);CHKERRQ(ierr);
85   ierr = PetscLayoutDestroy(&aomems->map);CHKERRQ(ierr);
86   ierr = PetscFree(aomems);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 /*
91    Input Parameters:
92 +   ao - the application ordering context
93 .   n  - the number of integers in ia[]
94 .   ia - the integers; these are replaced with their mapped value
95 -   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"
96 
97    Output Parameter:
.   ia - the mapped integers
99  */
#undef __FUNCT__
#define __FUNCT__ "AOMap_MemoryScalable_private"
/*
  AOMap_MemoryScalable_private - maps the n indices in ia[] in place through the
  distributed table maploc (either app_loc or petsc_loc).

  Protocol (two round trips):
    1) each rank determines the owner of every ia[i] from the layout ranges,
       maps its self-owned entries locally, and sends the rest to their owners
       (tag1), posting matching receives for the answers (tag2);
    2) each owner maps the requests it receives through its local maploc slice
       and sends the results back (tag2); finally the answers are scattered
       back into ia[] in the original order.
*/
PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,PetscInt *maploc)
{
  PetscErrorCode    ierr;
  AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
  MPI_Comm          comm;
  PetscMPIInt       rank,size,tag1,tag2;
  PetscInt          *owner,*start,*nprocs,nsends,nreceives;
  PetscInt          nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2;
  PetscInt          *owners = aomems->map->range;
  MPI_Request       *send_waits,*recv_waits,*send_waits2,*recv_waits2;
  MPI_Status        recv_status;
  PetscMPIInt       nindices,source,widx;
  PetscInt          *rbuf,*sbuf;
  MPI_Status        *send_status,*send_status2;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)ao,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  ierr = PetscMalloc2(2*size,PetscInt,&nprocs,size,PetscInt,&start);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscInt),&owner);CHKERRQ(ierr);
  ierr = PetscMemzero(owner,n*sizeof(PetscInt));CHKERRQ(ierr);

  /* owner search resumes from the previous hit; this is linear for sorted input */
  j       = 0;
  lastidx = -1;
  for (i=0; i<n; i++) {
    /* if indices are NOT locally sorted, need to start search at the beginning */
    if (lastidx > (idx = ia[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;     /* num of indices to be sent */
        nprocs[2*j+1] = 1; /* send to proc[j] */
        owner[i]      = j;
        break;
      }
    }
  }
  nprocs[2*rank]=nprocs[2*rank+1]=0; /* do not receive from self! */
  nsends        = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr);

  /* allocate arrays */
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag1);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag2);CHKERRQ(ierr);

  ierr = PetscMalloc2(nreceives*nmax,PetscInt,&rindices,nreceives,MPI_Request,&recv_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(nsends*nmax,PetscInt,&rindices2,nsends,MPI_Request,&recv_waits2);CHKERRQ(ierr);

  ierr = PetscMalloc3(n,PetscInt,&sindices,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);CHKERRQ(ierr);
  ierr = PetscMalloc3(n,PetscInt,&sindices2,nreceives,MPI_Request,&send_waits2,nreceives,MPI_Status,&send_status2);CHKERRQ(ierr);

  /* post 1st receives: receive others requests
     since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.
  */
  for (i=0,count=0; i<nreceives; i++) {
    ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRQ(ierr);
  }

  /* do 1st sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
  for (i=0; i<n; i++) {
    j = owner[i];
    if (j != rank) {
      /* note: start[j] is advanced while packing, hence the recompute below */
      sindices[start[j]++]  = ia[i];
    } else { /* compute my own map */
      if (ia[i] >= owners[rank] && ia[i] < owners[rank+1]) {
        ia[i] = maploc[ia[i]-owners[rank]];
      } else {
        ia[i] = -1;  /* ia[i] is not in the range of 0 and N-1, maps it to -1 */
      }
    }
  }

  /* recompute the start offsets consumed by the packing loop above */
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
  for (i=0,count=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      /* send my request to others */
      ierr = MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag1,comm,send_waits+count);CHKERRQ(ierr);
      /* post receive for the answer of my request */
      ierr = MPI_Irecv(sindices2+start[i],nprocs[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);CHKERRQ(ierr);
      count++;
    }
  }
  if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);

  /* wait on 1st sends */
  if (nsends) {
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
  }

  /* 1st recvs: other's requests */
  for (j=0; j< nreceives; j++) {
    ierr   = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr); /* widx: index of handle for operation that completed */
    ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
    rbuf   = rindices+nmax*widx; /* global index */
    source = recv_status.MPI_SOURCE;

    /* compute mapping; the reply overwrites the request buffer in place.
       NOTE(review): remote requests are assumed to lie in this rank's ownership
       range (the sender routed them by owner), so no range check here -- confirm */
    sbuf = rbuf;
    for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]];

    /* send mapping back to the sender */
    ierr = MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);CHKERRQ(ierr);
  }

  /* wait on 2nd sends */
  if (nreceives) {
    ierr = MPI_Waitall(nreceives,send_waits2,send_status2);CHKERRQ(ierr);
  }

  /* 2nd recvs: for the answer of my request */
  for (j=0; j< nsends; j++) {
    ierr   = MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
    source = recv_status.MPI_SOURCE;
    /* pack output ia[]: answers arrive in the same order the requests were packed */
    rbuf  = sindices2+start[source];
    count = 0;
    for (i=0; i<n; i++) {
      if (source == owner[i]) ia[i] = rbuf[count++];
    }
  }

  /* free arrays */
  ierr = PetscFree2(nprocs,start);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
  ierr = PetscFree2(rindices2,recv_waits2);CHKERRQ(ierr);
  ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
  ierr = PetscFree3(sindices2,send_waits2,send_status2);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "AOPetscToApplication_MemoryScalable"
251 PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
252 {
253   PetscErrorCode    ierr;
254   AO_MemoryScalable *aomems  = (AO_MemoryScalable*)ao->data;
255   PetscInt          *app_loc = aomems->app_loc;
256 
257   PetscFunctionBegin;
258   ierr = AOMap_MemoryScalable_private(ao,n,ia,app_loc);CHKERRQ(ierr);
259   PetscFunctionReturn(0);
260 }
261 
262 #undef __FUNCT__
263 #define __FUNCT__ "AOApplicationToPetsc_MemoryScalable"
264 PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
265 {
266   PetscErrorCode    ierr;
267   AO_MemoryScalable *aomems    = (AO_MemoryScalable*)ao->data;
268   PetscInt          *petsc_loc = aomems->petsc_loc;
269 
270   PetscFunctionBegin;
271   ierr = AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);CHKERRQ(ierr);
272   PetscFunctionReturn(0);
273 }
274 
/* Function table for the memory-scalable AO implementation.
   NOTE(review): the four trailing 0 slots are unimplemented optional ops
   (slot names per struct _AOOps in aoimpl.h -- confirm there). */
static struct _AOOps AOOps_MemoryScalable = {
  AOView_MemoryScalable,                /* view */
  AODestroy_MemoryScalable,             /* destroy */
  AOPetscToApplication_MemoryScalable,  /* petsctoapplication */
  AOApplicationToPetsc_MemoryScalable,  /* applicationtopetsc */
  0,
  0,
  0,
  0
};
285 
#undef __FUNCT__
#define __FUNCT__ "AOCreateMemoryScalable_private"
/*
  AOCreateMemoryScalable_private - fills the distributed table aomap_loc so
  that aomap_loc[from - owners[rank]] == to for every (from,to) pair taken from
  from_array[]/to_array[].

  Each rank routes its pairs to the rank that owns the 'from' index (per the
  AO's layout), packing them as (ip,ia) pairs in a single message per
  destination; owners store received pairs directly into their local slice.
*/
PetscErrorCode  AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc)
{
  PetscErrorCode    ierr;
  AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
  PetscLayout       map     = aomems->map;
  PetscInt          n_local = map->n,i,j;
  PetscMPIInt       rank,size,tag;
  PetscInt          *owner,*start,*nprocs,nsends,nreceives;
  PetscInt          nmax,count,*sindices,*rindices,idx,lastidx;
  PetscInt          *owners = aomems->map->range;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status;
  PetscMPIInt       nindices,widx;
  PetscInt          *rbuf;
  PetscInt          n=napp,ip,ia;
  MPI_Status        *send_status;

  PetscFunctionBegin;
  ierr = PetscMemzero(aomap_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);

  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /*  first count number of contributors (of from_array[]) to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscInt),&owner);CHKERRQ(ierr);

  /* owner search resumes from the previous hit; linear for sorted input */
  j       = 0;
  lastidx = -1;
  for (i=0; i<n; i++) {
    /* if indices are NOT locally sorted, need to start search at the beginning */
    if (lastidx > (idx = from_array[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]  += 2; /* num of indices to be sent - in pairs (ip,ia) */
        nprocs[2*j+1] = 1; /* send to proc[j] */
        owner[i]      = j;
        break;
      }
    }
  }
  nprocs[2*rank]=nprocs[2*rank+1]=0; /* do not receive from self! */
  nsends        = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr);

  /* allocate arrays */
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag);CHKERRQ(ierr);
  ierr = PetscMalloc2(nreceives*nmax,PetscInt,&rindices,nreceives,MPI_Request,&recv_waits);CHKERRQ(ierr);
  ierr = PetscMalloc3(2*n,PetscInt,&sindices,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscInt),&start);CHKERRQ(ierr);

  /* post receives: */
  for (i=0; i<nreceives; i++) {
    ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
  for (i=0; i<n; i++) {
    j = owner[i];
    if (j != rank) {
      /* pack (global from-index, to-value) pairs for rank j */
      ip                   = from_array[i];
      ia                   = to_array[i];
      sindices[start[j]++] = ip;
      sindices[start[j]++] = ia;
    } else { /* compute my own map */
      ip            = from_array[i] - owners[rank];
      ia            = to_array[i];
      aomap_loc[ip] = ia;
    }
  }

  /* recompute the start offsets consumed by the packing loop above */
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
  for (i=0,count=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count);CHKERRQ(ierr);
      count++;
    }
  }
  if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);

  /* wait on sends */
  if (nsends) {
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
  }

  /* recvs */
  count=0;
  for (j= nreceives; j>0; j--) {
    ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
    rbuf = rindices+nmax*widx; /* global index */

    /* compute local mapping */
    for (i=0; i<nindices; i+=2) { /* pack aomap_loc */
      ip            = rbuf[i] - owners[rank]; /* local index */
      ia            = rbuf[i+1];
      aomap_loc[ip] = ia;
    }
    count++;
  }

  ierr = PetscFree(start);CHKERRQ(ierr);
  ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
  ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
407 
#undef __FUNCT__
#define __FUNCT__ "AOCreate_MemoryScalable"
/*
  AOCreate_MemoryScalable - type constructor invoked by AOSetType().

  Builds the distributed app<->petsc tables from ao->isapp and ao->ispetsc
  (a NULL ispetsc means the natural ordering 0,1,2,...).  The AO's layout is
  created with global size N = sum of the local isapp lengths.
*/
PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
{
  PetscErrorCode    ierr;
  IS                isapp=ao->isapp,ispetsc=ao->ispetsc;
  const PetscInt    *mypetsc,*myapp;
  PetscInt          napp,n_local,N,i,start,*petsc,*lens,*disp;
  MPI_Comm          comm;
  AO_MemoryScalable *aomems;
  PetscLayout       map;
  PetscMPIInt       size,rank;

  PetscFunctionBegin;
  /* create special struct aomems */
  ierr     = PetscNewLog(ao, AO_MemoryScalable, &aomems);CHKERRQ(ierr);
  ao->data = (void*) aomems;
  ierr     = PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));CHKERRQ(ierr);
  ierr     = PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);CHKERRQ(ierr);

  /* transmit all local lengths of isapp to all processors */
  ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,PetscInt,&lens,size,PetscInt,&disp);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isapp,&napp);CHKERRQ(ierr);
  ierr = MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);CHKERRQ(ierr);

  /* disp[i] = global offset of rank i's entries; N = global size */
  N = 0;
  for (i = 0; i < size; i++) {
    disp[i] = N;
    N      += lens[i];
  }

  /* If ispetsc is 0 then use "natural" numbering */
  /* NOTE(review): when napp==0 'petsc' is left unset below; it is passed
     (but, with napp==0, never dereferenced) to AOCreateMemoryScalable_private -- confirm */
  if (napp) {
    if (!ispetsc) {
      start = disp[rank];
      ierr  = PetscMalloc((napp+1) * sizeof(PetscInt), &petsc);CHKERRQ(ierr);
      for (i=0; i<napp; i++) petsc[i] = start + i;
    } else {
      ierr  = ISGetIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
      petsc = (PetscInt*)mypetsc;
    }
  }

  /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
  ierr    = PetscLayoutCreate(comm,&map);CHKERRQ(ierr);
  map->bs = 1;
  map->N  = N;
  ierr    = PetscLayoutSetUp(map);CHKERRQ(ierr);

  ao->N       = N;
  ao->n       = map->n;
  aomems->map = map;

  /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
  n_local = map->n;
  ierr    = PetscMalloc2(n_local,PetscInt, &aomems->app_loc,n_local,PetscInt,&aomems->petsc_loc);CHKERRQ(ierr);
  ierr    = PetscLogObjectMemory(ao,2*n_local*sizeof(PetscInt));CHKERRQ(ierr);
  ierr    = PetscMemzero(aomems->app_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
  ierr    = PetscMemzero(aomems->petsc_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
  ierr    = ISGetIndices(isapp,&myapp);CHKERRQ(ierr);

  ierr = AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);CHKERRQ(ierr);
  ierr = AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);CHKERRQ(ierr);

  ierr = ISRestoreIndices(isapp,&myapp);CHKERRQ(ierr);
  if (napp) {
    if (ispetsc) {
      ierr = ISRestoreIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
    } else {
      ierr = PetscFree(petsc);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree2(lens,disp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
486 
487 #undef __FUNCT__
488 #define __FUNCT__ "AOCreateMemoryScalable"
489 /*@C
490    AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.
491 
492    Collective on MPI_Comm
493 
494    Input Parameters:
495 +  comm - MPI communicator that is to share AO
496 .  napp - size of integer arrays
497 .  myapp - integer array that defines an ordering
498 -  mypetsc - integer array that defines another ordering (may be NULL to
499              indicate the natural ordering, that is 0,1,2,3,...)
500 
501    Output Parameter:
502 .  aoout - the new application ordering
503 
504    Level: beginner
505 
    Notes: The arrays myapp and mypetsc must contain all the integers 0 to napp-1 with no duplicates; that is, there cannot be any "holes"
507            in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices.
508            Comparing with AOCreateBasic(), this routine trades memory with message communication.
509 
510 .keywords: AO, create
511 
512 .seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc()
513 @*/
514 PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout)
515 {
516   PetscErrorCode ierr;
517   IS             isapp,ispetsc;
518   const PetscInt *app=myapp,*petsc=mypetsc;
519 
520   PetscFunctionBegin;
521   ierr = ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);CHKERRQ(ierr);
522   if (mypetsc) {
523     ierr = ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);CHKERRQ(ierr);
524   } else {
525     ispetsc = NULL;
526   }
527   ierr = AOCreateMemoryScalableIS(isapp,ispetsc,aoout);CHKERRQ(ierr);
528   ierr = ISDestroy(&isapp);CHKERRQ(ierr);
529   if (mypetsc) {
530     ierr = ISDestroy(&ispetsc);CHKERRQ(ierr);
531   }
532   PetscFunctionReturn(0);
533 }
534 
535 #undef __FUNCT__
536 #define __FUNCT__ "AOCreateMemoryScalableIS"
537 /*@C
538    AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.
539 
540    Collective on IS
541 
542    Input Parameters:
543 +  isapp - index set that defines an ordering
544 -  ispetsc - index set that defines another ordering (may be NULL to use the
545              natural ordering)
546 
547    Output Parameter:
548 .  aoout - the new application ordering
549 
550    Level: beginner
551 
    Notes: The index sets isapp and ispetsc must contain all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
553            that is there cannot be any "holes".
554            Comparing with AOCreateBasicIS(), this routine trades memory with message communication.
555 .keywords: AO, create
556 
557 .seealso: AOCreateMemoryScalable(),  AODestroy()
558 @*/
559 PetscErrorCode  AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout)
560 {
561   PetscErrorCode ierr;
562   MPI_Comm       comm;
563   AO             ao;
564 
565   PetscFunctionBegin;
566   ierr   = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
567   ierr   = AOCreate(comm,&ao);CHKERRQ(ierr);
568   ierr   = AOSetIS(ao,isapp,ispetsc);CHKERRQ(ierr);
569   ierr   = AOSetType(ao,AOMEMORYSCALABLE);CHKERRQ(ierr);
570   *aoout = ao;
571   PetscFunctionReturn(0);
572 }
573