
/*
    The memory scalable AO application ordering routines. These store the
  local orderings on each processor.
*/

#include <../src/vec/is/ao/aoimpl.h>          /*I  "petscao.h"   I*/

typedef struct {
  PetscInt    *app_loc;    /* app_loc[i] is the partner for the ith local PETSc slot */
  PetscInt    *petsc_loc;  /* petsc_loc[j] is the partner for the jth local app slot */
  PetscLayout map;         /* determines the local sizes of ao */
} AO_MemoryScalable;
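
/*
   Note: app_loc[] and petsc_loc[] store the two directions of the same
   permutation, distributed according to map. A hypothetical serial example
   with N = 4 and (app,petsc) pairs (1,0),(3,1),(0,2),(2,3):
       app_loc   = {1,3,0,2}    PETSc index i  -> application index
       petsc_loc = {2,0,3,1}    application j  -> PETSc index
   so the two arrays are inverse permutations of each other. In parallel each
   process stores only the map->n entries it owns.
*/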

/*
   The orderings are distributed, so rank 0 receives each process's portion and prints the entire AO
*/
#undef __FUNCT__
#define __FUNCT__ "AOView_MemoryScalable"
PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size;
  AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
  PetscBool         iascii;
  PetscMPIInt       tag_app,tag_petsc;
  PetscLayout       map = aomems->map;
  PetscInt          *app,*app_loc,*petsc,*petsc_loc,len,i,j;
  MPI_Status        status;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (!iascii) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name);

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);CHKERRQ(ierr);

  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_app);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %D\n",ao->N);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,  "PETSc->App  App->PETSc\n");CHKERRQ(ierr);

    ierr = PetscMalloc2(map->N,&app,map->N,&petsc);CHKERRQ(ierr);
    len  = map->n;
    /* print local AO */
    ierr = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",rank);CHKERRQ(ierr);
    for (i=0; i<len; i++) {
      ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);CHKERRQ(ierr);
    }

    /* recv and print off-processor's AO */
    for (i=1; i<size; i++) {
      len       = map->range[i+1] - map->range[i];
      app_loc   = app  + map->range[i];
      petsc_loc = petsc+ map->range[i];
      ierr      = MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr);
      ierr      = MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr);
      ierr      = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",i);CHKERRQ(ierr);
      for (j=0; j<len; j++) {
        ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree2(app,petsc);CHKERRQ(ierr);

  } else {
    /* send values */
    ierr = MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr);
    ierr = MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr);
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "AODestroy_MemoryScalable"
PetscErrorCode AODestroy_MemoryScalable(AO ao)
{
  AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(aomems->app_loc,aomems->petsc_loc);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&aomems->map);CHKERRQ(ierr);
  ierr = PetscFree(aomems);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Input Parameters:
+   ao - the application ordering context
.   n  - the number of integers in ia[]
.   ia - the integers; these are replaced with their mapped value
-   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"

   Output Parameter:
.   ia - the mapped integers
 */
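
/*
   The mapping below is a two-phase rendezvous (a sketch of the steps):
     1) bin the entries of ia[] by owning process (using map->range); entries
        this process owns are mapped immediately through maploc[]
     2) send each owner its requests (tag1); each owner maps the received
        indices through its local maploc[] and sends the answers back (tag2)
     3) unpack the answers into ia[]; any index outside [0,N) becomes -1
*/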
#undef __FUNCT__
#define __FUNCT__ "AOMap_MemoryScalable_private"
PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,PetscInt *maploc)
{
  PetscErrorCode    ierr;
  AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
  MPI_Comm          comm;
  PetscMPIInt       rank,size,tag1,tag2;
  PetscInt          *owner,*start,*sizes,nsends,nreceives;
  PetscInt          nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2;
  PetscInt          *owners = aomems->map->range;
  MPI_Request       *send_waits,*recv_waits,*send_waits2,*recv_waits2;
  MPI_Status        recv_status;
  PetscMPIInt       nindices,source,widx;
  PetscInt          *rbuf,*sbuf;
  MPI_Status        *send_status,*send_status2;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)ao,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  ierr = PetscMalloc2(2*size,&sizes,size,&start);CHKERRQ(ierr);
  ierr = PetscMemzero(sizes,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&owner);CHKERRQ(ierr);

  j       = 0;
  lastidx = -1;
  for (i=0; i<n; i++) {
    /* if indices are NOT locally sorted, need to start search at the beginning */
    if (lastidx > (idx = ia[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++;     /* num of indices to be sent */
        sizes[2*j+1] = 1; /* send to proc[j] */
        owner[i]      = j;
        break;
      }
    }
  }
  sizes[2*rank]=sizes[2*rank+1]=0; /* no message to self; locally owned indices are mapped in place below */
  nsends        = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr);
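  /* PetscMaxSum() gives nmax = an upper bound on the length of any message this
     process will receive, and nreceives = the number of processes that will
     send to this one */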

  /* allocate arrays */
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag1);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag2);CHKERRQ(ierr);

  ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(nsends*nmax,&rindices2,nsends,&recv_waits2);CHKERRQ(ierr);

  ierr = PetscMalloc3(n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr);
  ierr = PetscMalloc3(n,&sindices2,nreceives,&send_waits2,nreceives,&send_status2);CHKERRQ(ierr);

  /* post 1st receives: receive others' requests
     since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.
  */
  for (i=0,count=0; i<nreceives; i++) {
    ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRQ(ierr);
  }

  /* do 1st sends:
      1) start[i] gives the starting index in sindices for stuff going to
         the ith processor
  */
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
  for (i=0; i<n; i++) {
    j = owner[i];
    if (j != rank) {
      sindices[start[j]++]  = ia[i];
    } else { /* compute my own map */
      if (ia[i] >= owners[rank] && ia[i] < owners[rank+1]) {
        ia[i] = maploc[ia[i]-owners[rank]];
      } else {
        ia[i] = -1;  /* ia[i] is outside the range [0,N); map it to -1 */
      }
    }
  }

  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
  for (i=0,count=0; i<size; i++) {
    if (sizes[2*i+1]) {
      /* send my request to others */
      ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag1,comm,send_waits+count);CHKERRQ(ierr);
      /* post receive for the answer of my request */
      ierr = MPI_Irecv(sindices2+start[i],sizes[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);CHKERRQ(ierr);
      count++;
    }
  }
  if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %D != count %D",nsends,count);

  /* wait on 1st sends */
  if (nsends) {
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
  }

  /* 1st recvs: others' requests */
  for (j=0; j< nreceives; j++) {
    ierr   = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr); /* widx: index of the receive request that completed */
    ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
    rbuf   = rindices+nmax*widx; /* global index */
    source = recv_status.MPI_SOURCE;

    /* compute mapping */
    sbuf = rbuf;
    for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]];
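    /* the answers are written in place over the incoming request (sbuf aliases rbuf), so no separate reply buffer is needed */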

    /* send mapping back to the sender */
    ierr = MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);CHKERRQ(ierr);
  }

  /* wait on 2nd sends */
  if (nreceives) {
    ierr = MPI_Waitall(nreceives,send_waits2,send_status2);CHKERRQ(ierr);
  }

  /* 2nd recvs: for the answer of my request */
  for (j=0; j< nsends; j++) {
    ierr   = MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
    source = recv_status.MPI_SOURCE;
    /* pack output ia[] */
    rbuf  = sindices2+start[source];
    count = 0;
    for (i=0; i<n; i++) {
      if (source == owner[i]) ia[i] = rbuf[count++];
    }
  }

  /* free arrays */
  ierr = PetscFree2(sizes,start);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
  ierr = PetscFree2(rindices2,recv_waits2);CHKERRQ(ierr);
  ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
  ierr = PetscFree3(sindices2,send_waits2,send_status2);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "AOPetscToApplication_MemoryScalable"
PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
{
  PetscErrorCode    ierr;
  AO_MemoryScalable *aomems  = (AO_MemoryScalable*)ao->data;
  PetscInt          *app_loc = aomems->app_loc;

  PetscFunctionBegin;
  ierr = AOMap_MemoryScalable_private(ao,n,ia,app_loc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "AOApplicationToPetsc_MemoryScalable"
PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
{
  PetscErrorCode    ierr;
  AO_MemoryScalable *aomems    = (AO_MemoryScalable*)ao->data;
  PetscInt          *petsc_loc = aomems->petsc_loc;

  PetscFunctionBegin;
  ierr = AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

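/* the trailing zeros are the unimplemented permute operations of struct _AOOps */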
static struct _AOOps AOOps_MemoryScalable = {
  AOView_MemoryScalable,
  AODestroy_MemoryScalable,
  AOPetscToApplication_MemoryScalable,
  AOApplicationToPetsc_MemoryScalable,
  0,
  0,
  0,
  0
};

#undef __FUNCT__
#define __FUNCT__ "AOCreateMemoryScalable_private"
PetscErrorCode  AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc)
{
  PetscErrorCode    ierr;
  AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
  PetscLayout       map     = aomems->map;
  PetscInt          n_local = map->n,i,j;
  PetscMPIInt       rank,size,tag;
  PetscInt          *owner,*start,*sizes,nsends,nreceives;
  PetscInt          nmax,count,*sindices,*rindices,idx,lastidx;
  PetscInt          *owners = aomems->map->range;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status;
  PetscMPIInt       nindices,widx;
  PetscInt          *rbuf;
  PetscInt          n=napp,ip,ia;
  MPI_Status        *send_status;

  PetscFunctionBegin;
  ierr = PetscMemzero(aomap_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);

  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /*  first count number of contributors (of from_array[]) to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&owner);CHKERRQ(ierr);

  j       = 0;
  lastidx = -1;
  for (i=0; i<n; i++) {
    /* if indices are NOT locally sorted, need to start search at the beginning */
    if (lastidx > (idx = from_array[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]  += 2; /* num of indices to be sent - in pairs (ip,ia) */
        sizes[2*j+1] = 1; /* send to proc[j] */
        owner[i]      = j;
        break;
      }
    }
  }
  sizes[2*rank]=sizes[2*rank+1]=0; /* no message to self; those pairs are stored locally below */
  nsends        = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr);

  /* allocate arrays */
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag);CHKERRQ(ierr);
  ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr);
  ierr = PetscMalloc3(2*n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&start);CHKERRQ(ierr);

  /* post receives: */
  for (i=0; i<nreceives; i++) {
    ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) start[i] gives the starting index in sindices for stuff going to
         the ith processor
  */
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
  for (i=0; i<n; i++) {
    j = owner[i];
    if (j != rank) {
      ip                   = from_array[i];
      ia                   = to_array[i];
      sindices[start[j]++] = ip;
      sindices[start[j]++] = ia;
    } else { /* compute my own map */
      ip            = from_array[i] - owners[rank];
      ia            = to_array[i];
      aomap_loc[ip] = ia;
    }
  }

  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
  for (i=0,count=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count);CHKERRQ(ierr);
      count++;
    }
  }
  if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %D != count %D",nsends,count);

  /* wait on sends */
  if (nsends) {
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
  }

  /* recvs */
  count=0;
  for (j= nreceives; j>0; j--) {
    ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
    rbuf = rindices+nmax*widx; /* global index */

    /* compute local mapping */
    for (i=0; i<nindices; i+=2) { /* pack aomap_loc */
      ip            = rbuf[i] - owners[rank]; /* local index */
      ia            = rbuf[i+1];
      aomap_loc[ip] = ia;
    }
    count++;
  }

  ierr = PetscFree(start);CHKERRQ(ierr);
  ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
  ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "AOCreate_MemoryScalable"
PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
{
  PetscErrorCode    ierr;
  IS                isapp=ao->isapp,ispetsc=ao->ispetsc;
  const PetscInt    *mypetsc,*myapp;
  PetscInt          napp,n_local,N,i,start,*petsc,*lens,*disp;
  MPI_Comm          comm;
  AO_MemoryScalable *aomems;
  PetscLayout       map;
  PetscMPIInt       size,rank;

  PetscFunctionBegin;
  /* create special struct aomems */
  ierr     = PetscNewLog(ao,&aomems);CHKERRQ(ierr);
  ao->data = (void*) aomems;
  ierr     = PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));CHKERRQ(ierr);
  ierr     = PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);CHKERRQ(ierr);

  /* transmit all local lengths of isapp to all processors */
  ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,&lens,size,&disp);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isapp,&napp);CHKERRQ(ierr);
  ierr = MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);CHKERRQ(ierr);

  N = 0;
  for (i = 0; i < size; i++) {
    disp[i] = N;
    N      += lens[i];
  }
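  /* disp[i] is now the global offset of the first entry contributed by process i, and N the total ordering size */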

  /* If ispetsc is NULL then use the "natural" numbering */
  if (napp) {
    if (!ispetsc) {
      start = disp[rank];
      ierr  = PetscMalloc1(napp+1, &petsc);CHKERRQ(ierr);
      for (i=0; i<napp; i++) petsc[i] = start + i;
    } else {
      ierr  = ISGetIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
      petsc = (PetscInt*)mypetsc;
    }
  }

  /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
  ierr    = PetscLayoutCreate(comm,&map);CHKERRQ(ierr);
  map->bs = 1;
  map->N  = N;
  ierr    = PetscLayoutSetUp(map);CHKERRQ(ierr);

  ao->N       = N;
  ao->n       = map->n;
  aomems->map = map;

  /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
  n_local = map->n;
  ierr    = PetscMalloc2(n_local, &aomems->app_loc,n_local,&aomems->petsc_loc);CHKERRQ(ierr);
  ierr    = PetscLogObjectMemory((PetscObject)ao,2*n_local*sizeof(PetscInt));CHKERRQ(ierr);
  ierr    = PetscMemzero(aomems->app_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
  ierr    = PetscMemzero(aomems->petsc_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
  ierr    = ISGetIndices(isapp,&myapp);CHKERRQ(ierr);

  ierr = AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);CHKERRQ(ierr);
  ierr = AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);CHKERRQ(ierr);

  ierr = ISRestoreIndices(isapp,&myapp);CHKERRQ(ierr);
  if (napp) {
    if (ispetsc) {
      ierr = ISRestoreIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
    } else {
      ierr = PetscFree(petsc);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree2(lens,disp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "AOCreateMemoryScalable"
/*@C
   AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator that is to share AO
.  napp - size of integer arrays
.  myapp - integer array that defines an ordering
-  mypetsc - integer array that defines another ordering (may be NULL to
             indicate the natural ordering, that is 0,1,2,3,...)

   Output Parameter:
.  aoout - the new application ordering

   Level: beginner

    Notes: The arrays myapp and mypetsc must each contain all the integers from 0 to napp-1 with no duplicates; that is, there cannot be any "holes"
           in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices.
           Compared with AOCreateBasic(), this routine uses less memory at the cost of extra message communication.
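
   Example: a minimal sketch, run on one process (in parallel each process
   passes its local slice of the ordering); error checking is omitted:
.vb
      PetscInt myapp[]   = {3,2,1,0};   /* application ordering: a permutation of 0..3 */
      PetscInt indices[] = {0,1,2,3};
      AO       ao;
      AOCreateMemoryScalable(PETSC_COMM_WORLD,4,myapp,NULL,&ao);  /* NULL -> natural PETSc ordering */
      AOApplicationToPetsc(ao,4,indices);                         /* indices[] becomes {3,2,1,0} */
      AODestroy(&ao);
.ve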

.keywords: AO, create

.seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc()
@*/
PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout)
{
  PetscErrorCode ierr;
  IS             isapp,ispetsc;
  const PetscInt *app=myapp,*petsc=mypetsc;

  PetscFunctionBegin;
  ierr = ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);CHKERRQ(ierr);
  if (mypetsc) {
    ierr = ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);CHKERRQ(ierr);
  } else {
    ispetsc = NULL;
  }
  ierr = AOCreateMemoryScalableIS(isapp,ispetsc,aoout);CHKERRQ(ierr);
  ierr = ISDestroy(&isapp);CHKERRQ(ierr);
  if (mypetsc) {
    ierr = ISDestroy(&ispetsc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "AOCreateMemoryScalableIS"
/*@C
   AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.

   Collective on IS

   Input Parameters:
+  isapp - index set that defines an ordering
-  ispetsc - index set that defines another ordering (may be NULL to use the
             natural ordering)

   Output Parameter:
.  aoout - the new application ordering

   Level: beginner

    Notes: The index sets isapp and ispetsc must contain all the integers from 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
           that is, there cannot be any "holes".
           Compared with AOCreateBasicIS(), this routine uses less memory at the cost of extra message communication.

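   Example: a minimal sketch on one process (error checking omitted); passing
   NULL for ispetsc selects the natural PETSc ordering:
.vb
      PetscInt myapp[] = {3,2,1,0};
      IS       isapp;
      AO       ao;
      ISCreateGeneral(PETSC_COMM_WORLD,4,myapp,PETSC_COPY_VALUES,&isapp);
      AOCreateMemoryScalableIS(isapp,NULL,&ao);
      ISDestroy(&isapp);   /* the AO keeps its own reference */
      AODestroy(&ao);
.ve
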
.keywords: AO, create

.seealso: AOCreateMemoryScalable(), AODestroy()
@*/
PetscErrorCode  AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout)
{
  PetscErrorCode ierr;
  MPI_Comm       comm;
  AO             ao;

  PetscFunctionBegin;
  ierr   = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
  ierr   = AOCreate(comm,&ao);CHKERRQ(ierr);
  ierr   = AOSetIS(ao,isapp,ispetsc);CHKERRQ(ierr);
  ierr   = AOSetType(ao,AOMEMORYSCALABLE);CHKERRQ(ierr);
  ierr   = AOViewFromOptions(ao,NULL,"-ao_view");CHKERRQ(ierr);
  *aoout = ao;
  PetscFunctionReturn(0);
}
572