xref: /petsc/src/vec/is/ao/impls/memscalable/aomemscalable.c (revision d21efd2e5d911db017a545648c4fa4838359bb2d)
1 
2 /*
3     The memory scalable AO application ordering routines. These store the
4   orderings on each processor for that processor's range of values
5 */
6 
7 #include <../src/vec/is/ao/aoimpl.h>          /*I  "petscao.h"   I*/
8 
9 typedef struct {
10   PetscInt    *app_loc;    /* app_loc[i] is the partner for the ith local PETSc slot */
11   PetscInt    *petsc_loc;  /* petsc_loc[j] is the partner for the jth local app slot */
12   PetscLayout map;         /* determines the local sizes of ao */
13 } AO_MemoryScalable;
14 
15 /*
16        All processors ship the data to process 0 to be printed; note that this is not scalable because
17        process 0 allocates space for all the orderings entry across all the processes
18 */
PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size;
  AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
  PetscBool         iascii;
  PetscMPIInt       tag_app,tag_petsc;
  PetscLayout       map = aomems->map;
  PetscInt          *app,*app_loc,*petsc,*petsc_loc,len,i,j;
  MPI_Status        status;

  PetscFunctionBegin;
  /* only ASCII viewers are supported for this AO type */
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  PetscCheckFalse(!iascii,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name);

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);CHKERRMPI(ierr);

  /* two distinct tags so the app and petsc streams from the same sender cannot be confused */
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_app);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);CHKERRQ(ierr);

  if (rank == 0) {
    ierr = PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %" PetscInt_FMT "\n",ao->N);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,  "PETSc->App  App->PETSc\n");CHKERRQ(ierr);

    /* rank 0 allocates room for the ENTIRE global ordering: this is the non-scalable part noted above */
    ierr = PetscMalloc2(map->N,&app,map->N,&petsc);CHKERRQ(ierr);
    len  = map->n;
    /* print rank 0's own local piece first */
    ierr = PetscViewerASCIIPrintf(viewer,"Process [%d]\n",rank);CHKERRQ(ierr);
    for (i=0; i<len; i++) {
      ierr = PetscViewerASCIIPrintf(viewer,"%3" PetscInt_FMT "  %3" PetscInt_FMT "    %3" PetscInt_FMT "  %3" PetscInt_FMT "\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);CHKERRQ(ierr);
    }

    /* receive and print each remote rank's piece, in increasing rank order */
    for (i=1; i<size; i++) {
      len       = map->range[i+1] - map->range[i];
      app_loc   = app  + map->range[i];
      petsc_loc = petsc+ map->range[i];
      /* NOTE(review): (PetscMPIInt)len silently truncates if len exceeds INT_MAX with 64-bit PetscInt -- confirm whether a checked cast is warranted */
      ierr      = MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);CHKERRMPI(ierr);
      ierr      = MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);CHKERRMPI(ierr);
      ierr      = PetscViewerASCIIPrintf(viewer,"Process [%" PetscInt_FMT "]\n",i);CHKERRQ(ierr);
      for (j=0; j<len; j++) {
        ierr = PetscViewerASCIIPrintf(viewer,"%3" PetscInt_FMT "  %3" PetscInt_FMT "    %3" PetscInt_FMT "  %3" PetscInt_FMT "\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);CHKERRQ(ierr);
      }
    }
    ierr = PetscFree2(app,petsc);CHKERRQ(ierr);

  } else {
    /* non-root ranks ship their local app and petsc orderings to rank 0 */
    ierr = MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));CHKERRMPI(ierr);
    ierr = MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));CHKERRMPI(ierr);
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
74 
75 PetscErrorCode AODestroy_MemoryScalable(AO ao)
76 {
77   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
78   PetscErrorCode    ierr;
79 
80   PetscFunctionBegin;
81   ierr = PetscFree2(aomems->app_loc,aomems->petsc_loc);CHKERRQ(ierr);
82   ierr = PetscLayoutDestroy(&aomems->map);CHKERRQ(ierr);
83   ierr = PetscFree(aomems);CHKERRQ(ierr);
84   PetscFunctionReturn(0);
85 }
86 
87 /*
88    Input Parameters:
89 +   ao - the application ordering context
90 .   n  - the number of integers in ia[]
91 .   ia - the integers; these are replaced with their mapped value
92 -   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"
93 
94    Output Parameter:
.   ia - the mapped integers
96  */
97 PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,const PetscInt *maploc)
98 {
99   PetscErrorCode    ierr;
100   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
101   MPI_Comm          comm;
102   PetscMPIInt       rank,size,tag1,tag2;
103   PetscInt          *owner,*start,*sizes,nsends,nreceives;
104   PetscInt          nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2;
105   const PetscInt    *owners = aomems->map->range;
106   MPI_Request       *send_waits,*recv_waits,*send_waits2,*recv_waits2;
107   MPI_Status        recv_status;
108   PetscMPIInt       nindices,source,widx;
109   PetscInt          *rbuf,*sbuf;
110   MPI_Status        *send_status,*send_status2;
111 
112   PetscFunctionBegin;
113   ierr = PetscObjectGetComm((PetscObject)ao,&comm);CHKERRQ(ierr);
114   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
115   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
116 
117   /*  first count number of contributors to each processor */
118   ierr = PetscMalloc1(size,&start);CHKERRQ(ierr);
119   ierr = PetscCalloc2(2*size,&sizes,n,&owner);CHKERRQ(ierr);
120 
121   j       = 0;
122   lastidx = -1;
123   for (i=0; i<n; i++) {
124     if (ia[i] < 0) owner[i] = -1; /* mark negative entries (which are not to be mapped) with a special negative value */
125     if (ia[i] >= ao->N) owner[i] = -2; /* mark out of range entries with special negative value */
126     else {
127       /* if indices are NOT locally sorted, need to start search at the beginning */
128       if (lastidx > (idx = ia[i])) j = 0;
129       lastidx = idx;
130       for (; j<size; j++) {
131         if (idx >= owners[j] && idx < owners[j+1]) {
132           sizes[2*j]++;     /* num of indices to be sent */
133           sizes[2*j+1] = 1; /* send to proc[j] */
134           owner[i]     = j;
135           break;
136         }
137       }
138     }
139   }
140   sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
141   nsends        = 0;
142   for (i=0; i<size; i++) nsends += sizes[2*i+1];
143 
144   /* inform other processors of number of messages and max length*/
145   ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr);
146 
147   /* allocate arrays */
148   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag1);CHKERRQ(ierr);
149   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag2);CHKERRQ(ierr);
150 
151   ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr);
152   ierr = PetscMalloc2(nsends*nmax,&rindices2,nsends,&recv_waits2);CHKERRQ(ierr);
153 
154   ierr = PetscMalloc3(n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr);
155   ierr = PetscMalloc3(n,&sindices2,nreceives,&send_waits2,nreceives,&send_status2);CHKERRQ(ierr);
156 
157   /* post 1st receives: receive others requests
158      since we don't know how long each individual message is we
159      allocate the largest needed buffer for each receive. Potentially
160      this is a lot of wasted space.
161   */
162   for (i=0,count=0; i<nreceives; i++) {
163     ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRMPI(ierr);
164   }
165 
166   /* do 1st sends:
167       1) starts[i] gives the starting index in svalues for stuff going to
168          the ith processor
169   */
170   start[0] = 0;
171   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
172   for (i=0; i<n; i++) {
173     j = owner[i];
174     if (j == -1) continue; /* do not remap negative entries in ia[] */
175     else if (j == -2) { /* out of range entries get mapped to -1 */
176       ia[i] = -1;
177       continue;
178     } else if (j != rank) {
179       sindices[start[j]++]  = ia[i];
180     } else { /* compute my own map */
181       ia[i] = maploc[ia[i]-owners[rank]];
182     }
183   }
184 
185   start[0] = 0;
186   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
187   for (i=0,count=0; i<size; i++) {
188     if (sizes[2*i+1]) {
189       /* send my request to others */
190       ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag1,comm,send_waits+count);CHKERRMPI(ierr);
191       /* post receive for the answer of my request */
192       ierr = MPI_Irecv(sindices2+start[i],sizes[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);CHKERRMPI(ierr);
193       count++;
194     }
195   }
196   PetscCheckFalse(nsends != count,comm,PETSC_ERR_SUP,"nsends %" PetscInt_FMT " != count %" PetscInt_FMT,nsends,count);
197 
198   /* wait on 1st sends */
199   if (nsends) {
200     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRMPI(ierr);
201   }
202 
203   /* 1st recvs: other's requests */
204   for (j=0; j< nreceives; j++) {
205     ierr   = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRMPI(ierr); /* idx: index of handle for operation that completed */
206     ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRMPI(ierr);
207     rbuf   = rindices+nmax*widx; /* global index */
208     source = recv_status.MPI_SOURCE;
209 
210     /* compute mapping */
211     sbuf = rbuf;
212     for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]];
213 
214     /* send mapping back to the sender */
215     ierr = MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);CHKERRMPI(ierr);
216   }
217 
218   /* wait on 2nd sends */
219   if (nreceives) {
220     ierr = MPI_Waitall(nreceives,send_waits2,send_status2);CHKERRMPI(ierr);
221   }
222 
223   /* 2nd recvs: for the answer of my request */
224   for (j=0; j< nsends; j++) {
225     ierr   = MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);CHKERRMPI(ierr);
226     ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRMPI(ierr);
227     source = recv_status.MPI_SOURCE;
228     /* pack output ia[] */
229     rbuf  = sindices2+start[source];
230     count = 0;
231     for (i=0; i<n; i++) {
232       if (source == owner[i]) ia[i] = rbuf[count++];
233     }
234   }
235 
236   /* free arrays */
237   ierr = PetscFree(start);CHKERRQ(ierr);
238   ierr = PetscFree2(sizes,owner);CHKERRQ(ierr);
239   ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
240   ierr = PetscFree2(rindices2,recv_waits2);CHKERRQ(ierr);
241   ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
242   ierr = PetscFree3(sindices2,send_waits2,send_status2);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
247 {
248   PetscErrorCode    ierr;
249   AO_MemoryScalable *aomems  = (AO_MemoryScalable*)ao->data;
250   PetscInt          *app_loc = aomems->app_loc;
251 
252   PetscFunctionBegin;
253   ierr = AOMap_MemoryScalable_private(ao,n,ia,app_loc);CHKERRQ(ierr);
254   PetscFunctionReturn(0);
255 }
256 
257 PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
258 {
259   PetscErrorCode    ierr;
260   AO_MemoryScalable *aomems    = (AO_MemoryScalable*)ao->data;
261   PetscInt          *petsc_loc = aomems->petsc_loc;
262 
263   PetscFunctionBegin;
264   ierr = AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);CHKERRQ(ierr);
265   PetscFunctionReturn(0);
266 }
267 
/* Virtual function table installed on the AO object by AOCreate_MemoryScalable() */
static struct _AOOps AOOps_MemoryScalable = {
  PetscDesignatedInitializer(view,AOView_MemoryScalable),
  PetscDesignatedInitializer(destroy,AODestroy_MemoryScalable),
  PetscDesignatedInitializer(petsctoapplication,AOPetscToApplication_MemoryScalable),
  PetscDesignatedInitializer(applicationtopetsc,AOApplicationToPetsc_MemoryScalable),
};
274 
/*
   Builds one direction of the distributed mapping: for every pair
   (from_array[i], to_array[i]) the value to_array[i] is stored in aomap_loc on
   the rank that owns index from_array[i] (per the layout in aomems->map).

   Pairs owned locally are stored directly; the rest are shipped as (ip,ia)
   pairs to their owners in a single round of point-to-point messages.

   NOTE(review): owner[] is allocated with PetscMalloc1 (uninitialized) and is
   only written when from_array[i] falls inside some rank's range -- this
   assumes every entry of from_array[] lies in [0, map->N); confirm callers
   guarantee that.
*/
PetscErrorCode  AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc)
{
  PetscErrorCode    ierr;
  AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
  PetscLayout       map     = aomems->map;
  PetscInt          n_local = map->n,i,j;
  PetscMPIInt       rank,size,tag;
  PetscInt          *owner,*start,*sizes,nsends,nreceives;
  PetscInt          nmax,count,*sindices,*rindices,idx,lastidx;
  PetscInt          *owners = aomems->map->range;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status;
  PetscMPIInt       nindices,widx;
  PetscInt          *rbuf;
  PetscInt          n=napp,ip,ia;
  MPI_Status        *send_status;

  PetscFunctionBegin;
  /* start from a zeroed local map; entries never written remain 0 */
  ierr = PetscArrayzero(aomap_loc,n_local);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  /*  first count number of contributors (of from_array[]) to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&owner);CHKERRQ(ierr);

  j       = 0;
  lastidx = -1;
  for (i=0; i<n; i++) {
    /* if indices are NOT locally sorted, need to start search at the beginning */
    if (lastidx > (idx = from_array[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]  += 2; /* num of indices to be sent - in pairs (ip,ia) */
        sizes[2*j+1] = 1; /* send to proc[j] */
        owner[i]     = j;
        break;
      }
    }
  }
  sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
  nsends        = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,sizes,&nmax,&nreceives);CHKERRQ(ierr);

  /* allocate arrays */
  ierr = PetscObjectGetNewTag((PetscObject)ao,&tag);CHKERRQ(ierr);
  ierr = PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);CHKERRQ(ierr);
  ierr = PetscMalloc3(2*n,&sindices,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&start);CHKERRQ(ierr);

  /* post receives: one nmax-sized slot per expected message */
  for (i=0; i<nreceives; i++) {
    ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRMPI(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
  for (i=0; i<n; i++) {
    j = owner[i];
    if (j != rank) {
      /* pack the (global index, mapped value) pair for its owner */
      ip                   = from_array[i];
      ia                   = to_array[i];
      sindices[start[j]++] = ip;
      sindices[start[j]++] = ia;
    } else { /* compute my own map */
      ip            = from_array[i] - owners[rank];
      ia            = to_array[i];
      aomap_loc[ip] = ia;
    }
  }

  /* rebuild the per-destination offsets consumed above */
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
  for (i=0,count=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count);CHKERRMPI(ierr);
      count++;
    }
  }
  PetscCheckFalse(nsends != count,comm,PETSC_ERR_SUP,"nsends %" PetscInt_FMT " != count %" PetscInt_FMT,nsends,count);

  /* wait on sends */
  if (nsends) {
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRMPI(ierr);
  }

  /* recvs: unpack each incoming (ip,ia) stream into the local map */
  count=0;
  for (j= nreceives; j>0; j--) {
    ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRMPI(ierr);
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRMPI(ierr);
    rbuf = rindices+nmax*widx; /* global index */

    /* compute local mapping */
    for (i=0; i<nindices; i+=2) { /* pack aomap_loc */
      ip            = rbuf[i] - owners[rank]; /* local index */
      ia            = rbuf[i+1];
      aomap_loc[ip] = ia;
    }
    count++;
  }

  ierr = PetscFree(start);CHKERRQ(ierr);
  ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
  ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
393 
/*
   AOCreate_MemoryScalable - Fills in an AO created with type AOMEMORYSCALABLE:
   builds the layout from the local sizes of the application IS, then constructs
   both directions of the distributed mapping (app_loc and petsc_loc).
*/
PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
{
  PetscErrorCode    ierr;
  IS                isapp=ao->isapp,ispetsc=ao->ispetsc;
  const PetscInt    *mypetsc,*myapp;
  PetscInt          napp,n_local,N,i,start,*petsc,*lens,*disp;
  MPI_Comm          comm;
  AO_MemoryScalable *aomems;
  PetscLayout       map;
  PetscMPIInt       size,rank;

  PetscFunctionBegin;
  PetscCheckFalse(!isapp,PetscObjectComm((PetscObject)ao),PETSC_ERR_ARG_WRONGSTATE,"AOSetIS() must be called before AOSetType()");
  /* create special struct aomems */
  ierr     = PetscNewLog(ao,&aomems);CHKERRQ(ierr);
  ao->data = (void*) aomems;
  /* install the memory-scalable function table and type name */
  ierr     = PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));CHKERRQ(ierr);
  ierr     = PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);CHKERRQ(ierr);

  /* transmit all local lengths of isapp to all processors */
  ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
  ierr = PetscMalloc2(size,&lens,size,&disp);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isapp,&napp);CHKERRQ(ierr);
  ierr = MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);CHKERRMPI(ierr);

  /* prefix-sum the local lengths: disp[] holds each rank's global offset, N the global size */
  N = 0;
  for (i = 0; i < size; i++) {
    disp[i] = N;
    N      += lens[i];
  }

  /* If ispetsc is 0 then use "natural" numbering */
  if (napp) {
    if (!ispetsc) {
      /* natural ordering: consecutive global indices starting at this rank's offset */
      start = disp[rank];
      ierr  = PetscMalloc1(napp+1, &petsc);CHKERRQ(ierr);
      for (i=0; i<napp; i++) petsc[i] = start + i;
    } else {
      /* NOTE(review): const is cast away here; petsc is only read below, but confirm */
      ierr  = ISGetIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
      petsc = (PetscInt*)mypetsc;
    }
  } else {
    petsc = NULL;
  }

  /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
  ierr    = PetscLayoutCreate(comm,&map);CHKERRQ(ierr);
  map->bs = 1;
  map->N  = N;
  ierr    = PetscLayoutSetUp(map);CHKERRQ(ierr);

  ao->N       = N;
  ao->n       = map->n;
  aomems->map = map;

  /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
  n_local = map->n;
  ierr    = PetscCalloc2(n_local, &aomems->app_loc,n_local,&aomems->petsc_loc);CHKERRQ(ierr);
  ierr    = PetscLogObjectMemory((PetscObject)ao,2*n_local*sizeof(PetscInt));CHKERRQ(ierr);
  ierr    = ISGetIndices(isapp,&myapp);CHKERRQ(ierr);

  /* build both directions of the mapping with the same communication kernel */
  ierr = AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);CHKERRQ(ierr);
  ierr = AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);CHKERRQ(ierr);

  ierr = ISRestoreIndices(isapp,&myapp);CHKERRQ(ierr);
  if (napp) {
    if (ispetsc) {
      ierr = ISRestoreIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
    } else {
      /* the natural ordering was allocated above and is no longer needed */
      ierr = PetscFree(petsc);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree2(lens,disp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
471 
472 /*@C
473    AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.
474 
475    Collective
476 
477    Input Parameters:
478 +  comm - MPI communicator that is to share AO
479 .  napp - size of integer arrays
480 .  myapp - integer array that defines an ordering
481 -  mypetsc - integer array that defines another ordering (may be NULL to
482              indicate the natural ordering, that is 0,1,2,3,...)
483 
484    Output Parameter:
485 .  aoout - the new application ordering
486 
487    Level: beginner
488 
489     Notes:
    The arrays myapp and mypetsc must contain all the integers 0 to napp-1 with no duplicates; that is there cannot be any "holes"
491            in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices.
492            Comparing with AOCreateBasic(), this routine trades memory with message communication.
493 
494 .seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc()
495 @*/
496 PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout)
497 {
498   PetscErrorCode ierr;
499   IS             isapp,ispetsc;
500   const PetscInt *app=myapp,*petsc=mypetsc;
501 
502   PetscFunctionBegin;
503   ierr = ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);CHKERRQ(ierr);
504   if (mypetsc) {
505     ierr = ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);CHKERRQ(ierr);
506   } else {
507     ispetsc = NULL;
508   }
509   ierr = AOCreateMemoryScalableIS(isapp,ispetsc,aoout);CHKERRQ(ierr);
510   ierr = ISDestroy(&isapp);CHKERRQ(ierr);
511   if (mypetsc) {
512     ierr = ISDestroy(&ispetsc);CHKERRQ(ierr);
513   }
514   PetscFunctionReturn(0);
515 }
516 
517 /*@C
518    AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.
519 
520    Collective on IS
521 
522    Input Parameters:
523 +  isapp - index set that defines an ordering
524 -  ispetsc - index set that defines another ordering (may be NULL to use the
525              natural ordering)
526 
527    Output Parameter:
528 .  aoout - the new application ordering
529 
530    Level: beginner
531 
532     Notes:
    The index sets isapp and ispetsc must contain all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
534            that is there cannot be any "holes".
535            Comparing with AOCreateBasicIS(), this routine trades memory with message communication.
536 .seealso: AOCreateMemoryScalable(),  AODestroy()
537 @*/
538 PetscErrorCode  AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout)
539 {
540   PetscErrorCode ierr;
541   MPI_Comm       comm;
542   AO             ao;
543 
544   PetscFunctionBegin;
545   ierr   = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
546   ierr   = AOCreate(comm,&ao);CHKERRQ(ierr);
547   ierr   = AOSetIS(ao,isapp,ispetsc);CHKERRQ(ierr);
548   ierr   = AOSetType(ao,AOMEMORYSCALABLE);CHKERRQ(ierr);
549   ierr   = AOViewFromOptions(ao,NULL,"-ao_view");CHKERRQ(ierr);
550   *aoout = ao;
551   PetscFunctionReturn(0);
552 }
553