xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a703fe332e8dbffda8e152a45ef88862202e61cd)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.87 1995/10/11 17:55:12 curfman Exp curfman $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non-scalable way, since the
12 length of colmap equals the global number of columns.
13 */
14 static int CreateColmap_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18   int        n = B->n,i,shift = B->indexshift;
19 
20   aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21   PLogObjectMemory(mat,aij->N*sizeof(int));
22   PetscZero(aij->colmap,aij->N*sizeof(int));
23   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
24   return 0;
25 }
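
/* A small worked example of the mapping built above (values assumed
   purely for illustration): with indexshift = -1 (1-based storage) and
   garray = {3,7,12}, the sorted global columns present in B, we get
   colmap[3] = 1, colmap[7] = 2, colmap[12] = 3, and 0 elsewhere.
   Callers add the shift back, so an absent column yields a negative
   value that MatSetValues_MPIAIJ below can detect. */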
26 
27 extern int DisAssemble_MPIAIJ(Mat);
28 
29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30 {
31   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32   int ierr;
33   if (aij->numtids == 1) {
34     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
35   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36   return 0;
37 }
38 
39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
40                                int *idxn,Scalar *v,InsertMode addv)
41 {
42   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
43   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
44   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
45   int        cstart = aij->cstart, cend = aij->cend,row,col;
46   int        shift = C->indexshift;
47 
48   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
49     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
50   }
51   aij->insertmode = addv;
52   for ( i=0; i<m; i++ ) {
53     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
54     if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
55     if (idxm[i] >= rstart && idxm[i] < rend) {
56       row = idxm[i] - rstart;
57       for ( j=0; j<n; j++ ) {
58         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
59         if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
60         if (idxn[j] >= cstart && idxn[j] < cend){
61           col = idxn[j] - cstart;
62           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
63         }
64         else {
65           if (aij->assembled) {
66             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
67             col = aij->colmap[idxn[j]] + shift;
68             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
69               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
70               col =  idxn[j];
71             }
72           }
73           else col = idxn[j];
74           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
75         }
76       }
77     }
78     else {
79       ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr);
80     }
81   }
82   return 0;
83 }
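
/* A minimal sketch of the insertion paths above (a hypothetical usage,
   not part of the library; indices chosen for illustration):

     Scalar v   = 2.0;
     int    row = aij->rstart;       an owned row
     int    col = aij->cstart;       lands in the diagonal block aij->A
     ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES); CHKERRQ(ierr);

   Had col fallen outside [cstart,cend) the value would go to aij->B
   (translated through colmap once assembled), and had row been
   off-process it would be cached in aij->stash until assembly. */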
84 
85 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
86 {
87   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
88   MPI_Comm    comm = mat->comm;
89   int         numtids = aij->numtids, *owners = aij->rowners;
90   int         mytid = aij->mytid,tag = mat->tag, *owner,*starts,count,ierr;
91   MPI_Request *send_waits,*recv_waits;
92   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
93   InsertMode  addv;
94   Scalar      *rvalues,*svalues;
95 
96   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
97   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
98   if (addv == (ADD_VALUES|INSERT_VALUES)) {
99     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted, others added");
100   }
101   aij->insertmode = addv; /* in case this processor had no cache */
102 
103   /*  first count number of contributors to each processor */
104   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
105   PetscZero(nprocs,2*numtids*sizeof(int)); procs = nprocs + numtids;
106   owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
107   for ( i=0; i<aij->stash.n; i++ ) {
108     idx = aij->stash.idx[i];
109     for ( j=0; j<numtids; j++ ) {
110       if (idx >= owners[j] && idx < owners[j+1]) {
111         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
112       }
113     }
114   }
115   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
116 
117   /* inform other processors of number of messages and max length*/
118   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
119   MPI_Allreduce(procs, work,numtids,MPI_INT,MPI_SUM,comm);
120   nreceives = work[mytid];
121   MPI_Allreduce( nprocs, work,numtids,MPI_INT,MPI_MAX,comm);
122   nmax = work[mytid];
123   PETSCFREE(work);
124 
125   /* post receives:
126      1) each message will consist of ordered triples
127    (global row number, global column number, value); we store the
128    global indices as Scalars to simplify the message passing.
129        2) since we don't know how long each individual message is we
130      allocate the largest needed buffer for each receive. Potentially
131      this is a lot of wasted space.
132 
133 
134        This could be done better.
135   */
136   rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
137   CHKPTRQ(rvalues);
138   recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
139   CHKPTRQ(recv_waits);
140   for ( i=0; i<nreceives; i++ ) {
141     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
142               comm,recv_waits+i);
143   }
144 
145   /* do sends:
146       1) starts[i] gives the starting index in svalues for stuff going to
147          the ith processor
148   */
149   svalues = (Scalar *) PETSCMALLOC(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
150   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
151   CHKPTRQ(send_waits);
152   starts = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(starts);
153   starts[0] = 0;
154   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
155   for ( i=0; i<aij->stash.n; i++ ) {
156     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
157     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
158     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
159   }
160   PETSCFREE(owner);
161   starts[0] = 0;
162   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
163   count = 0;
164   for ( i=0; i<numtids; i++ ) {
165     if (procs[i]) {
166       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
167                 comm,send_waits+count++);
168     }
169   }
170   PETSCFREE(starts); PETSCFREE(nprocs);
171 
172   /* Free cache space */
173   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
174 
175   aij->svalues    = svalues;    aij->rvalues    = rvalues;
176   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
177   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
178   aij->rmax       = nmax;
179 
180   return 0;
181 }
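
/* The wire format used above: each message is a flat array of Scalars,

       [ row_0, col_0, val_0, row_1, col_1, val_1, ... ]

   with the global row and column indices cast to Scalar, so a message
   carrying k stashed entries has length 3*k; MatAssemblyEnd_MPIAIJ
   recovers the indices by casting back with PETSCREAL(). */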
182 extern int MatSetUpMultiply_MPIAIJ(Mat);
183 
184 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
185 {
186   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
187   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
188   MPI_Status  *send_status,recv_status;
189   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
190   int         row,col,other_disassembled,shift = C->indexshift;
191   Scalar      *values,val;
192   InsertMode  addv = aij->insertmode;
193 
194   /*  wait on receives */
195   while (count) {
196     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
197     /* unpack receives into our local space */
198     values = aij->rvalues + 3*imdex*aij->rmax;
199     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
200     n = n/3;
201     for ( i=0; i<n; i++ ) {
202       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
203       col = (int) PETSCREAL(values[3*i+1]);
204       val = values[3*i+2];
205       if (col >= aij->cstart && col < aij->cend) {
206         col -= aij->cstart;
207         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
208       }
209       else {
210         if (aij->assembled) {
211           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
212           col = aij->colmap[col] + shift;
213           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
214             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
215             col = (int) PETSCREAL(values[3*i+1]);
216           }
217         }
218         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
219       }
220     }
221     count--;
222   }
223   PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);
224 
225   /* wait on sends */
226   if (aij->nsends) {
227     send_status = (MPI_Status *) PETSCMALLOC(aij->nsends*sizeof(MPI_Status));
228     CHKPTRQ(send_status);
229     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
230     PETSCFREE(send_status);
231   }
232   PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);
233 
234   aij->insertmode = NOT_SET_VALUES;
235   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
236   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
237 
238   /* determine if any processor has disassembled, if so we must
239      also disassemble ourselves, so that we may reassemble. */
240   MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
241   if (aij->assembled && !other_disassembled) {
242     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
243   }
244 
245   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
246     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
247     aij->assembled = 1;
248   }
249   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
250   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
251 
252   return 0;
253 }
254 
255 static int MatZeroEntries_MPIAIJ(Mat A)
256 {
257   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
258   int        ierr;
259   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
260   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
261   return 0;
262 }
263 
264 /* the code does not do the diagonal entries correctly unless the
265    matrix is square and the column and row ownerships are identical.
266    This is a BUG. The only way to fix it seems to be to access
267    aij->A and aij->B directly and not through the MatZeroRows()
268    routine.
269 */
270 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
271 {
272   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
273   int            i,ierr,N, *rows,*owners = l->rowners,numtids = l->numtids;
274   int            *procs,*nprocs,j,found,idx,nsends,*work;
275   int            nmax,*svalues,*starts,*owner,nrecvs,mytid = l->mytid;
276   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
277   int            *lens,imdex,*lrows,*values;
278   MPI_Comm       comm = A->comm;
279   MPI_Request    *send_waits,*recv_waits;
280   MPI_Status     recv_status,*send_status;
281   IS             istmp;
282 
283   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
284   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
285   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
286 
287   /*  first count number of contributors to each processor */
288   nprocs = (int *) PETSCMALLOC( 2*numtids*sizeof(int) ); CHKPTRQ(nprocs);
289   PetscZero(nprocs,2*numtids*sizeof(int)); procs = nprocs + numtids;
290   owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
291   for ( i=0; i<N; i++ ) {
292     idx = rows[i];
293     found = 0;
294     for ( j=0; j<numtids; j++ ) {
295       if (idx >= owners[j] && idx < owners[j+1]) {
296         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
297       }
298     }
299     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
300   }
301   nsends = 0;  for ( i=0; i<numtids; i++ ) { nsends += procs[i];}
302 
303   /* inform other processors of number of messages and max length*/
304   work = (int *) PETSCMALLOC( numtids*sizeof(int) ); CHKPTRQ(work);
305   MPI_Allreduce( procs, work,numtids,MPI_INT,MPI_SUM,comm);
306   nrecvs = work[mytid];
307   MPI_Allreduce( nprocs, work,numtids,MPI_INT,MPI_MAX,comm);
308   nmax = work[mytid];
309   PETSCFREE(work);
310 
311   /* post receives:   */
312   rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
313   CHKPTRQ(rvalues);
314   recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
315   CHKPTRQ(recv_waits);
316   for ( i=0; i<nrecvs; i++ ) {
317     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
318   }
319 
320   /* do sends:
321       1) starts[i] gives the starting index in svalues for stuff going to
322          the ith processor
323   */
324   svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
325   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
326   CHKPTRQ(send_waits);
327   starts = (int *) PETSCMALLOC( (numtids+1)*sizeof(int) ); CHKPTRQ(starts);
328   starts[0] = 0;
329   for ( i=1; i<numtids; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
330   for ( i=0; i<N; i++ ) {
331     svalues[starts[owner[i]]++] = rows[i];
332   }
333   ISRestoreIndices(is,&rows);
334 
335   starts[0] = 0;
336   for ( i=1; i<numtids+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
337   count = 0;
338   for ( i=0; i<numtids; i++ ) {
339     if (procs[i]) {
340       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
341     }
342   }
343   PETSCFREE(starts);
344 
345   base = owners[mytid];
346 
347   /*  wait on receives */
348   lens   = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
349   source = lens + nrecvs;
350   count  = nrecvs; slen = 0;
351   while (count) {
352     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
353     /* unpack receives into our local space */
354     MPI_Get_count(&recv_status,MPI_INT,&n);
355     source[imdex]  = recv_status.MPI_SOURCE;
356     lens[imdex]  = n;
357     slen += n;
358     count--;
359   }
360   PETSCFREE(recv_waits);
361 
362   /* move the received data into the local row list */
363   lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
364   count = 0;
365   for ( i=0; i<nrecvs; i++ ) {
366     values = rvalues + i*nmax;
367     for ( j=0; j<lens[i]; j++ ) {
368       lrows[count++] = values[j] - base;
369     }
370   }
371   PETSCFREE(rvalues); PETSCFREE(lens);
372   PETSCFREE(owner); PETSCFREE(nprocs);
373 
374   /* actually zap the local rows */
375   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
376   PLogObjectParent(A,istmp);
377   PETSCFREE(lrows);
378   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
379   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
380   ierr = ISDestroy(istmp); CHKERRQ(ierr);
381 
382   /* wait on sends */
383   if (nsends) {
384     send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status));
385     CHKPTRQ(send_status);
386     MPI_Waitall(nsends,send_waits,send_status);
387     PETSCFREE(send_status);
388   }
389   PETSCFREE(send_waits); PETSCFREE(svalues);
390 
391   return 0;
392 }
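
/* A brief usage sketch (hypothetical rows; any process may list any
   global row, and the code above routes each row to its owner):

     IS     is;
     int    rows[2] = {0, 5};
     Scalar one = 1.0;
     ierr = ISCreateSeq(MPI_COMM_SELF,2,rows,&is); CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one); CHKERRQ(ierr);
     ierr = ISDestroy(is); CHKERRQ(ierr);

   Passing a null diag pointer leaves the zeroed rows with no diagonal
   entry set. */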
393 
394 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
395 {
396   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
397   int        ierr;
398 
399   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
400   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
401   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
402   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
403   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
404   return 0;
405 }
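
/* On each process the multiply above computes

       y = A_diag * x_local + B_offdiag * x_ghost

   where a->A holds the diagonal block, a->B the off-diagonal block, and
   a->lvec (the ghost values of x) is filled through the scatter context
   a->Mvctx; the communication is overlapped with the local product. */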
406 
407 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
408 {
409   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
410   int        ierr;
411   if (!a->assembled) SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix");
412   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
413   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
414   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
415   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
416   return 0;
417 }
418 
419 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
420 {
421   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
422   int        ierr;
423 
424   if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix");
425   /* do nondiagonal part */
426   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
427   /* send it on its way */
428   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
429                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
430   /* do local part */
431   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
432   /* receive remote parts: note this assumes the values are not actually */
433   /* inserted in yy until the next line, which is true for this implementation */
434   /* but may not always be true. */
435   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
436                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
437   return 0;
438 }
439 
440 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
441 {
442   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
443   int        ierr;
444 
445   if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIAIJ:must assemble matrix");
446   /* do nondiagonal part */
447   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
448   /* send it on its way */
449   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
450                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
451   /* do local part */
452   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
453   /* receive remote parts: note this assumes the values are not actually */
454   /* inserted in zz until the next line, which is true for this implementation */
455   /* but may not always be true. */
456   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
457                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
458   return 0;
459 }
460 
461 /*
462   This only works correctly for square matrices where the subblock A->A is the
463    diagonal block
464 */
465 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
466 {
467   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
468   if (!a->assembled) SETERRQ(1,"MatGetDiagonal_MPIAIJ:must assemble matrix");
469   return MatGetDiagonal(a->A,v);
470 }
471 
472 static int MatDestroy_MPIAIJ(PetscObject obj)
473 {
474   Mat        mat = (Mat) obj;
475   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
476   int        ierr;
477 #if defined(PETSC_LOG)
478   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
479 #endif
480   PETSCFREE(aij->rowners);
481   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
482   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
483   if (aij->colmap) PETSCFREE(aij->colmap);
484   if (aij->garray) PETSCFREE(aij->garray);
485   if (aij->lvec)   VecDestroy(aij->lvec);
486   if (aij->Mvctx)  VecScatterCtxDestroy(aij->Mvctx);
487   PETSCFREE(aij);
488   PLogObjectDestroy(mat);
489   PETSCHEADERDESTROY(mat);
490   return 0;
491 }
492 #include "draw.h"
493 #include "pinclude/pviewer.h"
494 
495 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
496 {
497   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
498   int         ierr;
499 
500   if (aij->numtids == 1) {
501     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
502   }
503   return 0;
504 }
505 
506 static int MatView_MPIAIJ_ASCIIorDraw(Mat mat,Viewer viewer)
507 {
508   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
509   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
510   int         ierr, format,shift = C->indexshift;
511   PetscObject vobj = (PetscObject) viewer;
512   FILE        *fd;
513 
514   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
515     ierr = ViewerFileGetFormat_Private(viewer,&format);
516     if (format == FILE_FORMAT_INFO) {
517       return 0;
518     }
519   }
520 
521   if (vobj->type == ASCII_FILE_VIEWER) {
522     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
523     MPIU_Seq_begin(mat->comm,1);
524     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
525            aij->mytid,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
526            aij->cend);
527     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
528     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
529     fflush(fd);
530     MPIU_Seq_end(mat->comm,1);
531   }
532   else {
533     int numtids = aij->numtids, mytid = aij->mytid;
534     if (numtids == 1) {
535       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
536     }
537     else {
538       /* assemble the entire matrix onto first processor. */
539       Mat         A;
540       Mat_SeqAIJ *Aloc;
541       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
542       Scalar      *a;
543 
544       if (!mytid) {
545         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr);
546       }
547       else {
548         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr);
549       }
550       PLogObjectParent(mat,A);
551 
552 
553       /* copy over the A part */
554       Aloc = (Mat_SeqAIJ*) aij->A->data;
555       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
556       row = aij->rstart;
557       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
558       for ( i=0; i<m; i++ ) {
559         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
560         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
561       }
562       aj = Aloc->j;
563       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
564 
565       /* copy over the B part */
566       Aloc = (Mat_SeqAIJ*) aij->B->data;
567       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
568       row = aij->rstart;
569       ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
570       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
571       for ( i=0; i<m; i++ ) {
572         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
573         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
574       }
575       PETSCFREE(ct);
576       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
577       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
578       if (!mytid) {
579         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
580       }
581       ierr = MatDestroy(A); CHKERRQ(ierr);
582     }
583   }
584   return 0;
585 }
586 
587 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
588 {
589   Mat         mat = (Mat) obj;
590   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
591   int         ierr;
592   PetscObject vobj = (PetscObject) viewer;
593 
594   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
595   if (!viewer) {
596     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
597   }
598   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
599   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
600     ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
601   }
602   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
603     ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
604   }
605   else if (vobj->cookie == DRAW_COOKIE) {
606     ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
607   }
608   else if (vobj->type == BINARY_FILE_VIEWER) {
609     return MatView_MPIAIJ_Binary(mat,viewer);
610   }
611   return 0;
612 }
613 
614 extern int MatMarkDiag_SeqAIJ(Mat);
615 /*
616     This has to provide several versions:
617 
618      1) the same as the sequential version
619      2) a) use only local smoothing, updating outer values only once
620         b) local smoothing, updating outer values each inner iteration
621      3) coloring, updating outer values between colors
622 */
623 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
624                            double fshift,int its,Vec xx)
625 {
626   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
627   Mat        AA = mat->A, BB = mat->B;
628   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
629   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
630   int        ierr,*idx, *diag;
631   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
632   Vec        tt;
633 
634   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");
635 
636   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
637   xs = x + shift; /* shift by one for index start of 1 */
638   ls = ls + shift;
639   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
640   diag = A->diag;
641   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
642     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
643   }
644   if (flag & SOR_EISENSTAT) {
645     /* Let  A = L + D + U, where L is strictly lower triangular,
646     U is strictly upper triangular, and D is diagonal; this routine applies
647 
648             (L + E)^{-1} A (U + E)^{-1}
649 
650     to a vector efficiently using Eisenstat's trick. This is for
651     the case of the SSOR preconditioner, so E is D/omega where omega
652     is the relaxation factor.
653     */
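    /* Why the stages below work: writing A = (L+E) + (U+E) + (D-2E) gives

           (L+E)^{-1} A (U+E)^{-1}
             = (U+E)^{-1} + (L+E)^{-1} + (L+E)^{-1} (D-2E) (U+E)^{-1},

       so with x = (U+E)^{-1} b and t = (L+E)^{-1} (b - (2E-D) x), the
       result is x + t, exactly the three stages computed below. */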
654     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
655     PLogObjectParent(matin,tt);
656     VecGetArray(tt,&t);
657     scale = (2.0/omega) - 1.0;
658     /*  x = (E + U)^{-1} b */
659     VecSet(&zero,mat->lvec);
660     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
661                               mat->Mvctx); CHKERRQ(ierr);
662     for ( i=m-1; i>-1; i-- ) {
663       n    = A->i[i+1] - diag[i] - 1;
664       idx  = A->j + diag[i] + !shift;
665       v    = A->a + diag[i] + !shift;
666       sum  = b[i];
667       SPARSEDENSEMDOT(sum,xs,v,idx,n);
668       d    = fshift + A->a[diag[i]+shift];
669       n    = B->i[i+1] - B->i[i];
670       idx  = B->j + B->i[i] + shift;
671       v    = B->a + B->i[i] + shift;
672       SPARSEDENSEMDOT(sum,ls,v,idx,n);
673       x[i] = omega*(sum/d);
674     }
675     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
676                             mat->Mvctx); CHKERRQ(ierr);
677 
678     /*  t = b - (2*E - D)x */
679     v = A->a;
680     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
681 
682     /*  t = (E + L)^{-1}t */
683     ts = t + shift; /* shifted by one for index start of 1 in A->a and A->j */
684     diag = A->diag;
685     VecSet(&zero,mat->lvec);
686     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
687                                                  mat->Mvctx); CHKERRQ(ierr);
688     for ( i=0; i<m; i++ ) {
689       n    = diag[i] - A->i[i];
690       idx  = A->j + A->i[i] + shift;
691       v    = A->a + A->i[i] + shift;
692       sum  = t[i];
693       SPARSEDENSEMDOT(sum,ts,v,idx,n);
694       d    = fshift + A->a[diag[i]+shift];
695       n    = B->i[i+1] - B->i[i];
696       idx  = B->j + B->i[i] + shift;
697       v    = B->a + B->i[i] + shift;
698       SPARSEDENSEMDOT(sum,ls,v,idx,n);
699       t[i] = omega*(sum/d);
700     }
701     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
702                                                     mat->Mvctx); CHKERRQ(ierr);
703     /*  x = x + t */
704     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
705     VecDestroy(tt);
706     return 0;
707   }
708 
709 
710   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
711     if (flag & SOR_ZERO_INITIAL_GUESS) {
712       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
713     }
714     else {
715       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
716       CHKERRQ(ierr);
717       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
718       CHKERRQ(ierr);
719     }
720     while (its--) {
721       /* go down through the rows */
722       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
723                               mat->Mvctx); CHKERRQ(ierr);
724       for ( i=0; i<m; i++ ) {
725         n    = A->i[i+1] - A->i[i];
726         idx  = A->j + A->i[i] + shift;
727         v    = A->a + A->i[i] + shift;
728         sum  = b[i];
729         SPARSEDENSEMDOT(sum,xs,v,idx,n);
730         d    = fshift + A->a[diag[i]+shift];
731         n    = B->i[i+1] - B->i[i];
732         idx  = B->j + B->i[i] + shift;
733         v    = B->a + B->i[i] + shift;
734         SPARSEDENSEMDOT(sum,ls,v,idx,n);
735         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
736       }
737       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
738                             mat->Mvctx); CHKERRQ(ierr);
739       /* come up through the rows */
740       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
741                               mat->Mvctx); CHKERRQ(ierr);
742       for ( i=m-1; i>-1; i-- ) {
743         n    = A->i[i+1] - A->i[i];
744         idx  = A->j + A->i[i] + shift;
745         v    = A->a + A->i[i] + shift;
746         sum  = b[i];
747         SPARSEDENSEMDOT(sum,xs,v,idx,n);
748         d    = fshift + A->a[diag[i]+shift];
749         n    = B->i[i+1] - B->i[i];
750         idx  = B->j + B->i[i] + shift;
751         v    = B->a + B->i[i] + shift;
752         SPARSEDENSEMDOT(sum,ls,v,idx,n);
753         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
754       }
755       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
756                             mat->Mvctx); CHKERRQ(ierr);
757     }
758   }
759   else if (flag & SOR_FORWARD_SWEEP){
760     if (flag & SOR_ZERO_INITIAL_GUESS) {
761       VecSet(&zero,mat->lvec);
762       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
763                               mat->Mvctx); CHKERRQ(ierr);
764       for ( i=0; i<m; i++ ) {
765         n    = diag[i] - A->i[i];
766         idx  = A->j + A->i[i] + shift;
767         v    = A->a + A->i[i] + shift;
768         sum  = b[i];
769         SPARSEDENSEMDOT(sum,xs,v,idx,n);
770         d    = fshift + A->a[diag[i]+shift];
771         n    = B->i[i+1] - B->i[i];
772         idx  = B->j + B->i[i] + shift;
773         v    = B->a + B->i[i] + shift;
774         SPARSEDENSEMDOT(sum,ls,v,idx,n);
775         x[i] = omega*(sum/d);
776       }
777       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
778                             mat->Mvctx); CHKERRQ(ierr);
779       its--;
780     }
781     while (its--) {
782       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
783       CHKERRQ(ierr);
784       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
785       CHKERRQ(ierr);
786       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
787                               mat->Mvctx); CHKERRQ(ierr);
788       for ( i=0; i<m; i++ ) {
789         n    = A->i[i+1] - A->i[i];
790         idx  = A->j + A->i[i] + shift;
791         v    = A->a + A->i[i] + shift;
792         sum  = b[i];
793         SPARSEDENSEMDOT(sum,xs,v,idx,n);
794         d    = fshift + A->a[diag[i]+shift];
795         n    = B->i[i+1] - B->i[i];
796         idx  = B->j + B->i[i] + shift;
797         v    = B->a + B->i[i] + shift;
798         SPARSEDENSEMDOT(sum,ls,v,idx,n);
799         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
800       }
801       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
802                             mat->Mvctx); CHKERRQ(ierr);
803     }
804   }
805   else if (flag & SOR_BACKWARD_SWEEP){
806     if (flag & SOR_ZERO_INITIAL_GUESS) {
807       VecSet(&zero,mat->lvec);
808       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
809                               mat->Mvctx); CHKERRQ(ierr);
810       for ( i=m-1; i>-1; i-- ) {
811         n    = A->i[i+1] - diag[i] - 1;
812         idx  = A->j + diag[i] + !shift;
813         v    = A->a + diag[i] + !shift;
814         sum  = b[i];
815         SPARSEDENSEMDOT(sum,xs,v,idx,n);
816         d    = fshift + A->a[diag[i]+shift];
817         n    = B->i[i+1] - B->i[i];
818         idx  = B->j + B->i[i] + shift;
819         v    = B->a + B->i[i] + shift;
820         SPARSEDENSEMDOT(sum,ls,v,idx,n);
821         x[i] = omega*(sum/d);
822       }
823       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
824                             mat->Mvctx); CHKERRQ(ierr);
825       its--;
826     }
827     while (its--) {
828       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
829                             mat->Mvctx); CHKERRQ(ierr);
830       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
831                             mat->Mvctx); CHKERRQ(ierr);
832       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
833                               mat->Mvctx); CHKERRQ(ierr);
834       for ( i=m-1; i>-1; i-- ) {
835         n    = A->i[i+1] - A->i[i];
836         idx  = A->j + A->i[i] + shift;
837         v    = A->a + A->i[i] + shift;
838         sum  = b[i];
839         SPARSEDENSEMDOT(sum,xs,v,idx,n);
840         d    = fshift + A->a[diag[i]+shift];
841         n    = B->i[i+1] - B->i[i];
842         idx  = B->j + B->i[i] + shift;
843         v    = B->a + B->i[i] + shift;
844         SPARSEDENSEMDOT(sum,ls,v,idx,n);
845         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
846       }
847       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
848                             mat->Mvctx); CHKERRQ(ierr);
849     }
850   }
851   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
852     if (flag & SOR_ZERO_INITIAL_GUESS) {
853       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
854     }
855     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
856     CHKERRQ(ierr);
857     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
858     CHKERRQ(ierr);
859     while (its--) {
860       /* go down through the rows */
861       for ( i=0; i<m; i++ ) {
862         n    = A->i[i+1] - A->i[i];
863         idx  = A->j + A->i[i] + shift;
864         v    = A->a + A->i[i] + shift;
865         sum  = b[i];
866         SPARSEDENSEMDOT(sum,xs,v,idx,n);
867         d    = fshift + A->a[diag[i]+shift];
868         n    = B->i[i+1] - B->i[i];
869         idx  = B->j + B->i[i] + shift;
870         v    = B->a + B->i[i] + shift;
871         SPARSEDENSEMDOT(sum,ls,v,idx,n);
872         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
873       }
874       /* come up through the rows */
875       for ( i=m-1; i>-1; i-- ) {
876         n    = A->i[i+1] - A->i[i];
877         idx  = A->j + A->i[i] + shift;
878         v    = A->a + A->i[i] + shift;
879         sum  = b[i];
880         SPARSEDENSEMDOT(sum,xs,v,idx,n);
881         d    = fshift + A->a[diag[i]+shift];
882         n    = B->i[i+1] - B->i[i];
883         idx  = B->j + B->i[i] + shift;
884         v    = B->a + B->i[i] + shift;
885         SPARSEDENSEMDOT(sum,ls,v,idx,n);
886         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
887       }
888     }
889   }
890   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
891     if (flag & SOR_ZERO_INITIAL_GUESS) {
892       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
893     }
894     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
895     CHKERRQ(ierr);
896     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
897     CHKERRQ(ierr);
898     while (its--) {
899       for ( i=0; i<m; i++ ) {
900         n    = A->i[i+1] - A->i[i];
901         idx  = A->j + A->i[i] + shift;
902         v    = A->a + A->i[i] + shift;
903         sum  = b[i];
904         SPARSEDENSEMDOT(sum,xs,v,idx,n);
905         d    = fshift + A->a[diag[i]+shift];
906         n    = B->i[i+1] - B->i[i];
907         idx  = B->j + B->i[i] + shift;
908         v    = B->a + B->i[i] + shift;
909         SPARSEDENSEMDOT(sum,ls,v,idx,n);
910         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
911       }
912     }
913   }
914   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
915     if (flag & SOR_ZERO_INITIAL_GUESS) {
916       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
917     }
918     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
919                             mat->Mvctx); CHKERRQ(ierr);
920     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
921                             mat->Mvctx); CHKERRQ(ierr);
922     while (its--) {
923       for ( i=m-1; i>-1; i-- ) {
924         n    = A->i[i+1] - A->i[i];
925         idx  = A->j + A->i[i] + shift;
926         v    = A->a + A->i[i] + shift;
927         sum  = b[i];
928         SPARSEDENSEMDOT(sum,xs,v,idx,n);
929         d    = fshift + A->a[diag[i]+shift];
930         n    = B->i[i+1] - B->i[i];
931         idx  = B->j + B->i[i] + shift;
932         v    = B->a + B->i[i] + shift;
933         SPARSEDENSEMDOT(sum,ls,v,idx,n);
934         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
935       }
936     }
937   }
938   return 0;
939 }
940 
941 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
942                              int *nzalloc,int *mem)
943 {
944   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
945   Mat        A = mat->A, B = mat->B;
946   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
947 
948   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
949   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
950   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
951   if (flag == MAT_LOCAL) {
952     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
953   } else if (flag == MAT_GLOBAL_MAX) {
954     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
955     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
956   } else if (flag == MAT_GLOBAL_SUM) {
957     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
958     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
959   }
960   return 0;
961 }
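
/* Usage sketch for the info routine above (flag values as handled there):

     int nz, nzalloc, mem;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&nz,&nzalloc,&mem); CHKERRQ(ierr);

   MAT_LOCAL reports this process alone; MAT_GLOBAL_MAX and
   MAT_GLOBAL_SUM reduce the three counts over the communicator. */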
962 
963 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
964 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
965 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
966 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
967 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
968 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
969 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
970 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
971 
972 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
973 {
974   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
975 
976   if (op == NO_NEW_NONZERO_LOCATIONS ||
977       op == YES_NEW_NONZERO_LOCATIONS ||
978       op == COLUMNS_SORTED ||
979       op == ROW_ORIENTED) {
980         MatSetOption(a->A,op);
981         MatSetOption(a->B,op);
982   }
983   else if (op == ROWS_SORTED ||
984            op == SYMMETRIC_MATRIX ||
985            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
986            op == YES_NEW_DIAGONALS)
987     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
988   else if (op == COLUMN_ORIENTED)
989     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED not supported");}
990   else if (op == NO_NEW_DIAGONALS)
991     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS not supported");}
992   else
993     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:Option not supported");}
994   return 0;
995 }
996 
997 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
998 {
999   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1000   *m = mat->M; *n = mat->N;
1001   return 0;
1002 }
1003 
1004 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1005 {
1006   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1007   *m = mat->m; *n = mat->n;
1008   return 0;
1009 }
1010 
1011 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1012 {
1013   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1014   *m = mat->rstart; *n = mat->rend;
1015   return 0;
1016 }
1017 
1018 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1019 {
1020   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1021   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1022   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1023   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1024 
1025   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows");
1026   lrow = row - rstart;
1027 
1028   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1029   if (!v)   {pvA = 0; pvB = 0;}
1030   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1031   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1032   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1033   nztot = nzA + nzB;
1034 
1035   if (v  || idx) {
1036     if (nztot) {
1037       /* Sort by increasing column numbers, assuming A and B already sorted */
1038       int imark;
1039       if (mat->assembled) {
1040         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1041       }
1042       if (v) {
1043         *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
1044         for ( i=0; i<nzB; i++ ) {
1045           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1046           else break;
1047         }
1048         imark = i;
1049         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1050         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1051       }
1052       if (idx) {
1053         *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1054         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1055         for ( i=0; i<nzB; i++ ) {
1056           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1057           else break;
1058         }
1059         imark = i;
1060         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1061         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
1062       }
1063     }
1064     else {if (idx) *idx = 0; if (v) *v = 0;}
1065   }
1066   *nz = nztot;
1067   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1068   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1069   return 0;
1070 }
1071 
1072 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1073 {
1074   if (idx) PETSCFREE(*idx);
1075   if (v) PETSCFREE(*v);
1076   return 0;
1077 }
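
/* A small usage sketch for the row routines above (row is assumed to be
   locally owned, i.e. rstart <= row < rend):

     int    nz, *cols;
     Scalar *vals;
     ierr = MatGetRow(A,row,&nz,&cols,&vals); CHKERRQ(ierr);
     ... examine the nz entries of cols[] and vals[] ...
     ierr = MatRestoreRow(A,row,&nz,&cols,&vals); CHKERRQ(ierr);

   Pass null for cols or vals if only the other is needed; MatRestoreRow
   frees the work arrays allocated by MatGetRow. */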
1078 
1079 static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm)
1080 {
1081   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1082   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1083   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1084   double     sum = 0.0;
1085   Scalar     *v;
1086 
1087   if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
1088   if (aij->numtids == 1) {
1089     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1090   } else {
1091     if (type == NORM_FROBENIUS) {
1092       v = amat->a;
1093       for (i=0; i<amat->nz; i++ ) {
1094 #if defined(PETSC_COMPLEX)
1095         sum += real(conj(*v)*(*v)); v++;
1096 #else
1097         sum += (*v)*(*v); v++;
1098 #endif
1099       }
1100       v = bmat->a;
1101       for (i=0; i<bmat->nz; i++ ) {
1102 #if defined(PETSC_COMPLEX)
1103         sum += real(conj(*v)*(*v)); v++;
1104 #else
1105         sum += (*v)*(*v); v++;
1106 #endif
1107       }
1108       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1109       *norm = sqrt(*norm);
1110     }
1111     else if (type == NORM_1) { /* max column norm */
1112       double *tmp, *tmp2;
1113       int    *jj, *garray = aij->garray;
1114       tmp  = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1115       tmp2 = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1116       PetscZero(tmp,aij->N*sizeof(double));
1117       *norm = 0.0;
1118       v = amat->a; jj = amat->j;
1119       for ( j=0; j<amat->nz; j++ ) {
1120 #if defined(PETSC_COMPLEX)
1121         tmp[cstart + *jj++ + shift] += abs(*v++);
1122 #else
1123         tmp[cstart + *jj++ + shift] += fabs(*v++);
1124 #endif
1125       }
1126       v = bmat->a; jj = bmat->j;
1127       for ( j=0; j<bmat->nz; j++ ) {
1128 #if defined(PETSC_COMPLEX)
1129         tmp[garray[*jj++ + shift]] += abs(*v++);
1130 #else
1131         tmp[garray[*jj++ + shift]] += fabs(*v++);
1132 #endif
1133       }
1134       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1135       for ( j=0; j<aij->N; j++ ) {
1136         if (tmp2[j] > *norm) *norm = tmp2[j];
1137       }
1138       PETSCFREE(tmp); PETSCFREE(tmp2);
1139     }
1140     else if (type == NORM_INFINITY) { /* max row norm */
1141       double normtemp = 0.0;
1142       for ( j=0; j<amat->m; j++ ) {
1143         v = amat->a + amat->i[j] + shift;
1144         sum = 0.0;
1145         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1146 #if defined(PETSC_COMPLEX)
1147           sum += abs(*v); v++;
1148 #else
1149           sum += fabs(*v); v++;
1150 #endif
1151         }
1152         v = bmat->a + bmat->i[j] + shift;
1153         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1154 #if defined(PETSC_COMPLEX)
1155           sum += abs(*v); v++;
1156 #else
1157           sum += fabs(*v); v++;
1158 #endif
1159         }
1160         if (sum > normtemp) normtemp = sum;
1161       }
1162       MPI_Allreduce((void*)&normtemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1163     }
1164     else {
1165       SETERRQ(1,"MatNorm_MPIAIJ:No support for the two norm");
1166     }
1167   }
1168   return 0;
1169 }
1170 
1171 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1172 {
1173   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1174   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1175   int        ierr,shift = Aloc->indexshift;
1176   Mat        B;
1177   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1178   Scalar     *array;
1179 
1180   if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:In-place transpose requires square matrix");
1181   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
1182   CHKERRQ(ierr);
1183 
1184   /* copy over the A part */
1185   Aloc = (Mat_SeqAIJ*) a->A->data;
1186   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1187   row = a->rstart;
1188   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1189   for ( i=0; i<m; i++ ) {
1190     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1191     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1192   }
1193   aj = Aloc->j;
1194   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1195 
1196   /* copy over the B part */
1197   Aloc = (Mat_SeqAIJ*) a->B->data;
1198   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1199   row = a->rstart;
1200   ct = cols = (int *) PETSCMALLOC( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1201   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1202   for ( i=0; i<m; i++ ) {
1203     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1204     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1205   }
1206   PETSCFREE(ct);
1207   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1208   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1209   if (matout) {
1210     *matout = B;
1211   } else {
1212     /* This isn't really an in-place transpose, but we free the data structures of a */
1213     PETSCFREE(a->rowners);
1214     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1215     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1216     if (a->colmap) PETSCFREE(a->colmap);
1217     if (a->garray) PETSCFREE(a->garray);
1218     if (a->lvec) VecDestroy(a->lvec);
1219     if (a->Mvctx) VecScatterCtxDestroy(a->Mvctx);
1220     PETSCFREE(a);
1221     PetscMemcpy(A,B,sizeof(struct _Mat));
1222     PETSCHEADERDESTROY(B);
1223   }
1224   return 0;
1225 }
1226 
1227 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1228 static int MatCopyPrivate_MPIAIJ(Mat,Mat *);
1229 
1230 /* -------------------------------------------------------------------*/
1231 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1232        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1233        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1234        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1235        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1236        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1237        MatLUFactor_MPIAIJ,0,
1238        MatRelax_MPIAIJ,
1239        MatTranspose_MPIAIJ,
1240        MatGetInfo_MPIAIJ,0,
1241        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1242        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1243        0,
1244        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1245        MatGetReordering_MPIAIJ,
1246        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1247        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1248        MatILUFactorSymbolic_MPIAIJ,0,
1249        0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};
1250 
1251 /*@C
1252    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1253    (the default parallel PETSc format).
1254 
1255    Input Parameters:
1256 .  comm - MPI communicator
1257 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1258 .  n - number of local columns (or PETSC_DECIDE to have it calculated
1259            if N is given)
1260 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1261 .  N - number of global columns (or PETSC_DECIDE to have it calculated
1262            if n is given)
1263 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1264            (same for all local rows)
1265 .  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
1266            or null (possibly different for each row).  You must leave room
1267            for the diagonal entry even if it is zero.
1268 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1269            submatrix (same for all local rows).
1270 .  o_nnz - number of nonzeros per row in off-diagonal portion of local
1271            submatrix or null (possibly different for each row).
1272 
1273    Output Parameter:
1274 .  newmat - the matrix
1275 
1276    Notes:
1277    The AIJ format (also called the Yale sparse matrix format or
1278    compressed row storage), is fully compatible with standard Fortran 77
1279    storage.  That is, the stored row and column indices begin at
1280    one, not zero.  See the users manual for further details.
1281 
1282    The user MUST specify either the local or global matrix dimensions
1283    (possibly both).
1284 
1285    Storage Information:
1286    For a square global matrix we define each processor's diagonal portion
1287    to be its local rows and the corresponding columns (a square submatrix);
1288    each processor's off-diagonal portion encompasses the remainder of the
1289    local matrix (a rectangular submatrix).
1290 
1291    The user can specify preallocated storage for the diagonal part of
1292    the local submatrix with either d_nz or d_nnz (not both). Set both
1293    d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
1294    Likewise, specify preallocated storage for the off-diagonal part of
1295    the local submatrix with o_nz or o_nnz (not both).
1296 
1297 .keywords: matrix, aij, compressed row, sparse, parallel
1298 
1299 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1300 @*/
1301 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1302                     int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
1303 {
1304   Mat          mat;
1305   Mat_MPIAIJ   *a;
1306   int          ierr, i,sum[2],work[2];
1307 
1308   *newmat         = 0;
1309   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1310   PLogObjectCreate(mat);
1311   mat->data       = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a);
1312   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1313   mat->destroy    = MatDestroy_MPIAIJ;
1314   mat->view       = MatView_MPIAIJ;
1315   mat->factor     = 0;
1316 
1317   a->insertmode = NOT_SET_VALUES;
1318   MPI_Comm_rank(comm,&a->mytid);
1319   MPI_Comm_size(comm,&a->numtids);
1320 
1321   if (m == PETSC_DECIDE && (d_nnz || o_nnz))
1322     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
1323 
1324   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1325     work[0] = m; work[1] = n;
1326     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1327     if (M == PETSC_DECIDE) M = sum[0];
1328     if (N == PETSC_DECIDE) N = sum[1];
1329   }
1330   if (m == PETSC_DECIDE) {m = M/a->numtids + ((M % a->numtids) > a->mytid);}
1331   if (n == PETSC_DECIDE) {n = N/a->numtids + ((N % a->numtids) > a->mytid);}
1332   a->m = m;
1333   a->n = n;
1334   a->N = N;
1335   a->M = M;
1336 
1337   /* build local table of row and column ownerships */
1338   a->rowners = (int *) PETSCMALLOC(2*(a->numtids+2)*sizeof(int)); CHKPTRQ(a->rowners);
1339   PLogObjectMemory(mat,2*(a->numtids+2)*sizeof(int)+sizeof(struct _Mat)+
1340                        sizeof(Mat_MPIAIJ));
1341   a->cowners = a->rowners + a->numtids + 1;
1342   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1343   a->rowners[0] = 0;
1344   for ( i=2; i<=a->numtids; i++ ) {
1345     a->rowners[i] += a->rowners[i-1];
1346   }
1347   a->rstart = a->rowners[a->mytid];
1348   a->rend   = a->rowners[a->mytid+1];
1349   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1350   a->cowners[0] = 0;
1351   for ( i=2; i<=a->numtids; i++ ) {
1352     a->cowners[i] += a->cowners[i-1];
1353   }
1354   a->cstart = a->cowners[a->mytid];
1355   a->cend   = a->cowners[a->mytid+1];
1356 
1357   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1358   PLogObjectParent(mat,a->A);
1359   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1360   PLogObjectParent(mat,a->B);
1361 
1362   /* build cache for off array entries formed */
1363   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1364   a->colmap    = 0;
1365   a->garray    = 0;
1366 
1367   /* stuff used for matrix vector multiply */
1368   a->lvec      = 0;
1369   a->Mvctx     = 0;
1370   a->assembled = 0;
1371 
1372   *newmat = mat;
1373   return 0;
1374 }
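
/* A minimal creation sketch (sizes and preallocation figures are
   hypothetical; error checking abbreviated):

     Mat A;
     ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                            100,100,5,0,2,0,&A); CHKERRQ(ierr);

   Each process preallocates 5 nonzeros per row in its diagonal block
   and 2 per row in its off-diagonal block; values are then inserted
   with MatSetValues() and the matrix finalized with MatAssemblyBegin()
   and MatAssemblyEnd(). */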
1375 
1376 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
1377 {
1378   Mat        mat;
1379   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1380   int        ierr, len;
1381 
1382   if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix");
1383   *newmat      = 0;
1384   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1385   PLogObjectCreate(mat);
1386   mat->data       = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a);
1387   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1388   mat->destroy    = MatDestroy_MPIAIJ;
1389   mat->view       = MatView_MPIAIJ;
1390   mat->factor     = matin->factor;
1391 
1392   a->m          = oldmat->m;
1393   a->n          = oldmat->n;
1394   a->M          = oldmat->M;
1395   a->N          = oldmat->N;
1396 
1397   a->assembled  = 1;
1398   a->rstart     = oldmat->rstart;
1399   a->rend       = oldmat->rend;
1400   a->cstart     = oldmat->cstart;
1401   a->cend       = oldmat->cend;
1402   a->numtids    = oldmat->numtids;
1403   a->mytid      = oldmat->mytid;
1404   a->insertmode = NOT_SET_VALUES;

  /* copy both ownership tables; the layout matches the create routine,
     where cowners shares the rowners allocation */
  a->rowners    = (int *) PETSCMALLOC(2*(a->numtids+2)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->numtids+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners    = a->rowners + a->numtids + 1;
  PetscMemcpy(a->rowners,oldmat->rowners,(a->numtids+1)*sizeof(int));
  PetscMemcpy(a->cowners,oldmat->cowners,(a->numtids+1)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap      = (int *) PETSCMALLOC( (a->N)*sizeof(int) );CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
    a->garray      = (int *) PETSCMALLOC(len*sizeof(int) ); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCtxCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  *newmat = mat;
  return 0;
}
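
/*
   Note: the deep copy above relies on MatConvert() with the MATSAME
   type to duplicate the sequential A and B blocks.  This routine
   itself is presumably reached the same way for the parallel matrix
   (an assumption; the dispatch table is not shown in this file), e.g.

     Mat copy;
     ierr = MatConvert(mat,MATSAME,&copy); CHKERRQ(ierr);
*/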

#include "sysio.h"

int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j, rstart, rend, fd;
  Scalar       *vals,*svals;
  PetscObject  vobj = (PetscObject) bview;
  MPI_Comm     comm = vobj->comm;
  MPI_Status   status;
  int          header[4],mytid,numtid,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)bview)->tag;

  MPI_Comm_size(comm,&numtid); MPI_Comm_rank(comm,&mytid);
  if (!mytid) {
    ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
    ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
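
  /*
     Assumed binary layout, inferred from the reads below: an integer
     header {MAT_COOKIE, M, N, nz} followed by the M row lengths, then
     all column indices, then all numerical values.  header[3]
     (presumably the total nonzero count) is not used by this routine.
  */
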
  /* determine ownership of all rows */
  m = M/numtid + ((M % numtid) > mytid);
  rowners = (int *) PETSCMALLOC((numtid+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=numtid; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[mytid];
  rend   = rowners[mytid+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!mytid) {
    rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PETSCMALLOC( numtid*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<numtid; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PETSCFREE(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }
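
  /*
     Only the root supplies send buffers to MPI_Scatterv; the other
     ranks pass null pointers and zero counts, which MPI ignores on
     non-root processes.  rowners doubles as the displacement array,
     since rowners[i] is exactly where process i's rows begin.
  */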

  if (!mytid) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PETSCMALLOC( numtid*sizeof(int) ); CHKPTRQ(procsnz);
    PetscZero(procsnz,numtid*sizeof(int));
    for ( i=0; i<numtid; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PETSCFREE(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<numtid; i++ ) {
      maxnz = PETSCMAX(maxnz,procsnz[i]);
    }
    cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for ( i=1; i<numtid; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PETSCFREE(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
  }
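
  /*
     Only process 0 holds the file descriptor, so it reads each
     process's chunk of column indices sequentially and ships it off in
     a single message; the receivers size their buffers from ourlens
     and verify the message length with MPI_Get_count above.
  */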

  /* loop over local rows, determining the number of off-diagonal entries */
  PetscZero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }
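
  /*
     ourlens[i] now holds the total nonzeros of local row i and
     offlens[i] the count of entries whose column lies outside
     [rstart,rend), used as a proxy for the diagonal column block.  The
     difference serves below as the d_nnz preallocation and offlens as
     o_nnz, after which ourlens is restored to the full row lengths for
     inserting the values.
  */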

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  if (type == MATMPIAIJ) {
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  }
  else if (type == MATMPIROW) {
    ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatLoad_MPIAIJorMPIRow:unknown matrix type");
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!mytid) {
    vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<numtid; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PETSCFREE(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
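
/*
   Illustrative usage sketch (not part of the original source): given a
   binary viewer created elsewhere on the same communicator, a parallel
   AIJ matrix may be loaded with

     Mat mat;
     ierr = MatLoad_MPIAIJorMPIRow(bview,MATMPIAIJ,&mat); CHKERRQ(ierr);

   after which each process holds its block of rows, fully assembled.
*/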