xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision aeafbbfc33d028db7008a5487c5a5ac8aec64109)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.174 1996/11/19 16:31:04 bsmith Exp curfman $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non-scalable way since the
12 length of colmap equals the global number of columns.
13 */
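/*
   Illustrative sketch (hypothetical values): if this process's off-diagonal
   block references global columns garray[] = {3,7,12}, the loop below sets
   colmap[3] = 1, colmap[7] = 2 and colmap[12] = 3, leaving every other entry 0.
   Hence colmap[c]-1 recovers the local off-diagonal column number, while
   colmap[c] == 0 means global column c does not appear in the off-diagonal block.
*/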
14 int CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18   int        n = B->n,i;
19 
20   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21   PLogObjectMemory(mat,aij->N*sizeof(int));
22   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
24   return 0;
25 }
26 
27 extern int DisAssemble_MPIAIJ(Mat);
28 
29 static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
30                            PetscTruth *done)
31 {
32   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
33   int        ierr;
34   if (aij->size == 1) {
35     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
36   } else SETERRQ(1,"MatGetRowIJ_MPIAIJ:not supported in parallel");
37   return 0;
38 }
39 
40 static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
41                                PetscTruth *done)
42 {
43   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
44   int        ierr;
45   if (aij->size == 1) {
46     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
47   } else SETERRQ(1,"MatRestoreRowIJ_MPIAIJ:not supported in parallel");
48   return 0;
49 }
50 
51 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
52 
53 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
54 {
55   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
56   Scalar     value;
57   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
58   int        cstart = aij->cstart, cend = aij->cend,row,col;
59   int        roworiented = aij->roworiented;
60 
61 #if defined(PETSC_BOPT_g)
62   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
63     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
64   }
65 #endif
66   aij->insertmode = addv;
67   for ( i=0; i<m; i++ ) {
68 #if defined(PETSC_BOPT_g)
69     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
70     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
71 #endif
72     if (im[i] >= rstart && im[i] < rend) {
73       row = im[i] - rstart;
74       for ( j=0; j<n; j++ ) {
75         if (in[j] >= cstart && in[j] < cend){
76           col = in[j] - cstart;
77           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
78           ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
79         }
80 #if defined(PETSC_BOPT_g)
81         else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");}
82         else if (in[j] >= aij->N) {SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");}
83 #endif
84         else {
85           if (mat->was_assembled) {
86             if (!aij->colmap) {
87               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
88             }
89             col = aij->colmap[in[j]] - 1;
90             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
91               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
92               col =  in[j];
93             }
94           }
95           else col = in[j];
96           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
97           ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
98         }
99       }
100     }
101     else {
102       if (roworiented && !aij->donotstash) {
103         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
104       }
105       else {
106         if (!aij->donotstash) {
107           row = im[i];
108           for ( j=0; j<n; j++ ) {
109             ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
110           }
111         }
112       }
113     }
114   }
115   return 0;
116 }
117 
118 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
119 {
120   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
121   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
122   int        cstart = aij->cstart, cend = aij->cend,row,col;
123 
124   for ( i=0; i<m; i++ ) {
125     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
126     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
127     if (idxm[i] >= rstart && idxm[i] < rend) {
128       row = idxm[i] - rstart;
129       for ( j=0; j<n; j++ ) {
130         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
131         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
132         if (idxn[j] >= cstart && idxn[j] < cend){
133           col = idxn[j] - cstart;
134           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
135         }
136         else {
137           if (!aij->colmap) {
138             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
139           }
140           col = aij->colmap[idxn[j]] - 1;
141           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
142           else {
143             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
144           }
145         }
146       }
147     }
148     else {
149       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
150     }
151   }
152   return 0;
153 }
154 
155 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
156 {
157   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
158   MPI_Comm    comm = mat->comm;
159   int         size = aij->size, *owners = aij->rowners;
160   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
161   MPI_Request *send_waits,*recv_waits;
162   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
163   InsertMode  addv;
164   Scalar      *rvalues,*svalues;
165 
166   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
167   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
168   if (addv == (ADD_VALUES|INSERT_VALUES)) {
169     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
170   }
171   aij->insertmode = addv; /* in case this processor had no cache */
172 
173   /*  first count number of contributors to each processor */
174   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
175   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
176   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
177   for ( i=0; i<aij->stash.n; i++ ) {
178     idx = aij->stash.idx[i];
179     for ( j=0; j<size; j++ ) {
180       if (idx >= owners[j] && idx < owners[j+1]) {
181         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
182       }
183     }
184   }
185   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
186 
187   /* inform other processors of number of messages and max length*/
188   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
189   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
190   nreceives = work[rank];
191   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
192   nmax = work[rank];
193   PetscFree(work);
194 
195   /* post receives:
196        1) each message consists of ordered triples
197      (global row index, global column index, value); the indices are stored
198      as Scalars to simplify the message passing.
199        2) since we don't know how long each individual message is, we
200      allocate the largest needed buffer for each receive.  Potentially
201      this is a lot of wasted space.
202 
203 
204        This could be done better.
205   */
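  /*
     Sketch of the wire format (matches the packing loop further below): every
     stashed entry occupies three consecutive Scalars in the send buffer,
         (Scalar) global row index, (Scalar) global column index, value,
     entries destined for the same process are packed contiguously, and each
     receive buffer is therefore sized for 3*nmax Scalars.
  */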
206   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
207   CHKPTRQ(rvalues);
208   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
209   CHKPTRQ(recv_waits);
210   for ( i=0; i<nreceives; i++ ) {
211     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
212               comm,recv_waits+i);
213   }
214 
215   /* do sends:
216       1) starts[i] gives the starting index in svalues for stuff going to
217          the ith processor
218   */
219   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
220   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
221   CHKPTRQ(send_waits);
222   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
223   starts[0] = 0;
224   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
225   for ( i=0; i<aij->stash.n; i++ ) {
226     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
227     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
228     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
229   }
230   PetscFree(owner);
231   starts[0] = 0;
232   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
233   count = 0;
234   for ( i=0; i<size; i++ ) {
235     if (procs[i]) {
236       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
237                 comm,send_waits+count++);
238     }
239   }
240   PetscFree(starts); PetscFree(nprocs);
241 
242   /* Free cache space */
243   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
244   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
245 
246   aij->svalues    = svalues;    aij->rvalues    = rvalues;
247   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
248   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
249   aij->rmax       = nmax;
250 
251   return 0;
252 }
253 extern int MatSetUpMultiply_MPIAIJ(Mat);
254 
255 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
256 {
257   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
258   MPI_Status  *send_status,recv_status;
259   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
260   int         row,col,other_disassembled;
261   Scalar      *values,val;
262   InsertMode  addv = aij->insertmode;
263 
264   /*  wait on receives */
265   while (count) {
266     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
267     /* unpack receives into our local space */
268     values = aij->rvalues + 3*imdex*aij->rmax;
269     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
270     n = n/3;
271     for ( i=0; i<n; i++ ) {
272       row = (int) PetscReal(values[3*i]) - aij->rstart;
273       col = (int) PetscReal(values[3*i+1]);
274       val = values[3*i+2];
275       if (col >= aij->cstart && col < aij->cend) {
276         col -= aij->cstart;
277         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
278       }
279       else {
280         if (mat->was_assembled || mat->assembled) {
281           if (!aij->colmap) {
282             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
283           }
284           col = aij->colmap[col] - 1;
285           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
286             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
287             col = (int) PetscReal(values[3*i+1]);
288           }
289         }
290         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
291       }
292     }
293     count--;
294   }
295   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
296 
297   /* wait on sends */
298   if (aij->nsends) {
299     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
300     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
301     PetscFree(send_status);
302   }
303   PetscFree(aij->send_waits); PetscFree(aij->svalues);
304 
305   aij->insertmode = NOT_SET_VALUES;
306   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
307   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
308 
309   /* determine if any processor has disassembled, if so we must
310      also disassemble ourselfs, in order that we may reassemble. */
311   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
312   if (mat->was_assembled && !other_disassembled) {
313     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
314   }
315 
316   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
317     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
318   }
319   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
321 
322   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
323   return 0;
324 }
325 
326 static int MatZeroEntries_MPIAIJ(Mat A)
327 {
328   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
329   int        ierr;
330   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
331   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
332   return 0;
333 }
334 
335 /* the code does not do the diagonal entries correctly unless the
336    matrix is square and the column and row ownerships are identical.
337    This is a BUG. The only way to fix it seems to be to access
338    aij->A and aij->B directly and not through the MatZeroRows()
339    routine.
340 */
341 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
342 {
343   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
344   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
345   int            *procs,*nprocs,j,found,idx,nsends,*work;
346   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
347   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
348   int            *lens,imdex,*lrows,*values;
349   MPI_Comm       comm = A->comm;
350   MPI_Request    *send_waits,*recv_waits;
351   MPI_Status     recv_status,*send_status;
352   IS             istmp;
353 
354   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
355   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
356 
357   /*  first count number of contributors to each processor */
358   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
359   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
360   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
361   for ( i=0; i<N; i++ ) {
362     idx = rows[i];
363     found = 0;
364     for ( j=0; j<size; j++ ) {
365       if (idx >= owners[j] && idx < owners[j+1]) {
366         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
367       }
368     }
369     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
370   }
371   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
372 
373   /* inform other processors of number of messages and max length*/
374   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
375   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
376   nrecvs = work[rank];
377   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
378   nmax = work[rank];
379   PetscFree(work);
380 
381   /* post receives:   */
382   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
383   CHKPTRQ(rvalues);
384   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
385   CHKPTRQ(recv_waits);
386   for ( i=0; i<nrecvs; i++ ) {
387     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
388   }
389 
390   /* do sends:
391       1) starts[i] gives the starting index in svalues for stuff going to
392          the ith processor
393   */
394   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
395   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
396   CHKPTRQ(send_waits);
397   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
398   starts[0] = 0;
399   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
400   for ( i=0; i<N; i++ ) {
401     svalues[starts[owner[i]]++] = rows[i];
402   }
403   ISRestoreIndices(is,&rows);
404 
405   starts[0] = 0;
406   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
407   count = 0;
408   for ( i=0; i<size; i++ ) {
409     if (procs[i]) {
410       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
411     }
412   }
413   PetscFree(starts);
414 
415   base = owners[rank];
416 
417   /*  wait on receives */
418   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
419   source = lens + nrecvs;
420   count  = nrecvs; slen = 0;
421   while (count) {
422     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
423     /* unpack receives into our local space */
424     MPI_Get_count(&recv_status,MPI_INT,&n);
425     source[imdex]  = recv_status.MPI_SOURCE;
426     lens[imdex]  = n;
427     slen += n;
428     count--;
429   }
430   PetscFree(recv_waits);
431 
432   /* move the data into the send scatter */
433   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
434   count = 0;
435   for ( i=0; i<nrecvs; i++ ) {
436     values = rvalues + i*nmax;
437     for ( j=0; j<lens[i]; j++ ) {
438       lrows[count++] = values[j] - base;
439     }
440   }
441   PetscFree(rvalues); PetscFree(lens);
442   PetscFree(owner); PetscFree(nprocs);
443 
444   /* actually zap the local rows */
445   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
446   PLogObjectParent(A,istmp);
447   PetscFree(lrows);
448   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
449   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
450   ierr = ISDestroy(istmp); CHKERRQ(ierr);
451 
452   /* wait on sends */
453   if (nsends) {
454     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
455     CHKPTRQ(send_status);
456     MPI_Waitall(nsends,send_waits,send_status);
457     PetscFree(send_status);
458   }
459   PetscFree(send_waits); PetscFree(svalues);
460 
461   return 0;
462 }
463 
464 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
465 {
466   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
467   int        ierr,nt;
468 
469   ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
470   if (nt != a->n) {
471     SETERRQ(1,"MatMult_MPIAIJ:Incompatible partition of A and xx");
472   }
473   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
474   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
475   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
476   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
477   return 0;
478 }
479 
480 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
481 {
482   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
483   int        ierr;
484   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
485   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
486   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
487   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
488   return 0;
489 }
490 
491 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
492 {
493   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
494   int        ierr;
495 
496   /* do nondiagonal part */
497   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
498   /* send it on its way */
499   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
500   /* do local part */
501   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
502   /* receive remote parts: note this assumes the values are not actually */
503   /* inserted into yy until the next line, which is true for my implementation */
504   /* but may not always be true. */
505   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
506   return 0;
507 }
508 
509 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
510 {
511   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
512   int        ierr;
513 
514   /* do nondiagonal part */
515   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
516   /* send it on its way */
517   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
518   /* do local part */
519   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
520   /* receive remote parts: note this assumes the values are not actually */
521   /* inserted into zz until the next line, which is true for my implementation */
522   /* but may not always be true. */
523   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
524   return 0;
525 }
526 
527 /*
528   This only works correctly for square matrices where the subblock A->A is the
529    diagonal block
530 */
531 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
532 {
533   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
534   if (a->M != a->N)
535     SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
536   if (a->rstart != a->cstart || a->rend != a->cend) {
537     SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition");  }
538   return MatGetDiagonal(a->A,v);
539 }
540 
541 static int MatScale_MPIAIJ(Scalar *aa,Mat A)
542 {
543   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
544   int        ierr;
545   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
546   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
547   return 0;
548 }
549 
550 static int MatDestroy_MPIAIJ(PetscObject obj)
551 {
552   Mat        mat = (Mat) obj;
553   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
554   int        ierr;
555 #if defined(PETSC_LOG)
556   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
557 #endif
558   PetscFree(aij->rowners);
559   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
560   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
561   if (aij->colmap) PetscFree(aij->colmap);
562   if (aij->garray) PetscFree(aij->garray);
563   if (aij->lvec)   VecDestroy(aij->lvec);
564   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
565   if (aij->rowvalues) PetscFree(aij->rowvalues);
566   PetscFree(aij);
567   if (mat->mapping) {
568     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
569   }
570   PLogObjectDestroy(mat);
571   PetscHeaderDestroy(mat);
572   return 0;
573 }
574 #include "draw.h"
575 #include "pinclude/pviewer.h"
576 
577 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
578 {
579   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
580   int         ierr;
581 
582   if (aij->size == 1) {
583     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
584   }
585   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
586   return 0;
587 }
588 
589 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
590 {
591   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
592   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
593   int         ierr, format,shift = C->indexshift,rank;
594   FILE        *fd;
595   ViewerType  vtype;
596 
597   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
598   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
599     ierr = ViewerGetFormat(viewer,&format);
600     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
601       MatInfo info;
602       int     flg;
603       MPI_Comm_rank(mat->comm,&rank);
604       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
605       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
606       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
607       PetscSequentialPhaseBegin(mat->comm,1);
608       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
609          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
610       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
611          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
612       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
613       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
614       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
615       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
616       fflush(fd);
617       PetscSequentialPhaseEnd(mat->comm,1);
618       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
619       return 0;
620     }
621     else if (format == VIEWER_FORMAT_ASCII_INFO) {
622       return 0;
623     }
624   }
625 
626   if (vtype == DRAW_VIEWER) {
627     Draw       draw;
628     PetscTruth isnull;
629     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
630     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
631   }
632 
633   if (vtype == ASCII_FILE_VIEWER) {
634     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
635     PetscSequentialPhaseBegin(mat->comm,1);
636     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
637            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
638            aij->cend);
639     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
640     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
641     fflush(fd);
642     PetscSequentialPhaseEnd(mat->comm,1);
643   }
644   else {
645     int size = aij->size;
646     rank = aij->rank;
647     if (size == 1) {
648       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
649     }
650     else {
651       /* assemble the entire matrix onto first processor. */
652       Mat         A;
653       Mat_SeqAIJ *Aloc;
654       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
655       Scalar      *a;
656 
657       if (!rank) {
658         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
659                CHKERRQ(ierr);
660       }
661       else {
662         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
663                CHKERRQ(ierr);
664       }
665       PLogObjectParent(mat,A);
666 
667       /* copy over the A part */
668       Aloc = (Mat_SeqAIJ*) aij->A->data;
669       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
670       row = aij->rstart;
671       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
672       for ( i=0; i<m; i++ ) {
673         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
674         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
675       }
676       aj = Aloc->j;
677       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
678 
679       /* copy over the B part */
680       Aloc = (Mat_SeqAIJ*) aij->B->data;
681       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
682       row = aij->rstart;
683       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
684       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
685       for ( i=0; i<m; i++ ) {
686         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
687         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
688       }
689       PetscFree(ct);
690       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
691       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
692       if (!rank) {
693         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
694       }
695       ierr = MatDestroy(A); CHKERRQ(ierr);
696     }
697   }
698   return 0;
699 }
700 
701 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
702 {
703   Mat         mat = (Mat) obj;
704   int         ierr;
705   ViewerType  vtype;
706 
707   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
708   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
709       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
710     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
711   }
712   else if (vtype == BINARY_FILE_VIEWER) {
713     return MatView_MPIAIJ_Binary(mat,viewer);
714   }
715   return 0;
716 }
717 
718 /*
719     This has to provide several versions.
720 
721      2) a) use only local smoothing updating outer values only once.
722         b) local smoothing updating outer values each inner iteration
723      3) coloring, updating outer values between colors.
724 */
725 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
726                            double fshift,int its,Vec xx)
727 {
728   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
729   Mat        AA = mat->A, BB = mat->B;
730   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
731   Scalar     *b,*x,*xs,*ls,d,*v,sum;
732   int        ierr,*idx, *diag;
733   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
734 
735   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
736   xs = x + shift; /* shift by one for index start of 1 */
737   ls = ls + shift;
738   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
739   diag = A->diag;
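  /*
     Each sweep below applies the usual SOR update (assuming SPARSEDENSEMDOT
     subtracts the sparse dot product from sum):
         x_i <- (1-omega) x_i + omega (b_i - sum_{j != i} a_ij x_j)/(a_ii + fshift)
     The diagonal term a_ii x_i is added back into sum to cancel its subtraction,
     and the off-processor part of each row uses the scattered ghost values in ls.
  */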
740   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
741     if (flag & SOR_ZERO_INITIAL_GUESS) {
742       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
743     }
744     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
745     CHKERRQ(ierr);
746     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
747     CHKERRQ(ierr);
748     while (its--) {
749       /* go down through the rows */
750       for ( i=0; i<m; i++ ) {
751         n    = A->i[i+1] - A->i[i];
752         idx  = A->j + A->i[i] + shift;
753         v    = A->a + A->i[i] + shift;
754         sum  = b[i];
755         SPARSEDENSEMDOT(sum,xs,v,idx,n);
756         d    = fshift + A->a[diag[i]+shift];
757         n    = B->i[i+1] - B->i[i];
758         idx  = B->j + B->i[i] + shift;
759         v    = B->a + B->i[i] + shift;
760         SPARSEDENSEMDOT(sum,ls,v,idx,n);
761         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
762       }
763       /* come up through the rows */
764       for ( i=m-1; i>-1; i-- ) {
765         n    = A->i[i+1] - A->i[i];
766         idx  = A->j + A->i[i] + shift;
767         v    = A->a + A->i[i] + shift;
768         sum  = b[i];
769         SPARSEDENSEMDOT(sum,xs,v,idx,n);
770         d    = fshift + A->a[diag[i]+shift];
771         n    = B->i[i+1] - B->i[i];
772         idx  = B->j + B->i[i] + shift;
773         v    = B->a + B->i[i] + shift;
774         SPARSEDENSEMDOT(sum,ls,v,idx,n);
775         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
776       }
777     }
778   }
779   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
780     if (flag & SOR_ZERO_INITIAL_GUESS) {
781       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
782     }
783     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
784     CHKERRQ(ierr);
785     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
786     CHKERRQ(ierr);
787     while (its--) {
788       for ( i=0; i<m; i++ ) {
789         n    = A->i[i+1] - A->i[i];
790         idx  = A->j + A->i[i] + shift;
791         v    = A->a + A->i[i] + shift;
792         sum  = b[i];
793         SPARSEDENSEMDOT(sum,xs,v,idx,n);
794         d    = fshift + A->a[diag[i]+shift];
795         n    = B->i[i+1] - B->i[i];
796         idx  = B->j + B->i[i] + shift;
797         v    = B->a + B->i[i] + shift;
798         SPARSEDENSEMDOT(sum,ls,v,idx,n);
799         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
800       }
801     }
802   }
803   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
804     if (flag & SOR_ZERO_INITIAL_GUESS) {
805       return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
806     }
807     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
808                             mat->Mvctx); CHKERRQ(ierr);
809     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
810                             mat->Mvctx); CHKERRQ(ierr);
811     while (its--) {
812       for ( i=m-1; i>-1; i-- ) {
813         n    = A->i[i+1] - A->i[i];
814         idx  = A->j + A->i[i] + shift;
815         v    = A->a + A->i[i] + shift;
816         sum  = b[i];
817         SPARSEDENSEMDOT(sum,xs,v,idx,n);
818         d    = fshift + A->a[diag[i]+shift];
819         n    = B->i[i+1] - B->i[i];
820         idx  = B->j + B->i[i] + shift;
821         v    = B->a + B->i[i] + shift;
822         SPARSEDENSEMDOT(sum,ls,v,idx,n);
823         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
824       }
825     }
826   }
827   else {
828     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
829   }
830   return 0;
831 }
832 
833 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
834 {
835   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
836   Mat        A = mat->A, B = mat->B;
837   int        ierr;
838   double     isend[5], irecv[5];
839 
840   info->rows_global    = (double)mat->M;
841   info->columns_global = (double)mat->N;
842   info->rows_local     = (double)mat->m;
843   info->columns_local  = (double)mat->N;
844   info->block_size     = 1.0;
845   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
846   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
847   isend[3] = info->memory;  isend[4] = info->mallocs;
848   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
849   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
850   isend[3] += info->memory;  isend[4] += info->mallocs;
851   if (flag == MAT_LOCAL) {
852     info->nz_used      = isend[0];
853     info->nz_allocated = isend[1];
854     info->nz_unneeded  = isend[2];
855     info->memory       = isend[3];
856     info->mallocs      = isend[4];
857   } else if (flag == MAT_GLOBAL_MAX) {
858     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
859     info->nz_used      = irecv[0];
860     info->nz_allocated = irecv[1];
861     info->nz_unneeded  = irecv[2];
862     info->memory       = irecv[3];
863     info->mallocs      = irecv[4];
864   } else if (flag == MAT_GLOBAL_SUM) {
865     MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
866     info->nz_used      = irecv[0];
867     info->nz_allocated = irecv[1];
868     info->nz_unneeded  = irecv[2];
869     info->memory       = irecv[3];
870     info->mallocs      = irecv[4];
871   }
872   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
873   info->fill_ratio_needed = 0;
874   info->factor_mallocs    = 0;
875 
876   return 0;
877 }
878 
879 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
880 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
881 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
882 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
883 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
884 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
885 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
886 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
887 
888 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
889 {
890   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
891 
892   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
893       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
894       op == MAT_COLUMNS_SORTED ||
895       op == MAT_ROW_ORIENTED) {
896         a->roworiented = 1;
897         MatSetOption(a->A,op);
898         MatSetOption(a->B,op);
899   }
900   else if (op == MAT_ROWS_SORTED ||
901            op == MAT_SYMMETRIC ||
902            op == MAT_STRUCTURALLY_SYMMETRIC ||
903            op == MAT_YES_NEW_DIAGONALS)
904     PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
905   else if (op == MAT_COLUMN_ORIENTED) {
906     a->roworiented = 0;
907     MatSetOption(a->A,op);
908     MatSetOption(a->B,op);
909   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
910     a->donotstash = 1;
911   } else if (op == MAT_NO_NEW_DIAGONALS)
912     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");}
913   else
914     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
915   return 0;
916 }
917 
918 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
919 {
920   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
921   *m = mat->M; *n = mat->N;
922   return 0;
923 }
924 
925 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
926 {
927   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
928   *m = mat->m; *n = mat->N;
929   return 0;
930 }
931 
932 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
933 {
934   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
935   *m = mat->rstart; *n = mat->rend;
936   return 0;
937 }
938 
939 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
940 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
941 
942 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
943 {
944   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
945   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
946   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
947   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
948   int        *cmap, *idx_p;
949 
950   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
951   mat->getrowactive = PETSC_TRUE;
952 
953   if (!mat->rowvalues && (idx || v)) {
954     /*
955         allocate enough space to hold information from the longest row.
956     */
957     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
958     int     max = 1,m = mat->m,tmp;
959     for ( i=0; i<m; i++ ) {
960       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
961       if (max < tmp) { max = tmp; }
962     }
963     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
964     CHKPTRQ(mat->rowvalues);
965     mat->rowindices = (int *) (mat->rowvalues + max);
966   }
967 
968   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
969   lrow = row - rstart;
970 
971   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
972   if (!v)   {pvA = 0; pvB = 0;}
973   if (!idx) {pcA = 0; if (!v) pcB = 0;}
974   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
975   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
976   nztot = nzA + nzB;
977 
978   cmap  = mat->garray;
979   if (v  || idx) {
980     if (nztot) {
981       /* Sort by increasing column numbers, assuming A and B already sorted */
982       int imark = -1;
983       if (v) {
984         *v = v_p = mat->rowvalues;
985         for ( i=0; i<nzB; i++ ) {
986           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
987           else break;
988         }
989         imark = i;
990         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
991         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
992       }
993       if (idx) {
994         *idx = idx_p = mat->rowindices;
995         if (imark > -1) {
996           for ( i=0; i<imark; i++ ) {
997             idx_p[i] = cmap[cworkB[i]];
998           }
999         } else {
1000           for ( i=0; i<nzB; i++ ) {
1001             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1002             else break;
1003           }
1004           imark = i;
1005         }
1006         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1007         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1008       }
1009     }
1010     else {
1011       if (idx) *idx = 0;
1012       if (v)   *v   = 0;
1013     }
1014   }
1015   *nz = nztot;
1016   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1017   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1018   return 0;
1019 }
1020 
1021 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1022 {
1023   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1024   if (aij->getrowactive == PETSC_FALSE) {
1025     SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
1026   }
1027   aij->getrowactive = PETSC_FALSE;
1028   return 0;
1029 }
1030 
1031 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1032 {
1033   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1034   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1035   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1036   double     sum = 0.0;
1037   Scalar     *v;
1038 
1039   if (aij->size == 1) {
1040     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1041   } else {
1042     if (type == NORM_FROBENIUS) {
1043       v = amat->a;
1044       for (i=0; i<amat->nz; i++ ) {
1045 #if defined(PETSC_COMPLEX)
1046         sum += real(conj(*v)*(*v)); v++;
1047 #else
1048         sum += (*v)*(*v); v++;
1049 #endif
1050       }
1051       v = bmat->a;
1052       for (i=0; i<bmat->nz; i++ ) {
1053 #if defined(PETSC_COMPLEX)
1054         sum += real(conj(*v)*(*v)); v++;
1055 #else
1056         sum += (*v)*(*v); v++;
1057 #endif
1058       }
1059       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1060       *norm = sqrt(*norm);
1061     }
1062     else if (type == NORM_1) { /* max column norm */
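      /* ||A||_1 = max_j sum_i |a_ij|: each process adds |a_ij| into the slot of
         tmp[] for the owning global column, the Allreduce forms the full column
         sums, and the maximum over all columns is taken at the end. */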
1063       double *tmp, *tmp2;
1064       int    *jj, *garray = aij->garray;
1065       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1066       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1067       PetscMemzero(tmp,aij->N*sizeof(double));
1068       *norm = 0.0;
1069       v = amat->a; jj = amat->j;
1070       for ( j=0; j<amat->nz; j++ ) {
1071         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1072       }
1073       v = bmat->a; jj = bmat->j;
1074       for ( j=0; j<bmat->nz; j++ ) {
1075         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1076       }
1077       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1078       for ( j=0; j<aij->N; j++ ) {
1079         if (tmp2[j] > *norm) *norm = tmp2[j];
1080       }
1081       PetscFree(tmp); PetscFree(tmp2);
1082     }
1083     else if (type == NORM_INFINITY) { /* max row norm */
1084       double ntemp = 0.0;
1085       for ( j=0; j<amat->m; j++ ) {
1086         v = amat->a + amat->i[j] + shift;
1087         sum = 0.0;
1088         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1089           sum += PetscAbsScalar(*v); v++;
1090         }
1091         v = bmat->a + bmat->i[j] + shift;
1092         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1093           sum += PetscAbsScalar(*v); v++;
1094         }
1095         if (sum > ntemp) ntemp = sum;
1096       }
1097       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1098     }
1099     else {
1100       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
1101     }
1102   }
1103   return 0;
1104 }
1105 
1106 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1107 {
1108   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1109   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1110   int        ierr,shift = Aloc->indexshift;
1111   Mat        B;
1112   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1113   Scalar     *array;
1114 
1115   if (matout == PETSC_NULL && M != N)
1116     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1117   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1118          PETSC_NULL,&B); CHKERRQ(ierr);
1119 
1120   /* copy over the A part */
1121   Aloc = (Mat_SeqAIJ*) a->A->data;
1122   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1123   row = a->rstart;
1124   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1125   for ( i=0; i<m; i++ ) {
1126     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1127     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1128   }
1129   aj = Aloc->j;
1130   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1131 
1132   /* copy over the B part */
1133   Aloc = (Mat_SeqAIJ*) a->B->data;
1134   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1135   row = a->rstart;
1136   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1137   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1138   for ( i=0; i<m; i++ ) {
1139     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1140     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1141   }
1142   PetscFree(ct);
1143   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1144   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1145   if (matout != PETSC_NULL) {
1146     *matout = B;
1147   } else {
1148     /* This isn't really an in-place transpose, but it frees the data structures from a */
1149     PetscFree(a->rowners);
1150     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1151     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1152     if (a->colmap) PetscFree(a->colmap);
1153     if (a->garray) PetscFree(a->garray);
1154     if (a->lvec) VecDestroy(a->lvec);
1155     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1156     PetscFree(a);
1157     PetscMemcpy(A,B,sizeof(struct _Mat));
1158     PetscHeaderDestroy(B);
1159   }
1160   return 0;
1161 }
1162 
1163 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1164 {
1165   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1166   Mat a = aij->A, b = aij->B;
1167   int ierr,s1,s2,s3;
1168 
1169   ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
1170   if (rr) {
1171     s3 = aij->n;
1172     VecGetLocalSize_Fast(rr,s1);
1173     if (s1!=s3) SETERRQ(1,"MatDiagonalScale: right vector non-conforming local size");
1174     /* Overlap communication with computation. */
1175     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_ALL,aij->Mvctx); CHKERRQ(ierr);
1176   }
1177   if (ll) {
1178     VecGetLocalSize_Fast(ll,s1);
1179     if (s1!=s2) SETERRQ(1,"MatDiagonalScale: left vector non-conforming local size");
1180     ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
1181   }
1182   /* scale  the diagonal block */
1183   ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);
1184 
1185   if (rr) {
1186     /* Do a scatter end and then right scale the off-diagonal block */
1187     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_ALL,aij->Mvctx); CHKERRQ(ierr);
1188     ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
1189   }
1190 
1191   return 0;
1192 }
1193 
1194 
1195 extern int MatPrintHelp_SeqAIJ(Mat);
1196 static int MatPrintHelp_MPIAIJ(Mat A)
1197 {
1198   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1199 
1200   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1201   else return 0;
1202 }
1203 
1204 static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1205 {
1206   *bs = 1;
1207   return 0;
1208 }
1209 
1210 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1211 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1212 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1213 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1214 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1215 /* -------------------------------------------------------------------*/
1216 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1217        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1218        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1219        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1220        0,0,
1221        0,0,
1222        0,0,
1223        MatRelax_MPIAIJ,
1224        MatTranspose_MPIAIJ,
1225        MatGetInfo_MPIAIJ,0,
1226        MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
1227        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1228        0,
1229        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1230        0,0,0,0,
1231        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1232        0,0,
1233        0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
1234        0,0,0,
1235        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1236        MatPrintHelp_MPIAIJ,
1237        MatScale_MPIAIJ,0,0,0,
1238        MatGetBlockSize_MPIAIJ,0,0,0,0,
1239        MatFDColoringCreate_MPIAIJ};
1240 
1241 
1242 /*@C
1243    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1244    (the default parallel PETSc format).  For good matrix assembly performance
1245    the user should preallocate the matrix storage by setting the parameters
1246    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1247    performance can be increased by more than a factor of 50.
1248 
1249    Input Parameters:
1250 .  comm - MPI communicator
1251 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1252            This value should be the same as the local size used in creating the
1253            y vector for the matrix-vector product y = Ax.
1254 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1255            This value should be the same as the local size used in creating the
1256            x vector for the matrix-vector product y = Ax.
1257 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1258 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1259 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1260            (same for all local rows)
1261 .  d_nnz - array containing the number of nonzeros in the various rows of the
1262            diagonal portion of the local submatrix (possibly different for each row)
1263            or PETSC_NULL. You must leave room for the diagonal entry even if
1264            it is zero.
1265 .  o_nz - number of nonzeros per row in the off-diagonal portion of local
1266            submatrix (same for all local rows).
1267 .  o_nnz - array containing the number of nonzeros in the various rows of the
1268            off-diagonal portion of the local submatrix (possibly different for
1269            each row) or PETSC_NULL.
1270 
1271    Output Parameter:
1272 .  A - the matrix
1273 
1274    Notes:
1275    The AIJ format (also called the Yale sparse matrix format or
1276    compressed row storage), is fully compatible with standard Fortran 77
1277    storage.  That is, the stored row and column indices can begin at
1278    either one (as in Fortran) or zero.  See the users manual for details.
1279 
1280    The user MUST specify either the local or global matrix dimensions
1281    (possibly both).
1282 
1283    By default, this format uses inodes (identical nodes) when possible.
1284    We search for consecutive rows with the same nonzero structure, thereby
1285    reusing matrix information to achieve increased efficiency.
1286 
1287    Options Database Keys:
1288 $    -mat_aij_no_inode  - Do not use inodes
1289 $    -mat_aij_inode_limit <limit> - Set inode limit.
1290 $        (max limit=5)
1291 $    -mat_aij_oneindex - Internally use indexing starting at 1
1292 $        rather than 0.  Note: When calling MatSetValues(),
1293 $        the user still MUST index entries starting at 0!
1294 
1295    Storage Information:
1296    For a square global matrix we define each processor's diagonal portion
1297    to be its local rows and the corresponding columns (a square submatrix);
1298    each processor's off-diagonal portion encompasses the remainder of the
1299    local matrix (a rectangular submatrix).
1300 
1301    The user can specify preallocated storage for the diagonal part of
1302    the local submatrix with either d_nz or d_nnz (not both).  Set
1303    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1304    memory allocation.  Likewise, specify preallocated storage for the
1305    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1306 
1307    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1308    the figure below we depict these three local rows and all columns (0-11).
1309 
1310 $          0 1 2 3 4 5 6 7 8 9 10 11
1311 $         -------------------
1312 $  row 3  |  o o o d d d o o o o o o
1313 $  row 4  |  o o o d d d o o o o o o
1314 $  row 5  |  o o o d d d o o o o o o
1315 $         -------------------
1316 $
1317 
1318    Thus, any entries in the d locations are stored in the d (diagonal)
1319    submatrix, and any entries in the o locations are stored in the
1320    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1321    stored simply in the MATSEQAIJ format for compressed row storage.
1322 
1323    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1324    and o_nz should indicate the number of nonzeros per row in the o matrix.
1325    In general, for PDE problems in which most nonzeros are near the diagonal,
1326    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
1327    or you will get TERRIBLE performance; see the users' manual chapter on
1328    matrices and the file $(PETSC_DIR)/Performance.
1329 
1330 .keywords: matrix, aij, compressed row, sparse, parallel
1331 
1332 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1333 @*/
1334 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1335                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1336 {
1337   Mat          B;
1338   Mat_MPIAIJ   *b;
1339   int          ierr, i,sum[2],work[2],size;
1340 
1341   *A = 0;
1342   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1343   PLogObjectCreate(B);
1344   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1345   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1346   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1347   MPI_Comm_size(comm,&size);
1348   if (size == 1) {
1349     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
1350     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
1351     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
1352     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
1353     B->ops.lufactor          = MatLUFactor_MPIAIJ;
1354     B->ops.solve             = MatSolve_MPIAIJ;
1355     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
1356     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
1357     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
1358     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
1359   }
1360   B->destroy    = MatDestroy_MPIAIJ;
1361   B->view       = MatView_MPIAIJ;
1362   B->factor     = 0;
1363   B->assembled  = PETSC_FALSE;
1364   B->mapping    = 0;
1365 
1366   b->insertmode = NOT_SET_VALUES;
1367   MPI_Comm_rank(comm,&b->rank);
1368   MPI_Comm_size(comm,&b->size);
1369 
1370   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1371     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1372 
1373   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1374     work[0] = m; work[1] = n;
1375     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1376     if (M == PETSC_DECIDE) M = sum[0];
1377     if (N == PETSC_DECIDE) N = sum[1];
1378   }
1379   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1380   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1381   b->m = m; B->m = m;
1382   b->n = n; B->n = n;
1383   b->N = N; B->N = N;
1384   b->M = M; B->M = M;
1385 
1386   /* build local table of row and column ownerships */
1387   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1388   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1389   b->cowners = b->rowners + b->size + 2;
1390   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
1391   b->rowners[0] = 0;
1392   for ( i=2; i<=b->size; i++ ) {
1393     b->rowners[i] += b->rowners[i-1];
1394   }
1395   b->rstart = b->rowners[b->rank];
1396   b->rend   = b->rowners[b->rank+1];
1397   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
1398   b->cowners[0] = 0;
1399   for ( i=2; i<=b->size; i++ ) {
1400     b->cowners[i] += b->cowners[i-1];
1401   }
1402   b->cstart = b->cowners[b->rank];
1403   b->cend   = b->cowners[b->rank+1];
1404 
1405   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1406   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1407   PLogObjectParent(B,b->A);
1408   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1409   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1410   PLogObjectParent(B,b->B);
1411 
1412   /* build cache for off array entries formed */
1413   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1414   b->donotstash  = 0;
1415   b->colmap      = 0;
1416   b->garray      = 0;
1417   b->roworiented = 1;
1418 
1419   /* stuff used for matrix-vector multiply */
1420   b->lvec      = 0;
1421   b->Mvctx     = 0;
1422 
1423   /* stuff for MatGetRow() */
1424   b->rowindices   = 0;
1425   b->rowvalues    = 0;
1426   b->getrowactive = PETSC_FALSE;
1427 
1428   *A = B;
1429   return 0;
1430 }
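
     /*
        Illustrative usage sketch only (not part of the library source): the
        communicator, global sizes, and preallocation values below are
        assumptions chosen for the example, not values taken from this file.

            Mat mat;
            int ierr;

            ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                                   5,PETSC_NULL,2,PETSC_NULL,&mat); CHKERRQ(ierr);
            ... calls to MatSetValues() ...
            ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
            ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
            ... use the matrix ...
            ierr = MatDestroy(mat); CHKERRQ(ierr);
     */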
1431 
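     /* duplicates an MPIAIJ matrix: the parallel layout, ownership tables,
        column map, garray, local work vector and scatter context, and the
        local A and B blocks are all copied into a new matrix of the same type */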
1432 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1433 {
1434   Mat        mat;
1435   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1436   int        ierr, len=0, flg;
1437 
1438   *newmat       = 0;
1439   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1440   PLogObjectCreate(mat);
1441   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1442   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1443   mat->destroy    = MatDestroy_MPIAIJ;
1444   mat->view       = MatView_MPIAIJ;
1445   mat->factor     = matin->factor;
1446   mat->assembled  = PETSC_TRUE;
1447 
1448   a->m = mat->m   = oldmat->m;
1449   a->n = mat->n   = oldmat->n;
1450   a->M = mat->M   = oldmat->M;
1451   a->N = mat->N   = oldmat->N;
1452 
1453   a->rstart       = oldmat->rstart;
1454   a->rend         = oldmat->rend;
1455   a->cstart       = oldmat->cstart;
1456   a->cend         = oldmat->cend;
1457   a->size         = oldmat->size;
1458   a->rank         = oldmat->rank;
1459   a->insertmode   = NOT_SET_VALUES;
1460   a->rowvalues    = 0;
       a->rowindices   = 0;
       a->donotstash   = oldmat->donotstash;
       a->roworiented  = oldmat->roworiented;
1461   a->getrowactive = PETSC_FALSE;
1462 
1463   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1464   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1465   a->cowners = a->rowners + a->size + 2;
1466   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1467   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1468   if (oldmat->colmap) {
1469     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1470     PLogObjectMemory(mat,(a->N)*sizeof(int));
1471     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1472   } else a->colmap = 0;
1473   if (oldmat->garray) {
1474     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1475     a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
1476     PLogObjectMemory(mat,len*sizeof(int));
1477     if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1478   } else a->garray = 0;
1479 
1480   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1481   PLogObjectParent(mat,a->lvec);
1482   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1483   PLogObjectParent(mat,a->Mvctx);
1484   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1485   PLogObjectParent(mat,a->A);
1486   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1487   PLogObjectParent(mat,a->B);
1488   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1489   if (flg) {
1490     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1491   }
1492   *newmat = mat;
1493   return 0;
1494 }
1495 
1496 #include "sys.h"
1497 
1498 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1499 {
1500   Mat          A;
1501   int          i, nz, ierr, j,rstart, rend, fd;
1502   Scalar       *vals,*svals;
1503   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1504   MPI_Status   status;
1505   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1506   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1507   int          tag = ((PetscObject)viewer)->tag;
1508 
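       /* overall flow: process 0 reads the header, row lengths, column indices,
          and numerical values from the binary file and ships each process the
          rows it owns; the other processes receive their portions and insert
          them locally with MatSetValues() */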
1509   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1510   if (!rank) {
1511     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1512     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1513     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not a matrix object");
1514   }
1515 
1516   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1517   M = header[1]; N = header[2];
1518   /* determine ownership of all rows */
1519   m = M/size + ((M % size) > rank);
1520   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1521   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1522   rowners[0] = 0;
1523   for ( i=2; i<=size; i++ ) {
1524     rowners[i] += rowners[i-1];
1525   }
1526   rstart = rowners[rank];
1527   rend   = rowners[rank+1];
1528 
1529   /* distribute row lengths to all processors */
1530   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1531   offlens = ourlens + (rend-rstart);
1532   if (!rank) {
1533     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1534     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1535     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1536     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1537     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1538     PetscFree(sndcounts);
1539   }
1540   else {
1541     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1542   }
1543 
1544   if (!rank) {
1545     /* calculate the number of nonzeros on each processor */
1546     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1547     PetscMemzero(procsnz,size*sizeof(int));
1548     for ( i=0; i<size; i++ ) {
1549       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1550         procsnz[i] += rowlengths[j];
1551       }
1552     }
1553     PetscFree(rowlengths);
1554 
1555     /* determine max buffer needed and allocate it */
1556     maxnz = 0;
1557     for ( i=0; i<size; i++ ) {
1558       maxnz = PetscMax(maxnz,procsnz[i]);
1559     }
1560     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1561 
1562     /* read in my part of the matrix column indices  */
1563     nz = procsnz[0];
1564     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1565     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1566 
1567     /* read in everyone else's column indices and ship them off */
1568     for ( i=1; i<size; i++ ) {
1569       nz = procsnz[i];
1570       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
1571       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1572     }
1573     PetscFree(cols);
1574   }
1575   else {
1576     /* determine buffer space needed for message */
1577     nz = 0;
1578     for ( i=0; i<m; i++ ) {
1579       nz += ourlens[i];
1580     }
1581     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1582 
1583     /* receive message of column indices*/
1584     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1585     MPI_Get_count(&status,MPI_INT,&maxnz);
1586     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:number of column indices received does not match row lengths in file");
1587   }
1588 
1589   /* loop over local rows, determining number of off-diagonal entries */
1590   PetscMemzero(offlens,m*sizeof(int));
1591   jj = 0;
1592   for ( i=0; i<m; i++ ) {
1593     for ( j=0; j<ourlens[i]; j++ ) {
1594       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1595       jj++;
1596     }
1597   }
1598 
1599   /* create our matrix */
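       /* ourlens[] is temporarily reduced to the diagonal-block row lengths so
          it can serve as the d_nnz preallocation (offlens[] supplies o_nnz);
          the full row lengths are restored right after for the insertion loops */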
1600   for ( i=0; i<m; i++ ) {
1601     ourlens[i] -= offlens[i];
1602   }
1603   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1604   A = *newmat;
1605   MatSetOption(A,MAT_COLUMNS_SORTED);
1606   for ( i=0; i<m; i++ ) {
1607     ourlens[i] += offlens[i];
1608   }
1609 
1610   if (!rank) {
1611     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1612 
1613     /* read in my part of the matrix numerical values  */
1614     nz = procsnz[0];
1615     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1616 
1617     /* insert into matrix */
1618     jj      = rstart;
1619     smycols = mycols;
1620     svals   = vals;
1621     for ( i=0; i<m; i++ ) {
1622       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1623       smycols += ourlens[i];
1624       svals   += ourlens[i];
1625       jj++;
1626     }
1627 
1628     /* read in other processors and ship out */
1629     for ( i=1; i<size; i++ ) {
1630       nz = procsnz[i];
1631       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1632       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1633     }
1634     PetscFree(procsnz);
1635   }
1636   else {
1637     /* receive numeric values */
1638     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1639 
1640     /* receive message of values */
1641     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1642     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1643     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:number of values received does not match row lengths in file");
1644 
1645     /* insert into matrix */
1646     jj      = rstart;
1647     smycols = mycols;
1648     svals   = vals;
1649     for ( i=0; i<m; i++ ) {
1650       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1651       smycols += ourlens[i];
1652       svals   += ourlens[i];
1653       jj++;
1654     }
1655   }
1656   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1657 
1658   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1659   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1660   return 0;
1661 }
1662
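     /*
        Illustrative usage sketch only (not part of the library source). It
        assumes 'viewer' is a binary viewer already opened for reading, for
        example with ViewerFileOpenBinary() if that routine is available in
        this release of the library.

            Mat A;
            int ierr;

            ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
            ... use the matrix ...
            ierr = MatDestroy(A); CHKERRQ(ierr);
     */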