xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 639f9d9dbbc54d6ac4e42e98283c540b41bb2cee)
#ifndef lint
static char vcid[] = "$Id: mpiaij.c,v 1.172 1996/11/01 23:25:33 balay Exp balay $";
#endif

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  This is done in a non-scalable way since the
length of colmap equals the global number of columns.
*/
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  return 0;
}
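
/* An illustrative sketch of the mapping built above (hypothetical numbers,
   not taken from any run): if this process's off-diagonal block references
   global columns garray[] = {7,9}, then colmap[7] == 1 and colmap[9] == 2,
   so local = colmap[global] - 1 recovers the local column number, while
   colmap[g] == 0 flags a global column g absent from the off-diagonal part. */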

extern int DisAssemble_MPIAIJ(Mat);

static int MatGetRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                           PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,"MatGetRowIJ_MPIAIJ:not supported in parallel");
  return 0;
}

static int MatRestoreRowIJ_MPIAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
                               PetscTruth *done)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
  if (aij->size == 1) {
    ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
  } else SETERRQ(1,"MatRestoreRowIJ_MPIAIJ:not supported in parallel");
  return 0;
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);

static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

#if defined(PETSC_BOPT_g)
  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
#endif
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
#if defined(PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      if (roworiented) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        row = im[i];
        for ( j=0; j<n; j++ ) {
          ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    }
  }
  return 0;
}

static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
    }
  }
  return 0;
}

static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
     (global row index,global column index,value); we store the indices
     as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /*  wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, so that we may reassemble. */
  MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  return 0;
}
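
/* Caller-side sketch of the protocol implemented by the two assembly
   routines above (a minimal, hypothetical example; error checking with
   CHKERRQ(ierr) omitted). Any process may set any global entry; values
   destined for rows owned by another process are stashed by MatSetValues()
   and routed to their owners during assembly:

       Scalar v   = 1.0;
       int    row = 0, col = 0;       hypothetical global indices
       ierr = MatSetValues(mat,1,&row,1,&col,&v,INSERT_VALUES);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/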

static int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  return 0;
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   aij->A and aij->B directly and not through the MatZeroRows()
   routine.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PetscFree(lrows);
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  return 0;
}
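
/* Usage sketch for the routine above (hypothetical row numbers; error
   checking omitted). Passing &one sets the diagonal entry of each zeroed
   row to 1.0; passing a null pointer zeros the diagonal as well:

       IS     is;
       int    rows[2] = {0,5};        hypothetical global row numbers
       Scalar one     = 1.0;
       ierr = ISCreateGeneral(MPI_COMM_SELF,2,rows,&is);
       ierr = MatZeroRows(A,is,&one);
       ierr = ISDestroy(is);
*/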

static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  ierr = VecGetLocalSize(xx,&nt);  CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(1,"MatMult_MPIAIJ:Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}

static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  return 0;
}

static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  /* do nondiagonal part */
  ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  return 0;
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block.
*/
static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  if (a->M != a->N)
    SETERRQ(1,"MatGetDiagonal_MPIAIJ:Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(1,"MatGetDiagonal_MPIAIJ:row partition must equal col partition");
  }
  return MatGetDiagonal(a->A,v);
}

static int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  return 0;
}

static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}
#include "draw.h"
#include "pinclude/pviewer.h"

static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;

  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
  return 0;
}

static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  FILE        *fd;
  ViewerType  vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      return 0;
    }
    else if (format == VIEWER_FORMAT_ASCII_INFO) {
      return 0;
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
           aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
           aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqAIJ *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
               CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
               CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}

static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  int         ierr;
  ViewerType  vtype;

  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  }
  else if (vtype == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}

/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration.
     3) coloring, updating outer values between colors.
*/
static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                           double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
    CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      return (*mat->A->ops.relax)(mat->A,bb,omega,flag,fshift,its,xx);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                            mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
                            mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  }
  else {
    SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
  }
  return 0;
}

static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  info->rows_global    = (double)mat->M;
  info->columns_global = (double)mat->N;
  info->rows_local     = (double)mat->m;
  info->columns_local  = (double)mat->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;

  return 0;
}
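
/* Usage sketch: the reductions above let a caller request local, globally
   summed, or per-process-maximum statistics (a hedged example; the MatInfo
   fields are doubles):

       MatInfo info;
       ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);
       printf("nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);
*/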

extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);

static int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_ROW_ORIENTED) {
        MatSetOption(a->A,op);
        MatSetOption(a->B,op);
  }
  else if (op == MAT_ROWS_SORTED ||
           op == MAT_SYMMETRIC ||
           op == MAT_STRUCTURALLY_SYMMETRIC ||
           op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  }
  else if (op == MAT_NO_NEW_DIAGONALS)
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:MAT_NO_NEW_DIAGONALS");}
  else
    {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
  return 0;
}

static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->M; *n = mat->N;
  return 0;
}

static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->m; *n = mat->N;
  return 0;
}

static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  *m = mat->rstart; *n = mat->rend;
  return 0;
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int     max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  return 0;
}

int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  return 0;
}
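
/* Usage sketch for the pair above (only locally owned rows may be queried;
   a hypothetical loop over this process's rows, error checking omitted):

       int    rstart,rend,row,nz,*cols;
       Scalar *vals;
       ierr = MatGetOwnershipRange(mat,&rstart,&rend);
       for ( row=rstart; row<rend; row++ ) {
         ierr = MatGetRow(mat,row,&nz,&cols,&vals);
         ... examine the nz entries in cols[] and vals[] ...
         ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);
       }
*/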

static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}

static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  Mat        B;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Scalar     *array;

  if (matout == PETSC_NULL && M != N)
    SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
  ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
         PETSC_NULL,&B); CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    /* This isn't really an in-place transpose, but we free the data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);
    PetscMemcpy(A,B,sizeof(struct _Mat));
    PetscHeaderDestroy(B);
  }
  return 0;
}

int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat a = aij->A, b = aij->B;
  int ierr,s1,s2,s3;

  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    s3 = aij->n;
    VecGetLocalSize_Fast(rr,s1);
    if (s1!=s3) SETERRQ(1,"MatDiagonalScale: right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_ALL,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    VecGetLocalSize_Fast(ll,s1);
    if (s1!=s2) SETERRQ(1,"MatDiagonalScale: left vector non-conforming local size");
    ierr = (*b->ops.diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops.diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_ALL,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops.diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  return 0;
}

extern int MatPrintHelp_SeqAIJ(Mat);
static int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;

  if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
  else return 0;
}

static int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  *bs = 1;
  return 0;
}

extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvert_MPIAIJ,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ};


/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in the diagonal portion of the local submatrix
           (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL. You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of the local
           submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$          0 1 2 3 4 5 6 7 8 9 10 11
$         -------------------
$  row 3  |  o o o d d d o o o o o o
$  row 4  |  o o o d d d o o o o o o
$  row 5  |  o o o d d d o o o o o o
$         -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices and the file $(PETSC_DIR)/Performance.
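
   Example:
   A minimal usage sketch (error checking omitted; M and N are hypothetical
   global dimensions, and the preallocation values 5 and 2 are illustrative
   assumptions, not recommendations):
$      Mat A;
$      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
$                             M,N,5,PETSC_NULL,2,PETSC_NULL,&A);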

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
1327 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1328                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1329 {
1330   Mat          B;
1331   Mat_MPIAIJ   *b;
1332   int          ierr, i,sum[2],work[2],size;
1333 
1334   *A = 0;
1335   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1336   PLogObjectCreate(B);
1337   B->data       = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
1338   PetscMemzero(b,sizeof(Mat_MPIAIJ));
1339   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1340   MPI_Comm_size(comm,&size);
1341   if (size == 1) {
1342     B->ops.getrowij          = MatGetRowIJ_MPIAIJ;
1343     B->ops.restorerowij      = MatRestoreRowIJ_MPIAIJ;
1344     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIAIJ;
1345     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIAIJ;
1346     B->ops.lufactor          = MatLUFactor_MPIAIJ;
1347     B->ops.solve             = MatSolve_MPIAIJ;
1348     B->ops.solveadd          = MatSolveAdd_MPIAIJ;
1349     B->ops.solvetrans        = MatSolveTrans_MPIAIJ;
1350     B->ops.solvetransadd     = MatSolveTransAdd_MPIAIJ;
1351     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIAIJ;
1352   }
1353   B->destroy    = MatDestroy_MPIAIJ;
1354   B->view       = MatView_MPIAIJ;
1355   B->factor     = 0;
1356   B->assembled  = PETSC_FALSE;
1357 
1358   b->insertmode = NOT_SET_VALUES;
1359   MPI_Comm_rank(comm,&b->rank);
1360   MPI_Comm_size(comm,&b->size);
1361 
1362   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1363     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1364 
1365   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1366     work[0] = m; work[1] = n;
1367     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1368     if (M == PETSC_DECIDE) M = sum[0];
1369     if (N == PETSC_DECIDE) N = sum[1];
1370   }
1371   if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
1372   if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
1373   b->m = m; B->m = m;
1374   b->n = n; B->n = n;
1375   b->N = N; B->N = N;
1376   b->M = M; B->M = M;
1377 
1378   /* build local table of row and column ownerships */
1379   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1380   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1381   b->cowners = b->rowners + b->size + 2;
1382   MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
1383   b->rowners[0] = 0;
1384   for ( i=2; i<=b->size; i++ ) {
1385     b->rowners[i] += b->rowners[i-1];
1386   }
1387   b->rstart = b->rowners[b->rank];
1388   b->rend   = b->rowners[b->rank+1];
1389   MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
1390   b->cowners[0] = 0;
1391   for ( i=2; i<=b->size; i++ ) {
1392     b->cowners[i] += b->cowners[i-1];
1393   }
1394   b->cstart = b->cowners[b->rank];
1395   b->cend   = b->cowners[b->rank+1];
1396 
1397   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1398   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1399   PLogObjectParent(B,b->A);
1400   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1401   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1402   PLogObjectParent(B,b->B);

  /* build cache for off-processor entries generated during MatSetValues() */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix-vector multiply */
  b->lvec      = 0;
  b->Mvctx     = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}
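
/*
   Minimal usage sketch for MatCreateMPIAIJ() (illustrative only, not part
   of the original source; error checking abbreviated):

      Mat A;
      int ierr;
      ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                             5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
      ... set values, assemble, and use A ...
      ierr = MatDestroy(A); CHKERRQ(ierr);
*/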

static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  *newmat       = 0;
  PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemzero(a,sizeof(Mat_MPIAIJ));
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = matin->factor;
  mat->assembled  = PETSC_TRUE;

  a->m = mat->m   = oldmat->m;
  a->n = mat->n   = oldmat->n;
  a->M = mat->M   = oldmat->M;
  a->N = mat->N   = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->insertmode   = NOT_SET_VALUES;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
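  /* Note: cpvalues is never examined in this routine; the two blocks are
     copied via MatConvert(...,MATSAME,...) regardless, so the flag is
     effectively ignored and the numerical values are always duplicated. */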
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  return 0;
}

#include "sys.h"

int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
  }

  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];
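  /* This repeats the PETSC_DECIDE row split used by MatCreateMPIAIJ(), so
     the ownership ranges computed here match the matrix created below;
     rowners also serves directly as the displacement array for the
     MPI_Scatterv of row lengths that follows. */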

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PetscFree(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
  }
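  /* The zero send arguments on the non-root ranks are legitimate:
     MPI_Scatterv only reads the send buffer, counts, and displacements
     at the root, so every other process just posts its receive side. */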

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PetscFree(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }
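  /* Note (our reading, not a comment from the original source): a column is
     counted as off-diagonal when it lies outside [rstart,rend), i.e. outside
     this process's ROW range; that coincides with the diagonal block only
     when the column ownership matches the row ownership, as it does in the
     square-matrix case. */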

  /* create our matrix: temporarily shrink ourlens[] to the diagonal-block
     row lengths so that ourlens/offlens supply the d_nnz/o_nnz
     preallocation, then restore the full lengths for the insertion loops */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
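    /* each pass inserts one complete global row jj: its ourlens[i] column
       indices and values, with smycols/svals walking the packed arrays */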

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PetscFree(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}

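/*
   Illustrative loading sketch (not part of the original source; assumes the
   binary-viewer interface of this PETSc vintage, i.e. ViewerFileOpenBinary()
   with BINARY_RDONLY):

      Viewer viewer;
      Mat    A;
      int    ierr;
      ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,
                                  &viewer); CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
      ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
*/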