xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision cb8a5a2e4a0f6e92c3fb2e84e505eeca89835e11)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.131 1996/03/08 05:47:18 bsmith Exp bsmith $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non scable way since the
12 length of colmap equals the global matrix length.
13 */
14 static int CreateColmap_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18   int        n = B->n,i,shift = B->indexshift;
19 
20   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21   PLogObjectMemory(mat,aij->N*sizeof(int));
22   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
24   return 0;
25 }
26 
27 extern int DisAssemble_MPIAIJ(Mat);
28 
29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30 {
31   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32   int        ierr;
33   if (aij->size == 1) {
34     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
35   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36   return 0;
37 }
38 
39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
43   Scalar     value;
44   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
45   int        cstart = aij->cstart, cend = aij->cend,row,col;
46   int        shift = C->indexshift,roworiented = aij->roworiented;
47 
48   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
49     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
50   }
51   aij->insertmode = addv;
52   for ( i=0; i<m; i++ ) {
53     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
54     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
55     if (im[i] >= rstart && im[i] < rend) {
56       row = im[i] - rstart;
57       for ( j=0; j<n; j++ ) {
58         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
59         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
60         if (in[j] >= cstart && in[j] < cend){
61           col = in[j] - cstart;
62           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
63           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
64         }
65         else {
66           if (mat->was_assembled) {
67             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
68             col = aij->colmap[in[j]] + shift;
69             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
70               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
71               col =  in[j];
72             }
73           }
74           else col = in[j];
75           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
76           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
77         }
78       }
79     }
80     else {
81       if (roworiented) {
82         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
83       }
84       else {
85         row = im[i];
86         for ( j=0; j<n; j++ ) {
87           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
88         }
89       }
90     }
91   }
92   return 0;
93 }
94 
95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96 {
97   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100   int        cstart = aij->cstart, cend = aij->cend,row,col;
101   int        shift = C->indexshift;
102 
103   for ( i=0; i<m; i++ ) {
104     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
105     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
106     if (idxm[i] >= rstart && idxm[i] < rend) {
107       row = idxm[i] - rstart;
108       for ( j=0; j<n; j++ ) {
109         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
110         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
111         if (idxn[j] >= cstart && idxn[j] < cend){
112           col = idxn[j] - cstart;
113           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
114         }
115         else {
116           col = aij->colmap[idxn[j]] + shift;
117           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
118         }
119       }
120     }
121     else {
122       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
123     }
124   }
125   return 0;
126 }
127 
128 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
129 {
130   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
131   MPI_Comm    comm = mat->comm;
132   int         size = aij->size, *owners = aij->rowners;
133   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
134   MPI_Request *send_waits,*recv_waits;
135   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
136   InsertMode  addv;
137   Scalar      *rvalues,*svalues;
138 
139   /* make sure all processors are either in INSERTMODE or ADDMODE */
140   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
141   if (addv == (ADD_VALUES|INSERT_VALUES)) {
142     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
143   }
144   aij->insertmode = addv; /* in case this processor had no cache */
145 
146   /*  first count number of contributors to each processor */
147   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
148   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
149   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
150   for ( i=0; i<aij->stash.n; i++ ) {
151     idx = aij->stash.idx[i];
152     for ( j=0; j<size; j++ ) {
153       if (idx >= owners[j] && idx < owners[j+1]) {
154         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
155       }
156     }
157   }
158   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
159 
160   /* inform other processors of number of messages and max length*/
161   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
162   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
163   nreceives = work[rank];
164   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
165   nmax = work[rank];
166   PetscFree(work);
167 
168   /* post receives:
169        1) each message will consist of ordered pairs
170      (global index,value) we store the global index as a double
171      to simplify the message passing.
172        2) since we don't know how long each individual message is we
173      allocate the largest needed buffer for each receive. Potentially
174      this is a lot of wasted space.
175 
176 
177        This could be done better.
178   */
179   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
180   CHKPTRQ(rvalues);
181   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
182   CHKPTRQ(recv_waits);
183   for ( i=0; i<nreceives; i++ ) {
184     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
185               comm,recv_waits+i);
186   }
187 
188   /* do sends:
189       1) starts[i] gives the starting index in svalues for stuff going to
190          the ith processor
191   */
192   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
193   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
194   CHKPTRQ(send_waits);
195   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
196   starts[0] = 0;
197   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
198   for ( i=0; i<aij->stash.n; i++ ) {
199     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
200     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
201     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
202   }
203   PetscFree(owner);
204   starts[0] = 0;
205   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
206   count = 0;
207   for ( i=0; i<size; i++ ) {
208     if (procs[i]) {
209       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
210                 comm,send_waits+count++);
211     }
212   }
213   PetscFree(starts); PetscFree(nprocs);
214 
215   /* Free cache space */
216   PLogInfo(0,"[%d]MatAssemblyBegin_MPIAIJ:Number of off processor values %d\n",rank,aij->stash.n);
217   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
218 
219   aij->svalues    = svalues;    aij->rvalues    = rvalues;
220   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
221   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
222   aij->rmax       = nmax;
223 
224   return 0;
225 }
226 extern int MatSetUpMultiply_MPIAIJ(Mat);
227 
228 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
229 {
230   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
231   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
232   MPI_Status  *send_status,recv_status;
233   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
234   int         row,col,other_disassembled,shift = C->indexshift;
235   Scalar      *values,val;
236   InsertMode  addv = aij->insertmode;
237 
238   /*  wait on receives */
239   while (count) {
240     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
241     /* unpack receives into our local space */
242     values = aij->rvalues + 3*imdex*aij->rmax;
243     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
244     n = n/3;
245     for ( i=0; i<n; i++ ) {
246       row = (int) PetscReal(values[3*i]) - aij->rstart;
247       col = (int) PetscReal(values[3*i+1]);
248       val = values[3*i+2];
249       if (col >= aij->cstart && col < aij->cend) {
250         col -= aij->cstart;
251         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
252       }
253       else {
254         if (mat->was_assembled) {
255           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
256           col = aij->colmap[col] + shift;
257           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
258             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
259             col = (int) PetscReal(values[3*i+1]);
260           }
261         }
262         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
263       }
264     }
265     count--;
266   }
267   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
268 
269   /* wait on sends */
270   if (aij->nsends) {
271     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
272     CHKPTRQ(send_status);
273     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
274     PetscFree(send_status);
275   }
276   PetscFree(aij->send_waits); PetscFree(aij->svalues);
277 
278   aij->insertmode = NOT_SET_VALUES;
279   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
280   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
281 
282   /* determine if any processor has disassembled, if so we must
283      also disassemble ourselfs, in order that we may reassemble. */
284   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
285   if (mat->was_assembled && !other_disassembled) {
286     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
287   }
288 
289   if (!mat->was_assembled && mode == FINAL_ASSEMBLY) {
290     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
291   }
292   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
293   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
294 
295   if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
296   return 0;
297 }
298 
299 static int MatZeroEntries_MPIAIJ(Mat A)
300 {
301   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
302   int        ierr;
303   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
304   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
305   return 0;
306 }
307 
308 /* the code does not do the diagonal entries correctly unless the
309    matrix is square and the column and row owerships are identical.
310    This is a BUG. The only way to fix it seems to be to access
311    aij->A and aij->B directly and not through the MatZeroRows()
312    routine.
313 */
314 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
315 {
316   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
317   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
318   int            *procs,*nprocs,j,found,idx,nsends,*work;
319   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
320   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
321   int            *lens,imdex,*lrows,*values;
322   MPI_Comm       comm = A->comm;
323   MPI_Request    *send_waits,*recv_waits;
324   MPI_Status     recv_status,*send_status;
325   IS             istmp;
326 
327   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
328   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
329 
330   /*  first count number of contributors to each processor */
331   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
332   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
333   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
334   for ( i=0; i<N; i++ ) {
335     idx = rows[i];
336     found = 0;
337     for ( j=0; j<size; j++ ) {
338       if (idx >= owners[j] && idx < owners[j+1]) {
339         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
340       }
341     }
342     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
343   }
344   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
345 
346   /* inform other processors of number of messages and max length*/
347   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
348   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
349   nrecvs = work[rank];
350   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
351   nmax = work[rank];
352   PetscFree(work);
353 
354   /* post receives:   */
355   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
356   CHKPTRQ(rvalues);
357   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
358   CHKPTRQ(recv_waits);
359   for ( i=0; i<nrecvs; i++ ) {
360     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
361   }
362 
363   /* do sends:
364       1) starts[i] gives the starting index in svalues for stuff going to
365          the ith processor
366   */
367   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
368   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
369   CHKPTRQ(send_waits);
370   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
371   starts[0] = 0;
372   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
373   for ( i=0; i<N; i++ ) {
374     svalues[starts[owner[i]]++] = rows[i];
375   }
376   ISRestoreIndices(is,&rows);
377 
378   starts[0] = 0;
379   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
380   count = 0;
381   for ( i=0; i<size; i++ ) {
382     if (procs[i]) {
383       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
384     }
385   }
386   PetscFree(starts);
387 
388   base = owners[rank];
389 
390   /*  wait on receives */
391   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
392   source = lens + nrecvs;
393   count  = nrecvs; slen = 0;
394   while (count) {
395     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
396     /* unpack receives into our local space */
397     MPI_Get_count(&recv_status,MPI_INT,&n);
398     source[imdex]  = recv_status.MPI_SOURCE;
399     lens[imdex]  = n;
400     slen += n;
401     count--;
402   }
403   PetscFree(recv_waits);
404 
405   /* move the data into the send scatter */
406   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
407   count = 0;
408   for ( i=0; i<nrecvs; i++ ) {
409     values = rvalues + i*nmax;
410     for ( j=0; j<lens[i]; j++ ) {
411       lrows[count++] = values[j] - base;
412     }
413   }
414   PetscFree(rvalues); PetscFree(lens);
415   PetscFree(owner); PetscFree(nprocs);
416 
417   /* actually zap the local rows */
418   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
419   PLogObjectParent(A,istmp);
420   PetscFree(lrows);
421   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
422   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
423   ierr = ISDestroy(istmp); CHKERRQ(ierr);
424 
425   /* wait on sends */
426   if (nsends) {
427     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
428     CHKPTRQ(send_status);
429     MPI_Waitall(nsends,send_waits,send_status);
430     PetscFree(send_status);
431   }
432   PetscFree(send_waits); PetscFree(svalues);
433 
434   return 0;
435 }
436 
437 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
438 {
439   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
440   int        ierr;
441 
442   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
443   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
444   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
445   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
446   return 0;
447 }
448 
449 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
450 {
451   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
452   int        ierr;
453   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
454   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
455   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
456   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
457   return 0;
458 }
459 
460 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
461 {
462   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
463   int        ierr;
464 
465   /* do nondiagonal part */
466   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
467   /* send it on its way */
468   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
469                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
470   /* do local part */
471   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
472   /* receive remote parts: note this assumes the values are not actually */
473   /* inserted in yy until the next line, which is true for my implementation*/
474   /* but is not perhaps always true. */
475   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
476                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
477   return 0;
478 }
479 
480 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
481 {
482   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
483   int        ierr;
484 
485   /* do nondiagonal part */
486   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
487   /* send it on its way */
488   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
489                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
490   /* do local part */
491   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
492   /* receive remote parts: note this assumes the values are not actually */
493   /* inserted in yy until the next line, which is true for my implementation*/
494   /* but is not perhaps always true. */
495   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
496                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
497   return 0;
498 }
499 
500 /*
501   This only works correctly for square matrices where the subblock A->A is the
502    diagonal block
503 */
504 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
505 {
506   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
507   return MatGetDiagonal(a->A,v);
508 }
509 
510 static int MatScale_MPIAIJ(Scalar *aa,Mat A)
511 {
512   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
513   int        ierr;
514   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
515   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
516   return 0;
517 }
518 
519 static int MatDestroy_MPIAIJ(PetscObject obj)
520 {
521   Mat        mat = (Mat) obj;
522   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
523   int        ierr;
524 #if defined(PETSC_LOG)
525   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
526 #endif
527   PetscFree(aij->rowners);
528   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
529   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
530   if (aij->colmap) PetscFree(aij->colmap);
531   if (aij->garray) PetscFree(aij->garray);
532   if (aij->lvec)   VecDestroy(aij->lvec);
533   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
534   if (aij->rowvalues) PetscFree(aij->rowvalues);
535   PetscFree(aij);
536   PLogObjectDestroy(mat);
537   PetscHeaderDestroy(mat);
538   return 0;
539 }
540 #include "draw.h"
541 #include "pinclude/pviewer.h"
542 
543 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
544 {
545   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
546   int         ierr;
547 
548   if (aij->size == 1) {
549     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
550   }
551   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
552   return 0;
553 }
554 
555 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
556 {
557   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
558   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
559   int         ierr, format,shift = C->indexshift,rank;
560   FILE        *fd;
561   ViewerType  vtype;
562 
563   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
564   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
565     ierr = ViewerFileGetFormat_Private(viewer,&format);
566     if (format == FILE_FORMAT_INFO_DETAILED) {
567       int nz, nzalloc, mem, flg;
568       MPI_Comm_rank(mat->comm,&rank);
569       ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
570       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
571       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
572       MPIU_Seq_begin(mat->comm,1);
573       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
574          rank,aij->m,nz,nzalloc,mem);
575       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
576          rank,aij->m,nz,nzalloc,mem);
577       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
578       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,nz);
579       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
580       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,nz);
581       fflush(fd);
582       MPIU_Seq_end(mat->comm,1);
583       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
584       return 0;
585     }
586     else if (format == FILE_FORMAT_INFO) {
587       return 0;
588     }
589   }
590 
591   if (vtype == DRAW_VIEWER) {
592     Draw       draw;
593     PetscTruth isnull;
594     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
595     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
596   }
597 
598   if (vtype == ASCII_FILE_VIEWER) {
599     ierr = ViewerFileGetPointer(viewer,&fd); CHKERRQ(ierr);
600     MPIU_Seq_begin(mat->comm,1);
601     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
602            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
603            aij->cend);
604     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
605     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
606     fflush(fd);
607     MPIU_Seq_end(mat->comm,1);
608   }
609   else {
610     int size = aij->size;
611     rank = aij->rank;
612     if (size == 1) {
613       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
614     }
615     else {
616       /* assemble the entire matrix onto first processor. */
617       Mat         A;
618       Mat_SeqAIJ *Aloc;
619       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
620       Scalar      *a;
621 
622       if (!rank) {
623         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
624                CHKERRQ(ierr);
625       }
626       else {
627         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
628                CHKERRQ(ierr);
629       }
630       PLogObjectParent(mat,A);
631 
632       /* copy over the A part */
633       Aloc = (Mat_SeqAIJ*) aij->A->data;
634       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
635       row = aij->rstart;
636       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
637       for ( i=0; i<m; i++ ) {
638         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
639         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
640       }
641       aj = Aloc->j;
642       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
643 
644       /* copy over the B part */
645       Aloc = (Mat_SeqAIJ*) aij->B->data;
646       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
647       row = aij->rstart;
648       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
649       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
650       for ( i=0; i<m; i++ ) {
651         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
652         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
653       }
654       PetscFree(ct);
655       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
656       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
657       if (!rank) {
658         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
659       }
660       ierr = MatDestroy(A); CHKERRQ(ierr);
661     }
662   }
663   return 0;
664 }
665 
666 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
667 {
668   Mat         mat = (Mat) obj;
669   int         ierr;
670   ViewerType  vtype;
671 
672   if (!viewer) {
673     viewer = STDOUT_VIEWER_SELF;
674   }
675   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
676   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
677       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
678     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
679   }
680   else if (vtype == BINARY_FILE_VIEWER) {
681     return MatView_MPIAIJ_Binary(mat,viewer);
682   }
683   return 0;
684 }
685 
686 extern int MatMarkDiag_SeqAIJ(Mat);
687 /*
688     This has to provide several versions.
689 
690      1) per sequential
691      2) a) use only local smoothing updating outer values only once.
692         b) local smoothing updating outer values each inner iteration
693      3) color updating out values betwen colors.
694 */
695 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
696                            double fshift,int its,Vec xx)
697 {
698   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
699   Mat        AA = mat->A, BB = mat->B;
700   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
701   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
702   int        ierr,*idx, *diag;
703   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
704   Vec        tt;
705 
706   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
707   xs = x + shift; /* shift by one for index start of 1 */
708   ls = ls + shift;
709   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
710   diag = A->diag;
711   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
712     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
713   }
714   if (flag & SOR_EISENSTAT) {
715     /* Let  A = L + U + D; where L is lower trianglar,
716     U is upper triangular, E is diagonal; This routine applies
717 
718             (L + E)^{-1} A (U + E)^{-1}
719 
720     to a vector efficiently using Eisenstat's trick. This is for
721     the case of SSOR preconditioner, so E is D/omega where omega
722     is the relaxation factor.
723     */
724     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
725     PLogObjectParent(matin,tt);
726     VecGetArray(tt,&t);
727     scale = (2.0/omega) - 1.0;
728     /*  x = (E + U)^{-1} b */
729     VecSet(&zero,mat->lvec);
730     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
731                               mat->Mvctx); CHKERRQ(ierr);
732     for ( i=m-1; i>-1; i-- ) {
733       n    = A->i[i+1] - diag[i] - 1;
734       idx  = A->j + diag[i] + !shift;
735       v    = A->a + diag[i] + !shift;
736       sum  = b[i];
737       SPARSEDENSEMDOT(sum,xs,v,idx,n);
738       d    = fshift + A->a[diag[i]+shift];
739       n    = B->i[i+1] - B->i[i];
740       idx  = B->j + B->i[i] + shift;
741       v    = B->a + B->i[i] + shift;
742       SPARSEDENSEMDOT(sum,ls,v,idx,n);
743       x[i] = omega*(sum/d);
744     }
745     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
746                             mat->Mvctx); CHKERRQ(ierr);
747 
748     /*  t = b - (2*E - D)x */
749     v = A->a;
750     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
751 
752     /*  t = (E + L)^{-1}t */
753     ts = t + shift; /* shifted by one for index start of a or mat->j*/
754     diag = A->diag;
755     VecSet(&zero,mat->lvec);
756     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
757                                                  mat->Mvctx); CHKERRQ(ierr);
758     for ( i=0; i<m; i++ ) {
759       n    = diag[i] - A->i[i];
760       idx  = A->j + A->i[i] + shift;
761       v    = A->a + A->i[i] + shift;
762       sum  = t[i];
763       SPARSEDENSEMDOT(sum,ts,v,idx,n);
764       d    = fshift + A->a[diag[i]+shift];
765       n    = B->i[i+1] - B->i[i];
766       idx  = B->j + B->i[i] + shift;
767       v    = B->a + B->i[i] + shift;
768       SPARSEDENSEMDOT(sum,ls,v,idx,n);
769       t[i] = omega*(sum/d);
770     }
771     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
772                                                     mat->Mvctx); CHKERRQ(ierr);
773     /*  x = x + t */
774     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
775     VecDestroy(tt);
776     return 0;
777   }
778 
779 
780   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
781     if (flag & SOR_ZERO_INITIAL_GUESS) {
782       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
783     }
784     else {
785       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
786       CHKERRQ(ierr);
787       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
788       CHKERRQ(ierr);
789     }
790     while (its--) {
791       /* go down through the rows */
792       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
793                               mat->Mvctx); CHKERRQ(ierr);
794       for ( i=0; i<m; i++ ) {
795         n    = A->i[i+1] - A->i[i];
796         idx  = A->j + A->i[i] + shift;
797         v    = A->a + A->i[i] + shift;
798         sum  = b[i];
799         SPARSEDENSEMDOT(sum,xs,v,idx,n);
800         d    = fshift + A->a[diag[i]+shift];
801         n    = B->i[i+1] - B->i[i];
802         idx  = B->j + B->i[i] + shift;
803         v    = B->a + B->i[i] + shift;
804         SPARSEDENSEMDOT(sum,ls,v,idx,n);
805         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
806       }
807       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
808                             mat->Mvctx); CHKERRQ(ierr);
809       /* come up through the rows */
810       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
811                               mat->Mvctx); CHKERRQ(ierr);
812       for ( i=m-1; i>-1; i-- ) {
813         n    = A->i[i+1] - A->i[i];
814         idx  = A->j + A->i[i] + shift;
815         v    = A->a + A->i[i] + shift;
816         sum  = b[i];
817         SPARSEDENSEMDOT(sum,xs,v,idx,n);
818         d    = fshift + A->a[diag[i]+shift];
819         n    = B->i[i+1] - B->i[i];
820         idx  = B->j + B->i[i] + shift;
821         v    = B->a + B->i[i] + shift;
822         SPARSEDENSEMDOT(sum,ls,v,idx,n);
823         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
824       }
825       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
826                             mat->Mvctx); CHKERRQ(ierr);
827     }
828   }
829   else if (flag & SOR_FORWARD_SWEEP){
830     if (flag & SOR_ZERO_INITIAL_GUESS) {
831       VecSet(&zero,mat->lvec);
832       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
833                               mat->Mvctx); CHKERRQ(ierr);
834       for ( i=0; i<m; i++ ) {
835         n    = diag[i] - A->i[i];
836         idx  = A->j + A->i[i] + shift;
837         v    = A->a + A->i[i] + shift;
838         sum  = b[i];
839         SPARSEDENSEMDOT(sum,xs,v,idx,n);
840         d    = fshift + A->a[diag[i]+shift];
841         n    = B->i[i+1] - B->i[i];
842         idx  = B->j + B->i[i] + shift;
843         v    = B->a + B->i[i] + shift;
844         SPARSEDENSEMDOT(sum,ls,v,idx,n);
845         x[i] = omega*(sum/d);
846       }
847       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
848                             mat->Mvctx); CHKERRQ(ierr);
849       its--;
850     }
851     while (its--) {
852       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
853       CHKERRQ(ierr);
854       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
855       CHKERRQ(ierr);
856       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
857                               mat->Mvctx); CHKERRQ(ierr);
858       for ( i=0; i<m; i++ ) {
859         n    = A->i[i+1] - A->i[i];
860         idx  = A->j + A->i[i] + shift;
861         v    = A->a + A->i[i] + shift;
862         sum  = b[i];
863         SPARSEDENSEMDOT(sum,xs,v,idx,n);
864         d    = fshift + A->a[diag[i]+shift];
865         n    = B->i[i+1] - B->i[i];
866         idx  = B->j + B->i[i] + shift;
867         v    = B->a + B->i[i] + shift;
868         SPARSEDENSEMDOT(sum,ls,v,idx,n);
869         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
870       }
871       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
872                             mat->Mvctx); CHKERRQ(ierr);
873     }
874   }
875   else if (flag & SOR_BACKWARD_SWEEP){
876     if (flag & SOR_ZERO_INITIAL_GUESS) {
877       VecSet(&zero,mat->lvec);
878       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
879                               mat->Mvctx); CHKERRQ(ierr);
880       for ( i=m-1; i>-1; i-- ) {
881         n    = A->i[i+1] - diag[i] - 1;
882         idx  = A->j + diag[i] + !shift;
883         v    = A->a + diag[i] + !shift;
884         sum  = b[i];
885         SPARSEDENSEMDOT(sum,xs,v,idx,n);
886         d    = fshift + A->a[diag[i]+shift];
887         n    = B->i[i+1] - B->i[i];
888         idx  = B->j + B->i[i] + shift;
889         v    = B->a + B->i[i] + shift;
890         SPARSEDENSEMDOT(sum,ls,v,idx,n);
891         x[i] = omega*(sum/d);
892       }
893       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
894                             mat->Mvctx); CHKERRQ(ierr);
895       its--;
896     }
897     while (its--) {
898       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
899                             mat->Mvctx); CHKERRQ(ierr);
900       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
901                             mat->Mvctx); CHKERRQ(ierr);
902       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
903                               mat->Mvctx); CHKERRQ(ierr);
904       for ( i=m-1; i>-1; i-- ) {
905         n    = A->i[i+1] - A->i[i];
906         idx  = A->j + A->i[i] + shift;
907         v    = A->a + A->i[i] + shift;
908         sum  = b[i];
909         SPARSEDENSEMDOT(sum,xs,v,idx,n);
910         d    = fshift + A->a[diag[i]+shift];
911         n    = B->i[i+1] - B->i[i];
912         idx  = B->j + B->i[i] + shift;
913         v    = B->a + B->i[i] + shift;
914         SPARSEDENSEMDOT(sum,ls,v,idx,n);
915         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
916       }
917       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
918                             mat->Mvctx); CHKERRQ(ierr);
919     }
920   }
921   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
922     if (flag & SOR_ZERO_INITIAL_GUESS) {
923       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
924     }
925     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
926     CHKERRQ(ierr);
927     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
928     CHKERRQ(ierr);
929     while (its--) {
930       /* go down through the rows */
931       for ( i=0; i<m; i++ ) {
932         n    = A->i[i+1] - A->i[i];
933         idx  = A->j + A->i[i] + shift;
934         v    = A->a + A->i[i] + shift;
935         sum  = b[i];
936         SPARSEDENSEMDOT(sum,xs,v,idx,n);
937         d    = fshift + A->a[diag[i]+shift];
938         n    = B->i[i+1] - B->i[i];
939         idx  = B->j + B->i[i] + shift;
940         v    = B->a + B->i[i] + shift;
941         SPARSEDENSEMDOT(sum,ls,v,idx,n);
942         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
943       }
944       /* come up through the rows */
945       for ( i=m-1; i>-1; i-- ) {
946         n    = A->i[i+1] - A->i[i];
947         idx  = A->j + A->i[i] + shift;
948         v    = A->a + A->i[i] + shift;
949         sum  = b[i];
950         SPARSEDENSEMDOT(sum,xs,v,idx,n);
951         d    = fshift + A->a[diag[i]+shift];
952         n    = B->i[i+1] - B->i[i];
953         idx  = B->j + B->i[i] + shift;
954         v    = B->a + B->i[i] + shift;
955         SPARSEDENSEMDOT(sum,ls,v,idx,n);
956         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
957       }
958     }
959   }
960   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
961     if (flag & SOR_ZERO_INITIAL_GUESS) {
962       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
963     }
964     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
965     CHKERRQ(ierr);
966     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
967     CHKERRQ(ierr);
968     while (its--) {
969       for ( i=0; i<m; i++ ) {
970         n    = A->i[i+1] - A->i[i];
971         idx  = A->j + A->i[i] + shift;
972         v    = A->a + A->i[i] + shift;
973         sum  = b[i];
974         SPARSEDENSEMDOT(sum,xs,v,idx,n);
975         d    = fshift + A->a[diag[i]+shift];
976         n    = B->i[i+1] - B->i[i];
977         idx  = B->j + B->i[i] + shift;
978         v    = B->a + B->i[i] + shift;
979         SPARSEDENSEMDOT(sum,ls,v,idx,n);
980         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
981       }
982     }
983   }
984   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
985     if (flag & SOR_ZERO_INITIAL_GUESS) {
986       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
987     }
988     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
989                             mat->Mvctx); CHKERRQ(ierr);
990     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
991                             mat->Mvctx); CHKERRQ(ierr);
992     while (its--) {
993       for ( i=m-1; i>-1; i-- ) {
994         n    = A->i[i+1] - A->i[i];
995         idx  = A->j + A->i[i] + shift;
996         v    = A->a + A->i[i] + shift;
997         sum  = b[i];
998         SPARSEDENSEMDOT(sum,xs,v,idx,n);
999         d    = fshift + A->a[diag[i]+shift];
1000         n    = B->i[i+1] - B->i[i];
1001         idx  = B->j + B->i[i] + shift;
1002         v    = B->a + B->i[i] + shift;
1003         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1004         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
1005       }
1006     }
1007   }
1008   return 0;
1009 }
1010 
1011 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
1012                              int *nzalloc,int *mem)
1013 {
1014   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1015   Mat        A = mat->A, B = mat->B;
1016   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1017 
1018   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
1019   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1020   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1021   if (flag == MAT_LOCAL) {
1022     if (nz)       *nz      = isend[0];
1023     if (nzalloc)  *nzalloc = isend[1];
1024     if (mem)      *mem     = isend[2];
1025   } else if (flag == MAT_GLOBAL_MAX) {
1026     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1027     if (nz)      *nz      = irecv[0];
1028     if (nzalloc) *nzalloc = irecv[1];
1029     if (mem)     *mem     = irecv[2];
1030   } else if (flag == MAT_GLOBAL_SUM) {
1031     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1032     if (nz)      *nz      = irecv[0];
1033     if (nzalloc) *nzalloc = irecv[1];
1034     if (mem)     *mem     = irecv[2];
1035   }
1036   return 0;
1037 }
1038 
1039 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1040 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1041 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1042 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1043 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1044 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1045 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1046 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1047 
1048 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1049 {
1050   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1051 
1052   if (op == NO_NEW_NONZERO_LOCATIONS ||
1053       op == YES_NEW_NONZERO_LOCATIONS ||
1054       op == COLUMNS_SORTED ||
1055       op == ROW_ORIENTED) {
1056         MatSetOption(a->A,op);
1057         MatSetOption(a->B,op);
1058   }
1059   else if (op == ROWS_SORTED ||
1060            op == SYMMETRIC_MATRIX ||
1061            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1062            op == YES_NEW_DIAGONALS)
1063     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1064   else if (op == COLUMN_ORIENTED) {
1065     a->roworiented = 0;
1066     MatSetOption(a->A,op);
1067     MatSetOption(a->B,op);
1068   }
1069   else if (op == NO_NEW_DIAGONALS)
1070     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1071   else
1072     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1073   return 0;
1074 }
1075 
1076 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1077 {
1078   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1079   *m = mat->M; *n = mat->N;
1080   return 0;
1081 }
1082 
1083 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1084 {
1085   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1086   *m = mat->m; *n = mat->N;
1087   return 0;
1088 }
1089 
1090 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1091 {
1092   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1093   *m = mat->rstart; *n = mat->rend;
1094   return 0;
1095 }
1096 
1097 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1098 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
1099 
1100 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1101 {
1102   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1103   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1104   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1105   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1106   int        *cmap, *idx_p;
1107 
1108   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,"MatGetRow_MPIAIJ:Already active");
1109   mat->getrowactive = PETSC_TRUE;
1110 
1111   if (!mat->rowvalues && (idx || v)) {
1112     /*
1113         allocate enough space to hold information from the longest row.
1114     */
1115     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1116     int     max = 1,n = mat->n,tmp;
1117     for ( i=0; i<n; i++ ) {
1118       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1119       if (max < tmp) { max = tmp; }
1120     }
1121     mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
1122     CHKPTRQ(mat->rowvalues);
1123     mat->rowindices = (int *) (mat->rowvalues + max);
1124   }
1125 
1126 
1127   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows")
1128   lrow = row - rstart;
1129 
1130   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1131   if (!v)   {pvA = 0; pvB = 0;}
1132   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1133   ierr = MatGetRow_SeqAIJ(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1134   ierr = MatGetRow_SeqAIJ(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1135   nztot = nzA + nzB;
1136 
1137   cmap  = mat->garray;
1138   if (v  || idx) {
1139     if (nztot) {
1140       /* Sort by increasing column numbers, assuming A and B already sorted */
1141       int imark = -1;
1142       if (v) {
1143         *v = v_p = mat->rowvalues;
1144         for ( i=0; i<nzB; i++ ) {
1145           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1146           else break;
1147         }
1148         imark = i;
1149         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1150         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1151       }
1152       if (idx) {
1153         *idx = idx_p = mat->rowindices;
1154         if (imark > -1) {
1155           for ( i=0; i<imark; i++ ) {
1156             idx_p[i] = cmap[cworkB[i]];
1157           }
1158         } else {
1159           for ( i=0; i<nzB; i++ ) {
1160             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1161             else break;
1162           }
1163           imark = i;
1164         }
1165         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1166         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1167       }
1168     }
1169     else {*idx = 0; *v=0;}
1170   }
1171   *nz = nztot;
1172   ierr = MatRestoreRow_SeqAIJ(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1173   ierr = MatRestoreRow_SeqAIJ(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1174   return 0;
1175 }
1176 
1177 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1178 {
1179   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1180   if (aij->getrowactive == PETSC_FALSE) {
1181     SETERRQ(1,"MatRestoreRow_MPIAIJ:MatGetRow not called");
1182   }
1183   aij->getrowactive = PETSC_FALSE;
1184   return 0;
1185 }
1186 
1187 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1188 {
1189   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1190   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1191   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1192   double     sum = 0.0;
1193   Scalar     *v;
1194 
1195   if (aij->size == 1) {
1196     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1197   } else {
1198     if (type == NORM_FROBENIUS) {
1199       v = amat->a;
1200       for (i=0; i<amat->nz; i++ ) {
1201 #if defined(PETSC_COMPLEX)
1202         sum += real(conj(*v)*(*v)); v++;
1203 #else
1204         sum += (*v)*(*v); v++;
1205 #endif
1206       }
1207       v = bmat->a;
1208       for (i=0; i<bmat->nz; i++ ) {
1209 #if defined(PETSC_COMPLEX)
1210         sum += real(conj(*v)*(*v)); v++;
1211 #else
1212         sum += (*v)*(*v); v++;
1213 #endif
1214       }
1215       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1216       *norm = sqrt(*norm);
1217     }
1218     else if (type == NORM_1) { /* max column norm */
1219       double *tmp, *tmp2;
1220       int    *jj, *garray = aij->garray;
1221       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1222       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1223       PetscMemzero(tmp,aij->N*sizeof(double));
1224       *norm = 0.0;
1225       v = amat->a; jj = amat->j;
1226       for ( j=0; j<amat->nz; j++ ) {
1227         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1228       }
1229       v = bmat->a; jj = bmat->j;
1230       for ( j=0; j<bmat->nz; j++ ) {
1231         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1232       }
1233       MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1234       for ( j=0; j<aij->N; j++ ) {
1235         if (tmp2[j] > *norm) *norm = tmp2[j];
1236       }
1237       PetscFree(tmp); PetscFree(tmp2);
1238     }
1239     else if (type == NORM_INFINITY) { /* max row norm */
1240       double ntemp = 0.0;
1241       for ( j=0; j<amat->m; j++ ) {
1242         v = amat->a + amat->i[j] + shift;
1243         sum = 0.0;
1244         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1245           sum += PetscAbsScalar(*v); v++;
1246         }
1247         v = bmat->a + bmat->i[j] + shift;
1248         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1249           sum += PetscAbsScalar(*v); v++;
1250         }
1251         if (sum > ntemp) ntemp = sum;
1252       }
1253       MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1254     }
1255     else {
1256       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
1257     }
1258   }
1259   return 0;
1260 }
1261 
1262 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1263 {
1264   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1265   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1266   int        ierr,shift = Aloc->indexshift;
1267   Mat        B;
1268   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1269   Scalar     *array;
1270 
1271   if (matout == PETSC_NULL && M != N)
1272     SETERRQ(1,"MatTranspose_MPIAIJ:Square matrix only for in-place");
1273   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1274          PETSC_NULL,&B); CHKERRQ(ierr);
1275 
1276   /* copy over the A part */
1277   Aloc = (Mat_SeqAIJ*) a->A->data;
1278   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1279   row = a->rstart;
1280   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1281   for ( i=0; i<m; i++ ) {
1282     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1283     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1284   }
1285   aj = Aloc->j;
1286   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1287 
1288   /* copy over the B part */
1289   Aloc = (Mat_SeqAIJ*) a->B->data;
1290   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1291   row = a->rstart;
1292   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1293   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1294   for ( i=0; i<m; i++ ) {
1295     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1296     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1297   }
1298   PetscFree(ct);
1299   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1300   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1301   if (matout != PETSC_NULL) {
1302     *matout = B;
1303   } else {
1304     /* This isn't really an in-place transpose .... but free data structures from a */
1305     PetscFree(a->rowners);
1306     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1307     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1308     if (a->colmap) PetscFree(a->colmap);
1309     if (a->garray) PetscFree(a->garray);
1310     if (a->lvec) VecDestroy(a->lvec);
1311     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1312     PetscFree(a);
1313     PetscMemcpy(A,B,sizeof(struct _Mat));
1314     PetscHeaderDestroy(B);
1315   }
1316   return 0;
1317 }
1318 
1319 extern int MatPrintHelp_SeqAIJ(Mat);
1320 static int MatPrintHelp_MPIAIJ(Mat A)
1321 {
1322   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1323 
1324   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1325   else return 0;
1326 }
1327 
1328 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1329 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1330 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1331 int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
1332 /* -------------------------------------------------------------------*/
1333 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1334        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1335        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1336        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1337        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1338        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1339        MatLUFactor_MPIAIJ,0,
1340        MatRelax_MPIAIJ,
1341        MatTranspose_MPIAIJ,
1342        MatGetInfo_MPIAIJ,0,
1343        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1344        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1345        0,
1346        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1347        MatGetReordering_MPIAIJ,
1348        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1349        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1350        MatILUFactorSymbolic_MPIAIJ,0,
1351        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1352        0,0,0,
1353        MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
1354        MatPrintHelp_MPIAIJ,
1355        MatScale_MPIAIJ};
1356 
1357 /*@C
1358    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1359    (the default parallel PETSc format).  For good matrix assembly performance
1360    the user should preallocate the matrix storage by setting the parameters
1361    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1362    performance can be increased by more than a factor of 50.
1363 
1364    Input Parameters:
1365 .  comm - MPI communicator
1366 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1367 .  n - number of local columns (or PETSC_DECIDE to have calculated
1368            if N is given)
1369 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1370 .  N - number of global columns (or PETSC_DECIDE to have calculated
1371            if n is given)
1372 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1373            (same for all local rows)
1374 .  d_nzz - number of nonzeros per row in diagonal portion of local submatrix
1375            or null (possibly different for each row).  You must leave room
1376            for the diagonal entry even if it is zero.
1377 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1378            submatrix (same for all local rows).
1379 .  o_nzz - number of nonzeros per row in off-diagonal portion of local
1380            submatrix or null (possibly different for each row).
1381 
1382    Output Parameter:
1383 .  newmat - the matrix
1384 
1385    Notes:
1386    The AIJ format (also called the Yale sparse matrix format or
1387    compressed row storage), is fully compatible with standard Fortran 77
1388    storage.  That is, the stored row and column indices can begin at
1389    either one (as in Fortran) or zero.  See the users manual for details.
1390 
1391    The user MUST specify either the local or global matrix dimensions
1392    (possibly both).
1393 
1394    By default, this format uses inodes (identical nodes) when possible.
1395    We search for consecutive rows with the same nonzero structure, thereby
1396    reusing matrix information to achieve increased efficiency.
1397 
1398    Options Database Keys:
1399 $    -mat_aij_no_inode  - Do not use inodes
1400 $    -mat_aij_inode_limit <limit> - Set inode limit.
1401 $        (max limit=5)
1402 $    -mat_aij_oneindex - Internally use indexing starting at 1
1403 $        rather than 0.  Note: When calling MatSetValues(),
1404 $        the user still MUST index entries starting at 0!
1405 
1406    Storage Information:
1407    For a square global matrix we define each processor's diagonal portion
1408    to be its local rows and the corresponding columns (a square submatrix);
1409    each processor's off-diagonal portion encompasses the remainder of the
1410    local matrix (a rectangular submatrix).
1411 
1412    The user can specify preallocated storage for the diagonal part of
1413    the local submatrix with either d_nz or d_nnz (not both).  Set
1414    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1415    memory allocation.  Likewise, specify preallocated storage for the
1416    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1417 
1418    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1419    the figure below we depict these three local rows and all columns (0-11).
1420 
1421 $          0 1 2 3 4 5 6 7 8 9 10 11
1422 $         -------------------
1423 $  row 3  |  o o o d d d o o o o o o
1424 $  row 4  |  o o o d d d o o o o o o
1425 $  row 5  |  o o o d d d o o o o o o
1426 $         -------------------
1427 $
1428 
1429    Thus, any entries in the d locations are stored in the d (diagonal)
1430    submatrix, and any entries in the o locations are stored in the
1431    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1432    stored simply in the MATSEQAIJ format for compressed row storage.
1433 
1434    Now d_nz should indicate the number of nonzeros per row in the d matrix,
1435    and o_nz should indicate the number of nonzeros per row in the o matrix.
1436    In general, for PDE problems in which most nonzeros are near the diagonal,
1437    one expects d_nz >> o_nz.   For additional details, see the users manual
1438    chapter on matrices and the file $(PETSC_DIR)/Performance.
1439 
1440 .keywords: matrix, aij, compressed row, sparse, parallel
1441 
1442 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1443 @*/
1444 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1445                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
1446 {
1447   Mat          mat;
1448   Mat_MPIAIJ   *a;
1449   int          ierr, i,sum[2],work[2];
1450 
1451   *newmat         = 0;
1452   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1453   PLogObjectCreate(mat);
1454   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1455   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1456   mat->destroy    = MatDestroy_MPIAIJ;
1457   mat->view       = MatView_MPIAIJ;
1458   mat->factor     = 0;
1459   mat->assembled  = PETSC_FALSE;
1460 
1461   a->insertmode = NOT_SET_VALUES;
1462   MPI_Comm_rank(comm,&a->rank);
1463   MPI_Comm_size(comm,&a->size);
1464 
1465   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1466     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
1467 
1468   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1469     work[0] = m; work[1] = n;
1470     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1471     if (M == PETSC_DECIDE) M = sum[0];
1472     if (N == PETSC_DECIDE) N = sum[1];
1473   }
1474   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
1475   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1476   a->m = m;
1477   a->n = n;
1478   a->N = N;
1479   a->M = M;
1480 
1481   /* build local table of row and column ownerships */
1482   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1483   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1484   a->cowners = a->rowners + a->size + 1;
1485   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1486   a->rowners[0] = 0;
1487   for ( i=2; i<=a->size; i++ ) {
1488     a->rowners[i] += a->rowners[i-1];
1489   }
1490   a->rstart = a->rowners[a->rank];
1491   a->rend   = a->rowners[a->rank+1];
1492   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1493   a->cowners[0] = 0;
1494   for ( i=2; i<=a->size; i++ ) {
1495     a->cowners[i] += a->cowners[i-1];
1496   }
1497   a->cstart = a->cowners[a->rank];
1498   a->cend   = a->cowners[a->rank+1];
1499 
1500   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1501   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1502   PLogObjectParent(mat,a->A);
1503   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1504   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1505   PLogObjectParent(mat,a->B);
1506 
1507   /* build cache for off array entries formed */
1508   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1509   a->colmap      = 0;
1510   a->garray      = 0;
1511   a->roworiented = 1;
1512 
1513   /* stuff used for matrix vector multiply */
1514   a->lvec      = 0;
1515   a->Mvctx     = 0;
1516 
1517   /* stuff for MatGetRow() */
1518   a->rowindices   = 0;
1519   a->rowvalues    = 0;
1520   a->getrowactive = PETSC_FALSE;
1521 
1522   *newmat = mat;
1523   return 0;
1524 }
1525 
1526 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1527 {
1528   Mat        mat;
1529   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1530   int        ierr, len,flg;
1531 
1532   *newmat       = 0;
1533   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1534   PLogObjectCreate(mat);
1535   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1536   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1537   mat->destroy    = MatDestroy_MPIAIJ;
1538   mat->view       = MatView_MPIAIJ;
1539   mat->factor     = matin->factor;
1540   mat->assembled  = PETSC_TRUE;
1541 
1542   a->m            = oldmat->m;
1543   a->n            = oldmat->n;
1544   a->M            = oldmat->M;
1545   a->N            = oldmat->N;
1546 
1547   a->rstart       = oldmat->rstart;
1548   a->rend         = oldmat->rend;
1549   a->cstart       = oldmat->cstart;
1550   a->cend         = oldmat->cend;
1551   a->size         = oldmat->size;
1552   a->rank         = oldmat->rank;
1553   a->insertmode   = NOT_SET_VALUES;
1554   a->rowvalues    = 0;
1555   a->getrowactive = PETSC_FALSE;
1556 
1557   a->rowners = (int *) PetscMalloc((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
1558   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1559   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1560   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1561   if (oldmat->colmap) {
1562     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1563     PLogObjectMemory(mat,(a->N)*sizeof(int));
1564     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1565   } else a->colmap = 0;
1566   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
1567     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1568     PLogObjectMemory(mat,len*sizeof(int));
1569     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1570   } else a->garray = 0;
1571 
1572   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1573   PLogObjectParent(mat,a->lvec);
1574   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1575   PLogObjectParent(mat,a->Mvctx);
1576   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1577   PLogObjectParent(mat,a->A);
1578   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1579   PLogObjectParent(mat,a->B);
1580   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1581   if (flg) {
1582     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1583   }
1584   *newmat = mat;
1585   return 0;
1586 }
1587 
1588 #include "sysio.h"
1589 
1590 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1591 {
1592   Mat          A;
1593   int          i, nz, ierr, j,rstart, rend, fd;
1594   Scalar       *vals,*svals;
1595   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1596   MPI_Status   status;
1597   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1598   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1599   int          tag = ((PetscObject)viewer)->tag;
1600 
1601   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1602   if (!rank) {
1603     ierr = ViewerFileGetDescriptor_Private(viewer,&fd); CHKERRQ(ierr);
1604     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
1605     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJ:not matrix object");
1606   }
1607 
1608   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1609   M = header[1]; N = header[2];
1610   /* determine ownership of all rows */
1611   m = M/size + ((M % size) > rank);
1612   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1613   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1614   rowners[0] = 0;
1615   for ( i=2; i<=size; i++ ) {
1616     rowners[i] += rowners[i-1];
1617   }
1618   rstart = rowners[rank];
1619   rend   = rowners[rank+1];
1620 
1621   /* distribute row lengths to all processors */
1622   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1623   offlens = ourlens + (rend-rstart);
1624   if (!rank) {
1625     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1626     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
1627     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1628     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1629     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1630     PetscFree(sndcounts);
1631   }
1632   else {
1633     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1634   }
1635 
1636   if (!rank) {
1637     /* calculate the number of nonzeros on each processor */
1638     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1639     PetscMemzero(procsnz,size*sizeof(int));
1640     for ( i=0; i<size; i++ ) {
1641       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1642         procsnz[i] += rowlengths[j];
1643       }
1644     }
1645     PetscFree(rowlengths);
1646 
1647     /* determine max buffer needed and allocate it */
1648     maxnz = 0;
1649     for ( i=0; i<size; i++ ) {
1650       maxnz = PetscMax(maxnz,procsnz[i]);
1651     }
1652     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1653 
1654     /* read in my part of the matrix column indices  */
1655     nz = procsnz[0];
1656     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1657     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1658 
1659     /* read in every one elses and ship off */
1660     for ( i=1; i<size; i++ ) {
1661       nz = procsnz[i];
1662       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1663       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1664     }
1665     PetscFree(cols);
1666   }
1667   else {
1668     /* determine buffer space needed for message */
1669     nz = 0;
1670     for ( i=0; i<m; i++ ) {
1671       nz += ourlens[i];
1672     }
1673     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1674 
1675     /* receive message of column indices*/
1676     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1677     MPI_Get_count(&status,MPI_INT,&maxnz);
1678     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1679   }
1680 
1681   /* loop over local rows, determining number of off diagonal entries */
1682   PetscMemzero(offlens,m*sizeof(int));
1683   jj = 0;
1684   for ( i=0; i<m; i++ ) {
1685     for ( j=0; j<ourlens[i]; j++ ) {
1686       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1687       jj++;
1688     }
1689   }
1690 
1691   /* create our matrix */
1692   for ( i=0; i<m; i++ ) {
1693     ourlens[i] -= offlens[i];
1694   }
1695   ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1696   A = *newmat;
1697   MatSetOption(A,COLUMNS_SORTED);
1698   for ( i=0; i<m; i++ ) {
1699     ourlens[i] += offlens[i];
1700   }
1701 
1702   if (!rank) {
1703     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1704 
1705     /* read in my part of the matrix numerical values  */
1706     nz = procsnz[0];
1707     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1708 
1709     /* insert into matrix */
1710     jj      = rstart;
1711     smycols = mycols;
1712     svals   = vals;
1713     for ( i=0; i<m; i++ ) {
1714       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1715       smycols += ourlens[i];
1716       svals   += ourlens[i];
1717       jj++;
1718     }
1719 
1720     /* read in other processors and ship out */
1721     for ( i=1; i<size; i++ ) {
1722       nz = procsnz[i];
1723       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1724       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1725     }
1726     PetscFree(procsnz);
1727   }
1728   else {
1729     /* receive numeric values */
1730     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1731 
1732     /* receive message of values*/
1733     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1734     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1735     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJ:something is wrong with file");
1736 
1737     /* insert into matrix */
1738     jj      = rstart;
1739     smycols = mycols;
1740     svals   = vals;
1741     for ( i=0; i<m; i++ ) {
1742       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1743       smycols += ourlens[i];
1744       svals   += ourlens[i];
1745       jj++;
1746     }
1747   }
1748   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1749 
1750   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1751   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1752   return 0;
1753 }
1754