xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b4874afa33ef89d436bd5cc157aada57df95d045)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.113 1996/01/23 00:44:13 balay Exp balay $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
11 storage of the matrix.  This is done in a non-scalable way since the
12 length of colmap equals the number of global columns.
13 */
14 static int CreateColmap_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18   int        n = B->n,i,shift = B->indexshift;
19 
20   aij->colmap = (int *) PetscMalloc(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21   PLogObjectMemory(mat,aij->N*sizeof(int));
22   PetscMemzero(aij->colmap,aij->N*sizeof(int));
23   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
24   return 0;
25 }
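/*
   For illustration: with 1-based (Fortran) indexing, indexshift is -1, so
   if garray[] = {3,7,12} the loop above sets colmap[3] = 1, colmap[7] = 2,
   colmap[12] = 3, and all other entries remain 0.  A later lookup
   col = colmap[j] + shift then yields the local column number, or a
   negative value when global column j has no local counterpart.
*/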
26 
27 extern int DisAssemble_MPIAIJ(Mat);
28 
29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30 {
31   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32   int        ierr;
33   if (aij->size == 1) {
34     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
35   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36   return 0;
37 }
38 
39 static int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
42   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
43   Scalar     value;
44   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
45   int        cstart = aij->cstart, cend = aij->cend,row,col;
46   int        shift = C->indexshift,roworiented = aij->roworiented;
47 
48   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
49     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
50   }
51   aij->insertmode = addv;
52   for ( i=0; i<m; i++ ) {
53     if (im[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
54     if (im[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
55     if (im[i] >= rstart && im[i] < rend) {
56       row = im[i] - rstart;
57       for ( j=0; j<n; j++ ) {
58         if (in[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
59         if (in[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
60         if (in[j] >= cstart && in[j] < cend){
61           col = in[j] - cstart;
62           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
63           ierr = MatSetValues(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
64         }
65         else {
66           if (aij->assembled) {
67             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
68             col = aij->colmap[in[j]] + shift;
69             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
70               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
71               col =  in[j];
72             }
73           }
74           else col = in[j];
75           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
76           ierr = MatSetValues(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
77         }
78       }
79     }
80     else {
81       if (roworiented) {
82         ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
83       }
84       else {
85         row = im[i];
86         for ( j=0; j<n; j++ ) {
87           ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
88         }
89       }
90     }
91   }
92   return 0;
93 }
94 
95 static int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
96 {
97   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
98   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
99   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
100   int        cstart = aij->cstart, cend = aij->cend,row,col;
101   int        shift = C->indexshift;
102 
103   if (!aij->assembled) SETERRQ(1,"MatGetValues_MPIAIJ:Not for unassembled matrix");
104   for ( i=0; i<m; i++ ) {
105     if (idxm[i] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative row");
106     if (idxm[i] >= aij->M) SETERRQ(1,"MatGetValues_MPIAIJ:Row too large");
107     if (idxm[i] >= rstart && idxm[i] < rend) {
108       row = idxm[i] - rstart;
109       for ( j=0; j<n; j++ ) {
110         if (idxn[j] < 0) SETERRQ(1,"MatGetValues_MPIAIJ:Negative column");
111         if (idxn[j] >= aij->N) SETERRQ(1,"MatGetValues_MPIAIJ:Col too large");
112         if (idxn[j] >= cstart && idxn[j] < cend){
113           col = idxn[j] - cstart;
114           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
115         }
116         else {
117           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);} col = aij->colmap[idxn[j]] + shift;
118           ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
119         }
120       }
121     }
122     else {
123       SETERRQ(1,"MatGetValues_MPIAIJ:Only local values currently supported");
124     }
125   }
126   return 0;
127 }
128 
129 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
130 {
131   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
132   MPI_Comm    comm = mat->comm;
133   int         size = aij->size, *owners = aij->rowners;
134   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
135   MPI_Request *send_waits,*recv_waits;
136   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
137   InsertMode  addv;
138   Scalar      *rvalues,*svalues;
139 
140   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
141   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
142   if (addv == (ADD_VALUES|INSERT_VALUES)) {
143     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
144   }
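  /* This works because, with the usual enum values NOT_SET_VALUES = 0,
     INSERT_VALUES = 1 and ADD_VALUES = 2, the MPI_BOR reduction forms the
     bitwise union of all local modes; the union equals
     ADD_VALUES|INSERT_VALUES exactly when some processors inserted while
     others added. */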
145   aij->insertmode = addv; /* in case this processor had no cache */
146 
147   /*  first count number of contributors to each processor */
148   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
149   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
150   owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
151   for ( i=0; i<aij->stash.n; i++ ) {
152     idx = aij->stash.idx[i];
153     for ( j=0; j<size; j++ ) {
154       if (idx >= owners[j] && idx < owners[j+1]) {
155         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
156       }
157     }
158   }
159   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
160 
161   /* inform other processors of number of messages and max length */
162   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
163   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
164   nreceives = work[rank];
165   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
166   nmax = work[rank];
167   PetscFree(work);
168 
169   /* post receives:
170        1) each message will consist of ordered triples
171      (global row index, global column index, value); we store the
172      indices as Scalars to simplify the message passing.
173        2) since we don't know how long each individual message is, we
174      allocate the largest needed buffer for each receive.  Potentially
175      this is a lot of wasted space.
176 
177 
178        This could be done better.
179   */
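  /* Buffer layout, for reference: each stashed entry occupies three
     consecutive Scalars,
         buf[3*k]   = (Scalar) global row index
         buf[3*k+1] = (Scalar) global column index
         buf[3*k+2] = value
     which is why every buffer length below carries the factor of 3. */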
180   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
181   CHKPTRQ(rvalues);
182   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
183   CHKPTRQ(recv_waits);
184   for ( i=0; i<nreceives; i++ ) {
185     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
186               comm,recv_waits+i);
187   }
188 
189   /* do sends:
190       1) starts[i] gives the starting index in svalues of the entries
191          destined for the ith processor
192   */
193   svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
194   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
195   CHKPTRQ(send_waits);
196   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
197   starts[0] = 0;
198   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
199   for ( i=0; i<aij->stash.n; i++ ) {
200     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
201     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
202     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
203   }
204   PetscFree(owner);
205   starts[0] = 0;
206   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
207   count = 0;
208   for ( i=0; i<size; i++ ) {
209     if (procs[i]) {
210       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
211                 comm,send_waits+count++);
212     }
213   }
214   PetscFree(starts); PetscFree(nprocs);
215 
216   /* Free cache space */
217   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
218 
219   aij->svalues    = svalues;    aij->rvalues    = rvalues;
220   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
221   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
222   aij->rmax       = nmax;
223 
224   return 0;
225 }
226 extern int MatSetUpMultiply_MPIAIJ(Mat);
227 
228 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
229 {
230   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
231   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
232   MPI_Status  *send_status,recv_status;
233   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
234   int         row,col,other_disassembled,shift = C->indexshift;
235   Scalar      *values,val;
236   InsertMode  addv = aij->insertmode;
237 
238   /*  wait on receives */
239   while (count) {
240     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
241     /* unpack receives into our local space */
242     values = aij->rvalues + 3*imdex*aij->rmax;
243     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
244     n = n/3;
245     for ( i=0; i<n; i++ ) {
246       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
247       col = (int) PETSCREAL(values[3*i+1]);
248       val = values[3*i+2];
249       if (col >= aij->cstart && col < aij->cend) {
250         col -= aij->cstart;
251         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
252       }
253       else {
254         if (aij->assembled) {
255           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
256           col = aij->colmap[col] + shift;
257           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
258             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
259             col = (int) PETSCREAL(values[3*i+1]);
260           }
261         }
262         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
263       }
264     }
265     count--;
266   }
267   PetscFree(aij->recv_waits); PetscFree(aij->rvalues);
268 
269   /* wait on sends */
270   if (aij->nsends) {
271     send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));
272     CHKPTRQ(send_status);
273     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
274     PetscFree(send_status);
275   }
276   PetscFree(aij->send_waits); PetscFree(aij->svalues);
277 
278   aij->insertmode = NOT_SET_VALUES;
279   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
280   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
281 
282   /* determine if any processor has disassembled; if so, we must
283      also disassemble ourselves so that we can reassemble. */
284   MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
285   if (aij->assembled && !other_disassembled) {
286     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
287   }
288 
289   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
290     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
291     aij->assembled = 1;
292   }
293   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
294   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
295 
296   return 0;
297 }
298 
299 static int MatZeroEntries_MPIAIJ(Mat A)
300 {
301   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
302   int        ierr;
303   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
304   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
305   return 0;
306 }
307 
308 /* the code does not handle the diagonal entries correctly unless the
309    matrix is square and the column and row ownerships are identical.
310    This is a BUG. The only way to fix it seems to be to access
311    aij->A and aij->B directly rather than going through the
312    MatZeroRows() routine.
313 */
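/* In outline: each processor ships every row index in the index set to the
   processor that owns that row (using the same count/post-receive/send
   pattern as MatAssemblyBegin_MPIAIJ above), gathers the rows it receives
   into lrows[], and zeros those rows locally in both the diagonal (l->A)
   and off-diagonal (l->B) blocks. */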
314 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
315 {
316   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
317   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
318   int            *procs,*nprocs,j,found,idx,nsends,*work;
319   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
320   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
321   int            *lens,imdex,*lrows,*values;
322   MPI_Comm       comm = A->comm;
323   MPI_Request    *send_waits,*recv_waits;
324   MPI_Status     recv_status,*send_status;
325   IS             istmp;
326 
327   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
328   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
329   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
330 
331   /*  first count number of contributors to each processor */
332   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
333   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
334   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
335   for ( i=0; i<N; i++ ) {
336     idx = rows[i];
337     found = 0;
338     for ( j=0; j<size; j++ ) {
339       if (idx >= owners[j] && idx < owners[j+1]) {
340         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
341       }
342     }
343     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
344   }
345   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
346 
347   /* inform other processors of number of messages and max length */
348   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
349   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
350   nrecvs = work[rank];
351   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
352   nmax = work[rank];
353   PetscFree(work);
354 
355   /* post receives:   */
356   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /* see note */
357   CHKPTRQ(rvalues);
358   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
359   CHKPTRQ(recv_waits);
360   for ( i=0; i<nrecvs; i++ ) {
361     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
362   }
363 
364   /* do sends:
365       1) starts[i] gives the starting index in svalues of the entries
366          destined for the ith processor
367   */
368   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
369   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
370   CHKPTRQ(send_waits);
371   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
372   starts[0] = 0;
373   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
374   for ( i=0; i<N; i++ ) {
375     svalues[starts[owner[i]]++] = rows[i];
376   }
377   ISRestoreIndices(is,&rows);
378 
379   starts[0] = 0;
380   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
381   count = 0;
382   for ( i=0; i<size; i++ ) {
383     if (procs[i]) {
384       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
385     }
386   }
387   PetscFree(starts);
388 
389   base = owners[rank];
390 
391   /*  wait on receives */
392   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
393   source = lens + nrecvs;
394   count  = nrecvs; slen = 0;
395   while (count) {
396     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
397     /* unpack receives into our local space */
398     MPI_Get_count(&recv_status,MPI_INT,&n);
399     source[imdex]  = recv_status.MPI_SOURCE;
400     lens[imdex]  = n;
401     slen += n;
402     count--;
403   }
404   PetscFree(recv_waits);
405 
406   /* move the data into the send scatter */
407   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
408   count = 0;
409   for ( i=0; i<nrecvs; i++ ) {
410     values = rvalues + i*nmax;
411     for ( j=0; j<lens[i]; j++ ) {
412       lrows[count++] = values[j] - base;
413     }
414   }
415   PetscFree(rvalues); PetscFree(lens);
416   PetscFree(owner); PetscFree(nprocs);
417 
418   /* actually zap the local rows */
419   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
420   PLogObjectParent(A,istmp);
421   PetscFree(lrows);
422   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
423   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
424   ierr = ISDestroy(istmp); CHKERRQ(ierr);
425 
426   /* wait on sends */
427   if (nsends) {
428     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
429     CHKPTRQ(send_status);
430     MPI_Waitall(nsends,send_waits,send_status);
431     PetscFree(send_status);
432   }
433   PetscFree(send_waits); PetscFree(svalues);
434 
435   return 0;
436 }
437 
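/* The multiply routines below compute y = A x_local + B x_ghost, where A
   holds the diagonal block and B the off-diagonal block: the scatter that
   gathers the ghost values of xx into a->lvec is started first, the local
   product with A is computed while those messages are in flight, and the
   scatter is completed before the contribution of B is added in. */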
438 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
439 {
440   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
441   int        ierr;
442 
443   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
444   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
445   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
446   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
447   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
448   return 0;
449 }
450 
451 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
452 {
453   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
454   int        ierr;
455   if (!a->assembled) SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix");
456   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
457   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
458   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
459   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
460   return 0;
461 }
462 
463 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
464 {
465   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
466   int        ierr;
467 
468   if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix");
469   /* do nondiagonal part */
470   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
471   /* send it on its way */
472   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
473                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
474   /* do local part */
475   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
476   /* receive remote parts: note this assumes the values are not actually */
477   /* inserted in yy until the next line, which is true for this           */
478   /* implementation but may not always be true. */
479   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
480                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
481   return 0;
482 }
483 
484 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
485 {
486   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
487   int        ierr;
488 
489   if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIAIJ:must assemble matrix");
490   /* do nondiagonal part */
491   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
492   /* send it on its way */
493   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
494                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
495   /* do local part */
496   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
497   /* receive remote parts: note this assumes the values are not actually */
498   /* inserted in zz until the next line, which is true for this           */
499   /* implementation but may not always be true. */
500   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
501                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
502   return 0;
503 }
504 
505 /*
506   This only works correctly for square matrices where the subblock a->A is
507   the diagonal block.
508 */
509 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
510 {
511   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
512   if (!a->assembled) SETERRQ(1,"MatGetDiagonal_MPIAIJ:must assemble matrix");
513   return MatGetDiagonal(a->A,v);
514 }
515 
516 static int MatScale_MPIAIJ(Scalar *aa,Mat A)
517 {
518   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
519   int        ierr;
520   if (!a->assembled) SETERRQ(1,"MatScale_MPIAIJ:must assemble matrix");
521   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
522   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
523   return 0;
524 }
525 
526 static int MatDestroy_MPIAIJ(PetscObject obj)
527 {
528   Mat        mat = (Mat) obj;
529   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
530   int        ierr;
531 #if defined(PETSC_LOG)
532   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
533 #endif
534   PetscFree(aij->rowners);
535   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
536   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
537   if (aij->colmap) PetscFree(aij->colmap);
538   if (aij->garray) PetscFree(aij->garray);
539   if (aij->lvec)   VecDestroy(aij->lvec);
540   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
541   PetscFree(aij);
542   PLogObjectDestroy(mat);
543   PetscHeaderDestroy(mat);
544   return 0;
545 }
546 #include "draw.h"
547 #include "pinclude/pviewer.h"
548 
549 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
550 {
551   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
552   int         ierr;
553 
554   if (aij->size == 1) {
555     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
556   }
557   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
558   return 0;
559 }
560 
561 static int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
562 {
563   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
564   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
565   int         ierr, format,shift = C->indexshift,rank;
566   PetscObject vobj = (PetscObject) viewer;
567   FILE        *fd;
568 
569   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
570     ierr = ViewerFileGetFormat_Private(viewer,&format);
571     if (format == FILE_FORMAT_INFO_DETAILED) {
572       int nz,nzalloc,mem;
573       MPI_Comm_rank(mat->comm,&rank);
574       ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
575       ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
576       MPIU_Seq_begin(mat->comm,1);
577       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
578                 nzalloc,mem);
579       ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
580       fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
581       ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
582       fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
583       fflush(fd);
584       MPIU_Seq_end(mat->comm,1);
585       ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
586       return 0;
587     }
588     else if (format == FILE_FORMAT_INFO) {
589       return 0;
590     }
591   }
592 
593   if (vobj->type == ASCII_FILE_VIEWER) {
594     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
595     MPIU_Seq_begin(mat->comm,1);
596     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
597            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
598            aij->cend);
599     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
600     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
601     fflush(fd);
602     MPIU_Seq_end(mat->comm,1);
603   }
604   else {
605     int size = aij->size;
606     rank = aij->rank;
607     if (size == 1) {
608       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
609     }
610     else {
611       /* assemble the entire matrix onto first processor. */
612       Mat         A;
613       Mat_SeqAIJ *Aloc;
614       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
615       Scalar      *a;
616 
617       if (!rank) {
618         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
619                CHKERRQ(ierr);
620       }
621       else {
622         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
623                CHKERRQ(ierr);
624       }
625       PLogObjectParent(mat,A);
626 
627       /* copy over the A part */
628       Aloc = (Mat_SeqAIJ*) aij->A->data;
629       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
630       row = aij->rstart;
631       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
632       for ( i=0; i<m; i++ ) {
633         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
634         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
635       }
636       aj = Aloc->j;
637       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
638 
639       /* copy over the B part */
640       Aloc = (Mat_SeqAIJ*) aij->B->data;
641       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
642       row = aij->rstart;
643       ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
644       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
645       for ( i=0; i<m; i++ ) {
646         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
647         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
648       }
649       PetscFree(ct);
650       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
651       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
652       if (!rank) {
653         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
654       }
655       ierr = MatDestroy(A); CHKERRQ(ierr);
656     }
657   }
658   return 0;
659 }
660 
661 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
662 {
663   Mat         mat = (Mat) obj;
664   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
665   int         ierr;
666   PetscObject vobj = (PetscObject) viewer;
667 
668   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
669   if (!viewer) {
670     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
671   }
672   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
673   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
674     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
675   }
676   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
677     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
678   }
679   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == MATLAB_VIEWER) {
680     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
681   }
682   else if (vobj->cookie == DRAW_COOKIE) {
683     ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
684   }
685   else if (vobj->type == BINARY_FILE_VIEWER) {
686     return MatView_MPIAIJ_Binary(mat,viewer);
687   }
688   return 0;
689 }
690 
691 extern int MatMarkDiag_SeqAIJ(Mat);
692 /*
693     This has to provide several versions:
694 
695      1) as in the sequential case
696      2) a) use only local smoothing, updating outer values only once
697         b) local smoothing, updating outer values each inner iteration
698      3) colored updating, exchanging outer values between colors
699 */
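/* Every sweep below applies the same basic update: with d_i the diagonal
   entry plus fshift,

       x_i <- (1 - omega) x_i + omega ( x_i + (b_i - sum_j a_ij x_j)/d_i ),

   where the row sum runs over both the diagonal block A and the
   off-diagonal block B (the latter using the ghost values in mat->lvec).
   This reading assumes SPARSEDENSEMDOT(sum,x,v,idx,n) subtracts the sparse
   dot product, i.e. sum -= v[k]*x[idx[k]] for k = 0..n-1. */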
700 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
701                            double fshift,int its,Vec xx)
702 {
703   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
704   Mat        AA = mat->A, BB = mat->B;
705   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
706   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
707   int        ierr,*idx, *diag;
708   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
709   Vec        tt;
710 
711   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");
712 
713   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
714   xs = x + shift; /* shift by one for index start of 1 */
715   ls = ls + shift;
716   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
717   diag = A->diag;
718   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
719     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
720   }
721   if (flag & SOR_EISENSTAT) {
722     /* Let  A = L + U + D, where L is lower triangular,
723     U is upper triangular, and D is diagonal. This routine applies
724 
725             (L + E)^{-1} A (U + E)^{-1}
726 
727     to a vector efficiently using Eisenstat's trick. This is for
728     the case of the SSOR preconditioner, so E is D/omega, where omega
729     is the relaxation factor.
730     */
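    /* To see that the three steps below apply this operator, write
       y = (E + U)^{-1} b.  Since A = (L + E) + (U + E) + (D - 2E),

           (L + E)^{-1} A (U + E)^{-1} b
             = (L + E)^{-1} ( (L + E) y + b - (2E - D) y )
             = y + (L + E)^{-1} ( b - (2E - D) y ),

       which is computed as: x = (E + U)^{-1} b, then
       t = (E + L)^{-1} ( b - (2E - D) x ), then x = x + t, with
       2E - D = ((2/omega) - 1) D = scale * D. */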
731     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
732     PLogObjectParent(matin,tt);
733     VecGetArray(tt,&t);
734     scale = (2.0/omega) - 1.0;
735     /*  x = (E + U)^{-1} b */
736     VecSet(&zero,mat->lvec);
737     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
738                               mat->Mvctx); CHKERRQ(ierr);
739     for ( i=m-1; i>-1; i-- ) {
740       n    = A->i[i+1] - diag[i] - 1;
741       idx  = A->j + diag[i] + !shift;
742       v    = A->a + diag[i] + !shift;
743       sum  = b[i];
744       SPARSEDENSEMDOT(sum,xs,v,idx,n);
745       d    = fshift + A->a[diag[i]+shift];
746       n    = B->i[i+1] - B->i[i];
747       idx  = B->j + B->i[i] + shift;
748       v    = B->a + B->i[i] + shift;
749       SPARSEDENSEMDOT(sum,ls,v,idx,n);
750       x[i] = omega*(sum/d);
751     }
752     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
753                             mat->Mvctx); CHKERRQ(ierr);
754 
755     /*  t = b - (2*E - D)x */
756     v = A->a;
757     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
758 
759     /*  t = (E + L)^{-1}t */
760     ts = t + shift; /* shifted by one when the indices in A->j start at 1 */
761     diag = A->diag;
762     VecSet(&zero,mat->lvec);
763     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
764                                                  mat->Mvctx); CHKERRQ(ierr);
765     for ( i=0; i<m; i++ ) {
766       n    = diag[i] - A->i[i];
767       idx  = A->j + A->i[i] + shift;
768       v    = A->a + A->i[i] + shift;
769       sum  = t[i];
770       SPARSEDENSEMDOT(sum,ts,v,idx,n);
771       d    = fshift + A->a[diag[i]+shift];
772       n    = B->i[i+1] - B->i[i];
773       idx  = B->j + B->i[i] + shift;
774       v    = B->a + B->i[i] + shift;
775       SPARSEDENSEMDOT(sum,ls,v,idx,n);
776       t[i] = omega*(sum/d);
777     }
778     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
779                                                     mat->Mvctx); CHKERRQ(ierr);
780     /*  x = x + t */
781     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
782     VecDestroy(tt);
783     return 0;
784   }
785 
786 
787   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
788     if (flag & SOR_ZERO_INITIAL_GUESS) {
789       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
790     }
791     else {
792       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
793       CHKERRQ(ierr);
794       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
795       CHKERRQ(ierr);
796     }
797     while (its--) {
798       /* go down through the rows */
799       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
800                               mat->Mvctx); CHKERRQ(ierr);
801       for ( i=0; i<m; i++ ) {
802         n    = A->i[i+1] - A->i[i];
803         idx  = A->j + A->i[i] + shift;
804         v    = A->a + A->i[i] + shift;
805         sum  = b[i];
806         SPARSEDENSEMDOT(sum,xs,v,idx,n);
807         d    = fshift + A->a[diag[i]+shift];
808         n    = B->i[i+1] - B->i[i];
809         idx  = B->j + B->i[i] + shift;
810         v    = B->a + B->i[i] + shift;
811         SPARSEDENSEMDOT(sum,ls,v,idx,n);
812         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
813       }
814       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
815                             mat->Mvctx); CHKERRQ(ierr);
816       /* come up through the rows */
817       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
818                               mat->Mvctx); CHKERRQ(ierr);
819       for ( i=m-1; i>-1; i-- ) {
820         n    = A->i[i+1] - A->i[i];
821         idx  = A->j + A->i[i] + shift;
822         v    = A->a + A->i[i] + shift;
823         sum  = b[i];
824         SPARSEDENSEMDOT(sum,xs,v,idx,n);
825         d    = fshift + A->a[diag[i]+shift];
826         n    = B->i[i+1] - B->i[i];
827         idx  = B->j + B->i[i] + shift;
828         v    = B->a + B->i[i] + shift;
829         SPARSEDENSEMDOT(sum,ls,v,idx,n);
830         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
831       }
832       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
833                             mat->Mvctx); CHKERRQ(ierr);
834     }
835   }
836   else if (flag & SOR_FORWARD_SWEEP){
837     if (flag & SOR_ZERO_INITIAL_GUESS) {
838       VecSet(&zero,mat->lvec);
839       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
840                               mat->Mvctx); CHKERRQ(ierr);
841       for ( i=0; i<m; i++ ) {
842         n    = diag[i] - A->i[i];
843         idx  = A->j + A->i[i] + shift;
844         v    = A->a + A->i[i] + shift;
845         sum  = b[i];
846         SPARSEDENSEMDOT(sum,xs,v,idx,n);
847         d    = fshift + A->a[diag[i]+shift];
848         n    = B->i[i+1] - B->i[i];
849         idx  = B->j + B->i[i] + shift;
850         v    = B->a + B->i[i] + shift;
851         SPARSEDENSEMDOT(sum,ls,v,idx,n);
852         x[i] = omega*(sum/d);
853       }
854       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
855                             mat->Mvctx); CHKERRQ(ierr);
856       its--;
857     }
858     while (its--) {
859       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
860       CHKERRQ(ierr);
861       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
862       CHKERRQ(ierr);
863       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
864                               mat->Mvctx); CHKERRQ(ierr);
865       for ( i=0; i<m; i++ ) {
866         n    = A->i[i+1] - A->i[i];
867         idx  = A->j + A->i[i] + shift;
868         v    = A->a + A->i[i] + shift;
869         sum  = b[i];
870         SPARSEDENSEMDOT(sum,xs,v,idx,n);
871         d    = fshift + A->a[diag[i]+shift];
872         n    = B->i[i+1] - B->i[i];
873         idx  = B->j + B->i[i] + shift;
874         v    = B->a + B->i[i] + shift;
875         SPARSEDENSEMDOT(sum,ls,v,idx,n);
876         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
877       }
878       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
879                             mat->Mvctx); CHKERRQ(ierr);
880     }
881   }
882   else if (flag & SOR_BACKWARD_SWEEP){
883     if (flag & SOR_ZERO_INITIAL_GUESS) {
884       VecSet(&zero,mat->lvec);
885       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
886                               mat->Mvctx); CHKERRQ(ierr);
887       for ( i=m-1; i>-1; i-- ) {
888         n    = A->i[i+1] - diag[i] - 1;
889         idx  = A->j + diag[i] + !shift;
890         v    = A->a + diag[i] + !shift;
891         sum  = b[i];
892         SPARSEDENSEMDOT(sum,xs,v,idx,n);
893         d    = fshift + A->a[diag[i]+shift];
894         n    = B->i[i+1] - B->i[i];
895         idx  = B->j + B->i[i] + shift;
896         v    = B->a + B->i[i] + shift;
897         SPARSEDENSEMDOT(sum,ls,v,idx,n);
898         x[i] = omega*(sum/d);
899       }
900       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
901                             mat->Mvctx); CHKERRQ(ierr);
902       its--;
903     }
904     while (its--) {
905       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
906                             mat->Mvctx); CHKERRQ(ierr);
907       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
908                             mat->Mvctx); CHKERRQ(ierr);
909       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
910                               mat->Mvctx); CHKERRQ(ierr);
911       for ( i=m-1; i>-1; i-- ) {
912         n    = A->i[i+1] - A->i[i];
913         idx  = A->j + A->i[i] + shift;
914         v    = A->a + A->i[i] + shift;
915         sum  = b[i];
916         SPARSEDENSEMDOT(sum,xs,v,idx,n);
917         d    = fshift + A->a[diag[i]+shift];
918         n    = B->i[i+1] - B->i[i];
919         idx  = B->j + B->i[i] + shift;
920         v    = B->a + B->i[i] + shift;
921         SPARSEDENSEMDOT(sum,ls,v,idx,n);
922         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
923       }
924       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
925                             mat->Mvctx); CHKERRQ(ierr);
926     }
927   }
928   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
929     if (flag & SOR_ZERO_INITIAL_GUESS) {
930       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
931     }
932     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
933     CHKERRQ(ierr);
934     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
935     CHKERRQ(ierr);
936     while (its--) {
937       /* go down through the rows */
938       for ( i=0; i<m; i++ ) {
939         n    = A->i[i+1] - A->i[i];
940         idx  = A->j + A->i[i] + shift;
941         v    = A->a + A->i[i] + shift;
942         sum  = b[i];
943         SPARSEDENSEMDOT(sum,xs,v,idx,n);
944         d    = fshift + A->a[diag[i]+shift];
945         n    = B->i[i+1] - B->i[i];
946         idx  = B->j + B->i[i] + shift;
947         v    = B->a + B->i[i] + shift;
948         SPARSEDENSEMDOT(sum,ls,v,idx,n);
949         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
950       }
951       /* come up through the rows */
952       for ( i=m-1; i>-1; i-- ) {
953         n    = A->i[i+1] - A->i[i];
954         idx  = A->j + A->i[i] + shift;
955         v    = A->a + A->i[i] + shift;
956         sum  = b[i];
957         SPARSEDENSEMDOT(sum,xs,v,idx,n);
958         d    = fshift + A->a[diag[i]+shift];
959         n    = B->i[i+1] - B->i[i];
960         idx  = B->j + B->i[i] + shift;
961         v    = B->a + B->i[i] + shift;
962         SPARSEDENSEMDOT(sum,ls,v,idx,n);
963         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
964       }
965     }
966   }
967   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
968     if (flag & SOR_ZERO_INITIAL_GUESS) {
969       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
970     }
971     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
972     CHKERRQ(ierr);
973     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
974     CHKERRQ(ierr);
975     while (its--) {
976       for ( i=0; i<m; i++ ) {
977         n    = A->i[i+1] - A->i[i];
978         idx  = A->j + A->i[i] + shift;
979         v    = A->a + A->i[i] + shift;
980         sum  = b[i];
981         SPARSEDENSEMDOT(sum,xs,v,idx,n);
982         d    = fshift + A->a[diag[i]+shift];
983         n    = B->i[i+1] - B->i[i];
984         idx  = B->j + B->i[i] + shift;
985         v    = B->a + B->i[i] + shift;
986         SPARSEDENSEMDOT(sum,ls,v,idx,n);
987         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
988       }
989     }
990   }
991   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
992     if (flag & SOR_ZERO_INITIAL_GUESS) {
993       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
994     }
995     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
996                             mat->Mvctx); CHKERRQ(ierr);
997     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
998                             mat->Mvctx); CHKERRQ(ierr);
999     while (its--) {
1000       for ( i=m-1; i>-1; i-- ) {
1001         n    = A->i[i+1] - A->i[i];
1002         idx  = A->j + A->i[i] + shift;
1003         v    = A->a + A->i[i] + shift;
1004         sum  = b[i];
1005         SPARSEDENSEMDOT(sum,xs,v,idx,n);
1006         d    = fshift + A->a[diag[i]+shift];
1007         n    = B->i[i+1] - B->i[i];
1008         idx  = B->j + B->i[i] + shift;
1009         v    = B->a + B->i[i] + shift;
1010         SPARSEDENSEMDOT(sum,ls,v,idx,n);
1011         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
1012       }
1013     }
1014   }
1015   return 0;
1016 }
1017 
1018 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
1019                              int *nzalloc,int *mem)
1020 {
1021   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1022   Mat        A = mat->A, B = mat->B;
1023   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
1024 
1025   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
1026   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
1027   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
1028   if (flag == MAT_LOCAL) {
1029     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
1030   } else if (flag == MAT_GLOBAL_MAX) {
1031     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
1032     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1033   } else if (flag == MAT_GLOBAL_SUM) {
1034     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
1035     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
1036   }
1037   return 0;
1038 }
1039 
1040 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
1041 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
1042 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
1043 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
1044 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
1045 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1046 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
1047 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
1048 
1049 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
1050 {
1051   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1052 
1053   if (op == NO_NEW_NONZERO_LOCATIONS ||
1054       op == YES_NEW_NONZERO_LOCATIONS ||
1055       op == COLUMNS_SORTED ||
1056       op == ROW_ORIENTED) {
1057         MatSetOption(a->A,op);
1058         MatSetOption(a->B,op);
1059   }
1060   else if (op == ROWS_SORTED ||
1061            op == SYMMETRIC_MATRIX ||
1062            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1063            op == YES_NEW_DIAGONALS)
1064     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1065   else if (op == COLUMN_ORIENTED) {
1066     a->roworiented = 0;
1067     MatSetOption(a->A,op);
1068     MatSetOption(a->B,op);
1069   }
1070   else if (op == NO_NEW_DIAGONALS)
1071     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1072   else
1073     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1074   return 0;
1075 }
1076 
1077 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1078 {
1079   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1080   *m = mat->M; *n = mat->N;
1081   return 0;
1082 }
1083 
1084 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1085 {
1086   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1087   *m = mat->m; *n = mat->n;
1088   return 0;
1089 }
1090 
1091 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1092 {
1093   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1094   *m = mat->rstart; *n = mat->rend;
1095   return 0;
1096 }
1097 
1098 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1099 {
1100   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1101   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1102   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1103   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1104 
1105   if (!mat->assembled) SETERRQ(1,"MatGetRow_MPIAIJ:Must assemble matrix");
1106   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:Only local rows");
1107   lrow = row - rstart;
1108 
1109   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1110   if (!v)   {pvA = 0; pvB = 0;}
1111   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1112   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1113   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1114   nztot = nzA + nzB;
1115 
1116   if (v  || idx) {
1117     if (nztot) {
1118       /* Sort by increasing column numbers, assuming A and B already sorted */
1119       int imark;
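      /* imark records how many columns of the B (off-diagonal) part lie to
         the left of the diagonal block; the merged row is then
         B[0..imark), all of A, B[imark..nzB), which is in increasing
         global column order because A and B are individually sorted. */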
1120       if (mat->assembled) {
1121         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1122       }
1123       if (v) {
1124         *v = (Scalar *) PetscMalloc( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
1125         for ( i=0; i<nzB; i++ ) {
1126           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1127           else break;
1128         }
1129         imark = i;
1130         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1131         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1132       }
1133       if (idx) {
1134         *idx = (int *) PetscMalloc( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1135         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1136         for ( i=0; i<nzB; i++ ) {
1137           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1138           else break;
1139         }
1140         imark = i;
1141         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1142         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
1143       }
1144     }
1145     else {if (idx) *idx = 0; if (v) *v = 0;}
1146   }
1147   *nz = nztot;
1148   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1149   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1150   return 0;
1151 }
1152 
1153 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1154 {
1155   if (idx) PetscFree(*idx);
1156   if (v) PetscFree(*v);
1157   return 0;
1158 }
1159 
1160 static int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1161 {
1162   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1163   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1164   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1165   double     sum = 0.0;
1166   Scalar     *v;
1167 
1168   if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
1169   if (aij->size == 1) {
1170     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1171   } else {
1172     if (type == NORM_FROBENIUS) {
1173       v = amat->a;
1174       for (i=0; i<amat->nz; i++ ) {
1175 #if defined(PETSC_COMPLEX)
1176         sum += real(conj(*v)*(*v)); v++;
1177 #else
1178         sum += (*v)*(*v); v++;
1179 #endif
1180       }
1181       v = bmat->a;
1182       for (i=0; i<bmat->nz; i++ ) {
1183 #if defined(PETSC_COMPLEX)
1184         sum += real(conj(*v)*(*v)); v++;
1185 #else
1186         sum += (*v)*(*v); v++;
1187 #endif
1188       }
1189       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1190       *norm = sqrt(*norm);
1191     }
1192     else if (type == NORM_1) { /* max column norm */
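      /* ||A||_1 = max_j sum_i |a_ij|: accumulate this processor's
         contribution to every global column sum in tmp[] (of global length
         N, so, like colmap, this is not scalable), reduce across all
         processors, then take the maximum. */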
1193       double *tmp, *tmp2;
1194       int    *jj, *garray = aij->garray;
1195       tmp  = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1196       tmp2 = (double *) PetscMalloc( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1197       PetscMemzero(tmp,aij->N*sizeof(double));
1198       *norm = 0.0;
1199       v = amat->a; jj = amat->j;
1200       for ( j=0; j<amat->nz; j++ ) {
1201         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1202       }
1203       v = bmat->a; jj = bmat->j;
1204       for ( j=0; j<bmat->nz; j++ ) {
1205         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1206       }
1207       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1208       for ( j=0; j<aij->N; j++ ) {
1209         if (tmp2[j] > *norm) *norm = tmp2[j];
1210       }
1211       PetscFree(tmp); PetscFree(tmp2);
1212     }
1213     else if (type == NORM_INFINITY) { /* max row norm */
1214       double ntemp = 0.0;
1215       for ( j=0; j<amat->m; j++ ) {
1216         v = amat->a + amat->i[j] + shift;
1217         sum = 0.0;
1218         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1219           sum += PetscAbsScalar(*v); v++;
1220         }
1221         v = bmat->a + bmat->i[j] + shift;
1222         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1223           sum += PetscAbsScalar(*v); v++;
1224         }
1225         if (sum > ntemp) ntemp = sum;
1226       }
1227       MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1228     }
1229     else {
1230       SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
1231     }
1232   }
1233   return 0;
1234 }
1235 
1236 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1237 {
1238   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1239   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1240   int        ierr,shift = Aloc->indexshift;
1241   Mat        B;
1242   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1243   Scalar     *array;
1244 
1245   if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
1246   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,
1247          PETSC_NULL,&B); CHKERRQ(ierr);
1248 
1249   /* copy over the A part */
1250   Aloc = (Mat_SeqAIJ*) a->A->data;
1251   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1252   row = a->rstart;
1253   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1254   for ( i=0; i<m; i++ ) {
1255     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1256     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1257   }
1258   aj = Aloc->j;
1259   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1260 
1261   /* copy over the B part */
1262   Aloc = (Mat_SeqAIJ*) a->B->data;
1263   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1264   row = a->rstart;
1265   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1266   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1267   for ( i=0; i<m; i++ ) {
1268     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1269     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1270   }
1271   PetscFree(ct);
1272   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1273   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1274   if (matout) {
1275     *matout = B;
1276   } else {
1277     /* This isn't really an in-place transpose, but we free the data structures from a */
1278     PetscFree(a->rowners);
1279     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1280     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1281     if (a->colmap) PetscFree(a->colmap);
1282     if (a->garray) PetscFree(a->garray);
1283     if (a->lvec) VecDestroy(a->lvec);
1284     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1285     PetscFree(a);
1286     PetscMemcpy(A,B,sizeof(struct _Mat));
1287     PetscHeaderDestroy(B);
1288   }
1289   return 0;
1290 }
1291 
1292 extern int MatPrintHelp_SeqAIJ(Mat);
1293 static int MatPrintHelp_MPIAIJ(Mat A)
1294 {
1295   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1296 
1297   if (!a->rank) return MatPrintHelp_SeqAIJ(a->A);
1298   else return 0;
1299 }
1300 
1301 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1302 static int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
1303 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1304 /* -------------------------------------------------------------------*/
1305 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1306        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1307        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1308        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1309        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1310        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1311        MatLUFactor_MPIAIJ,0,
1312        MatRelax_MPIAIJ,
1313        MatTranspose_MPIAIJ,
1314        MatGetInfo_MPIAIJ,0,
1315        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1316        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1317        0,
1318        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1319        MatGetReordering_MPIAIJ,
1320        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1321        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1322        MatILUFactorSymbolic_MPIAIJ,0,
1323        0,0,MatConvert_MPIAIJ,0,0,MatConvertSameType_MPIAIJ,0,0,
1324        0,0,0,
1325        0,0,MatGetValues_MPIAIJ,0,
1326        MatPrintHelp_MPIAIJ,
1327        MatScale_MPIAIJ};
1328 
1329 /*@C
1330    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1331    (the default parallel PETSc format).
1332 
1333    Input Parameters:
1334 .  comm - MPI communicator
1335 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1336 .  n - number of local columns (or PETSC_DECIDE to have it calculated
1337            if N is given)
1338 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1339 .  N - number of global columns (or PETSC_DECIDE to have it calculated
1340            if n is given)
1341 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1342            (same for all local rows)
1343 .  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
1344            or PETSC_NULL (possibly different for each row).  You must leave
1345            room for the diagonal entry even if it is zero.
1346 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1347            submatrix (same for all local rows).
1348 .  o_nnz - number of nonzeros per row in off-diagonal portion of local
1349            submatrix or PETSC_NULL (possibly different for each row).
1350 
1351    Output Parameter:
1352 .  newmat - the matrix
1353 
1354    Notes:
1355    The AIJ format (also called the Yale sparse matrix format or
1356    compressed row storage) is fully compatible with standard Fortran 77
1357    storage.  That is, the stored row and column indices can begin at
1358    either one (as in Fortran) or zero.  See the users manual for details.
1359 
1360    The user MUST specify either the local or global matrix dimensions
1361    (possibly both).
1362 
1363    Storage Information:
1364    For a square global matrix we define each processor's diagonal portion
1365    to be its local rows and the corresponding columns (a square submatrix);
1366    each processor's off-diagonal portion encompasses the remainder of the
1367    local matrix (a rectangular submatrix).
1368 
1369    The user can specify preallocated storage for the diagonal part of
1370    the local submatrix with either d_nz or d_nnz (not both).  Set
1371    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1372    memory allocation.  Likewise, specify preallocated storage for the
1373    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1374 
1375    By default, this format uses inodes (identical nodes) when possible.
1376    We search for consecutive rows with the same nonzero structure, thereby
1377    reusing matrix information to achieve increased efficiency.
1378 
1379    Options Database Keys:
1380 $    -mat_aij_no_inode  - Do not use inodes
1381 $    -mat_aij_inode_limit <limit> - Set inode limit (max limit=5)
1382 
1383 .keywords: matrix, aij, compressed row, sparse, parallel
1384 
1385 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1386 @*/
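/*
   For example (an illustrative sketch): to create a 100 x 100 matrix with
   PETSc choosing the row distribution, preallocating about 5 nonzeros per
   row in the diagonal block and 2 in the off-diagonal block:

       Mat A;
       int ierr;
       ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                              100,100,5,PETSC_NULL,2,PETSC_NULL,&A);
       CHKERRQ(ierr);
*/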
1387 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1388                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *newmat)
1389 {
1390   Mat          mat;
1391   Mat_MPIAIJ   *a;
1392   int          ierr, i,sum[2],work[2];
1393 
1394   *newmat         = 0;
1395   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1396   PLogObjectCreate(mat);
1397   mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1398   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1399   mat->destroy    = MatDestroy_MPIAIJ;
1400   mat->view       = MatView_MPIAIJ;
1401   mat->factor     = 0;
1402 
1403   a->insertmode = NOT_SET_VALUES;
1404   MPI_Comm_rank(comm,&a->rank);
1405   MPI_Comm_size(comm,&a->size);
1406 
1407   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1408     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
1409 
1410   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1411     work[0] = m; work[1] = n;
1412     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1413     if (M == PETSC_DECIDE) M = sum[0];
1414     if (N == PETSC_DECIDE) N = sum[1];
1415   }
1416   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
1417   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
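  /* e.g., M = 10 on size = 3 processes yields local sizes m = 4, 3, 3
     on ranks 0, 1, 2 respectively */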
1418   a->m = m;
1419   a->n = n;
1420   a->N = N;
1421   a->M = M;
1422 
1423   /* build local table of row and column ownerships */
1424   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1425   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1426   a->cowners = a->rowners + a->size + 1;
1427   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1428   a->rowners[0] = 0;
1429   for ( i=2; i<=a->size; i++ ) {
1430     a->rowners[i] += a->rowners[i-1];
1431   }
1432   a->rstart = a->rowners[a->rank];
1433   a->rend   = a->rowners[a->rank+1];
1434   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1435   a->cowners[0] = 0;
1436   for ( i=2; i<=a->size; i++ ) {
1437     a->cowners[i] += a->cowners[i-1];
1438   }
1439   a->cstart = a->cowners[a->rank];
1440   a->cend   = a->cowners[a->rank+1];
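  /* continuing the example above (local sizes 4, 3, 3): MPI_Allgather()
     fills rowners[1..size] with the local sizes and the running sums give
     rowners = {0,4,7,10}, so rank 1 has rstart = 4 and rend = 7, i.e. it
     owns global rows 4 through 6 */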
1441 
1442   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1443   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1444   PLogObjectParent(mat,a->A);
1445   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1446   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1447   PLogObjectParent(mat,a->B);
1448 
1449   /* build cache (stash) for matrix entries destined for other processors */
1450   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1451   a->colmap      = 0;
1452   a->garray      = 0;
1453   a->roworiented = 1;
1454 
1455   /* work vector and scatter context used for matrix-vector multiply */
1456   a->lvec      = 0;
1457   a->Mvctx     = 0;
1458   a->assembled = 0;
1459 
1460   *newmat = mat;
1461   return 0;
1462 }
1463 
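/* Duplicates an assembled MPIAIJ matrix: deep copies are made of the
   ownership tables, the column map, the scatter context, and the
   sequential blocks A (diagonal part) and B (off-diagonal part). */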
1464 static int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1465 {
1466   Mat        mat;
1467   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1468   int        ierr, len,flg;
1469 
1470   if (!oldmat->assembled) SETERRQ(1,"MatConvertSameType_MPIAIJ:Must assemble matrix");
1471   *newmat       = 0;
1472   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1473   PLogObjectCreate(mat);
1474   mat->data     = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
1475   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1476   mat->destroy  = MatDestroy_MPIAIJ;
1477   mat->view     = MatView_MPIAIJ;
1478   mat->factor   = matin->factor;
1479 
1480   a->m          = oldmat->m;
1481   a->n          = oldmat->n;
1482   a->M          = oldmat->M;
1483   a->N          = oldmat->N;
1484 
1485   a->assembled  = 1;
1486   a->rstart     = oldmat->rstart;
1487   a->rend       = oldmat->rend;
1488   a->cstart     = oldmat->cstart;
1489   a->cend       = oldmat->cend;
1490   a->size       = oldmat->size;
1491   a->rank       = oldmat->rank;
1492   a->insertmode = NOT_SET_VALUES;
1493 
  /* allocate with the same layout as MatCreateMPIAIJ() so that the column
     ownership table (cowners) is copied along with the row table */
1494   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1495   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 1;
1496   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+1)*sizeof(int));
1497   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1498   if (oldmat->colmap) {
1499     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1500     PLogObjectMemory(mat,(a->N)*sizeof(int));
1501     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1502   } else a->colmap = 0;
1503   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
1504     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
1505     PLogObjectMemory(mat,len*sizeof(int));
1506     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1507   } else a->garray = 0;
1508 
1509   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1510   PLogObjectParent(mat,a->lvec);
1511   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1512   PLogObjectParent(mat,a->Mvctx);
1513   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1514   PLogObjectParent(mat,a->A);
1515   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1516   PLogObjectParent(mat,a->B);
1517   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
1518   if (flg) {
1519     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
1520   }
1521   *newmat = mat;
1522   return 0;
1523 }
1524 
1525 #include "sysio.h"
1526 
1527 int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
1528 {
1529   Mat          A;
1530   int          i, nz, ierr, j,rstart, rend, fd;
1531   Scalar       *vals,*svals;
1532   PetscObject  vobj = (PetscObject) bview;
1533   MPI_Comm     comm = vobj->comm;
1534   MPI_Status   status;
1535   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1536   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1537   int          tag = ((PetscObject)bview)->tag;
1538 
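  /* assumed binary file layout, as read below: a header of four ints
     (cookie, M = global rows, N = global cols, and presumably the total
     number of nonzeros, which is not used here), then the M row lengths,
     then the column indices of all rows, then the numerical values */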
1539   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1540   if (!rank) {
1541     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1542     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
1543     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
1544   }
1545 
1546   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1547   M = header[1]; N = header[2];
1548   /* determine ownership of all rows */
1549   m = M/size + ((M % size) > rank);
1550   rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
1551   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1552   rowners[0] = 0;
1553   for ( i=2; i<=size; i++ ) {
1554     rowners[i] += rowners[i-1];
1555   }
1556   rstart = rowners[rank];
1557   rend   = rowners[rank+1];
1558 
1559   /* distribute row lengths to all processors */
1560   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1561   offlens = ourlens + (rend-rstart);
1562   if (!rank) {
1563     rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
1564     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
1565     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1566     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1567     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1568     PetscFree(sndcounts);
1569   }
1570   else {
1571     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1572   }
1573 
1574   if (!rank) {
1575     /* calculate the number of nonzeros on each processor */
1576     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
1577     PetscMemzero(procsnz,size*sizeof(int));
1578     for ( i=0; i<size; i++ ) {
1579       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1580         procsnz[i] += rowlengths[j];
1581       }
1582     }
1583     PetscFree(rowlengths);
1584 
1585     /* determine max buffer needed and allocate it */
1586     maxnz = 0;
1587     for ( i=0; i<size; i++ ) {
1588       maxnz = PetscMax(maxnz,procsnz[i]);
1589     }
1590     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
1591 
1592     /* read in my part of the matrix column indices  */
1593     nz = procsnz[0];
1594     mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1595     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1596 
1597     /* read in everyone else's parts and ship them off */
1598     for ( i=1; i<size; i++ ) {
1599       nz = procsnz[i];
1600       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1601       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1602     }
1603     PetscFree(cols);
1604   }
1605   else {
1606     /* determine buffer space needed for message */
1607     nz = 0;
1608     for ( i=0; i<m; i++ ) {
1609       nz += ourlens[i];
1610     }
1611     mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
1612 
1613     /* receive message of column indices */
1614     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1615     MPI_Get_count(&status,MPI_INT,&maxnz);
1616     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1617   }
1618 
1619   /* loop over local rows, determining the number of off-diagonal entries */
1620   PetscMemzero(offlens,m*sizeof(int));
1621   jj = 0;
1622   for ( i=0; i<m; i++ ) {
1623     for ( j=0; j<ourlens[i]; j++ ) {
1624       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1625       jj++;
1626     }
1627   }
1628 
1629   /* create our matrix */
1630   for ( i=0; i<m; i++ ) {
1631     ourlens[i] -= offlens[i];
1632   }
1633   if (type == MATMPIAIJ) {
1634     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1635   }
1636   else if (type == MATMPIROW) {
1637     ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1638   }
  else SETERRQ(1,"MatLoad_MPIAIJorMPIRow:Only MATMPIAIJ and MATMPIROW supported");
1639   A = *newmat;
1640   MatSetOption(A,COLUMNS_SORTED);
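  /* restore the full row lengths: the diagonal/off-diagonal split above
     was needed only for the preallocation in the create call */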
1641   for ( i=0; i<m; i++ ) {
1642     ourlens[i] += offlens[i];
1643   }
1644 
1645   if (!rank) {
1646     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1647 
1648     /* read in my part of the matrix numerical values  */
1649     nz = procsnz[0];
1650     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1651 
1652     /* insert into matrix */
1653     jj      = rstart;
1654     smycols = mycols;
1655     svals   = vals;
1656     for ( i=0; i<m; i++ ) {
1657       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1658       smycols += ourlens[i];
1659       svals   += ourlens[i];
1660       jj++;
1661     }
1662 
1663     /* read in other processors and ship out */
1664     for ( i=1; i<size; i++ ) {
1665       nz = procsnz[i];
1666       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1667       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1668     }
1669     PetscFree(procsnz);
1670   }
1671   else {
1672     /* receive numeric values */
1673     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1674 
1675     /* receive message of values */
1676     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1677     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1678     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1679 
1680     /* insert into matrix */
1681     jj      = rstart;
1682     smycols = mycols;
1683     svals   = vals;
1684     for ( i=0; i<m; i++ ) {
1685       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1686       smycols += ourlens[i];
1687       svals   += ourlens[i];
1688       jj++;
1689     }
1690   }
1691   PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);
1692 
1693   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1694   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1695   return 0;
1696 }
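
/* A usage sketch (hypothetical file name; the ViewerFileOpenBinary()
   calling sequence is assumed and may differ in this PETSc version):

     Viewer bview;
     Mat    A;
     ierr = ViewerFileOpenBinary(MPI_COMM_WORLD,"matrix.dat",BINARY_RDONLY,
                                 &bview); CHKERRQ(ierr);
     ierr = MatLoad(bview,MATMPIAIJ,&A); CHKERRQ(ierr);
     ierr = ViewerDestroy(bview); CHKERRQ(ierr);
*/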
1697