xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 08480c60afa5ef1d2e4e27b9ebdf48b02c6a2186)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.94 1995/10/23 18:37:47 curfman Exp bsmith $";
3 #endif
4 
5 #include "mpiaij.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
9 /* local utility routine that creates a mapping from the global column
10 number to the local number in the off-diagonal part of the local
storage of the matrix.  This is done in a non-scalable way since the
12 length of colmap equals the global matrix length.
13 */
14 static int CreateColmap_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
17   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
18   int        n = B->n,i,shift = B->indexshift;
19 
20   aij->colmap = (int *) PETSCMALLOC(aij->N*sizeof(int));CHKPTRQ(aij->colmap);
21   PLogObjectMemory(mat,aij->N*sizeof(int));
22   PetscZero(aij->colmap,aij->N*sizeof(int));
23   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i-shift;
24   return 0;
25 }
26 
27 extern int DisAssemble_MPIAIJ(Mat);
28 
29 static int MatGetReordering_MPIAIJ(Mat mat,MatOrdering type,IS *rperm,IS *cperm)
30 {
31   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
32   int ierr;
33   if (aij->size == 1) {
34     ierr = MatGetReordering(aij->A,type,rperm,cperm); CHKERRQ(ierr);
35   } else SETERRQ(1,"MatGetReordering_MPIAIJ:not supported in parallel");
36   return 0;
37 }
38 
/*
   Inserts or adds a logically dense block of values.  Rows owned by this
   processor go straight into the diagonal (A) or off-diagonal (B)
   sequential blocks; rows owned elsewhere are stashed and exchanged in
   MatAssemblyBegin/End.  Mixing INSERT and ADD between assemblies is an
   error.
*/
static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
                               int *idxn,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        shift = C->indexshift;

  if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
    SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
  }
  aij->insertmode = addv;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
    if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* locally owned row: shift to local numbering */
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
        if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column lies in the diagonal block */
          col = idxn[j] - cstart;
          ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
        else {
          if (aij->assembled) {
            /* after assembly B uses compressed local column numbering;
               translate via colmap (built lazily) */
            if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
            col = aij->colmap[idxn[j]] + shift;
            /* col < 0 means this global column is not yet present in B:
               a new nonzero location forces a disassemble so the matrix
               can be rebuilt with the new sparsity pattern */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col =  idxn[j];
            }
          }
          else col = idxn[j];
          ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
        }
      }
    }
    else {
      /* off-processor row: cache (row, cols, values) until assembly */
      ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr);
    }
  }
  return 0;
}
84 
/*
   Begins parallel assembly: verifies all processors agree on the insert
   mode, counts stashed off-processor entries per destination, posts the
   nonblocking receives and sends of (row, col, value) triples (encoded
   as Scalars), and frees the stash.  The matching waits and the unpack
   happen in MatAssemblyEnd_MPIAIJ(), which reads the state saved in aij.
*/
static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
  }
  aij->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    /* linear search of the row-ownership ranges for the destination */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
  nreceives = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PETSCFREE(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
              comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PETSCMALLOC(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack each stashed entry as the triple (row, col, value); note the
     pack loop advances starts[], so it is rebuilt below for the sends */
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
  }
  PETSCFREE(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
                comm,send_waits+count++);
    }
  }
  PETSCFREE(starts); PETSCFREE(nprocs);

  /* Free cache space */
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  /* save communication state for MatAssemblyEnd_MPIAIJ */
  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  return 0;
}
182 extern int MatSetUpMultiply_MPIAIJ(Mat);
183 
/*
   Completes parallel assembly: waits for the receives posted in
   MatAssemblyBegin_MPIAIJ, unpacks the (row, col, value) triples into the
   local A/B blocks, waits on the sends, frees the communication buffers,
   and assembles the two sequential blocks.  If any processor had to
   disassemble, all must, so disassembly state is made collective here.
*/
static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled,shift = C->indexshift;
  Scalar      *values,val;
  InsertMode  addv = aij->insertmode;

  /*  wait on receives */
  while (count) {
    MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
    n = n/3;   /* each entry is a (row, col, value) triple of Scalars */
    for ( i=0; i<n; i++ ) {
      row = (int) PETSCREAL(values[3*i]) - aij->rstart;
      col = (int) PETSCREAL(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        /* column falls in the diagonal block */
        col -= aij->cstart;
        MatSetValues(aij->A,1,&row,1,&col,&val,addv);
      }
      else {
        if (aij->assembled) {
          /* translate to B's compressed column numbering; a negative
             result means a new nonzero location -> disassemble */
          if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
          col = aij->colmap[col] + shift;
          if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PETSCREAL(values[3*i+1]);
          }
        }
        MatSetValues(aij->B,1,&row,1,&col,&val,addv);
      }
    }
    count--;
  }
  PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PETSCMALLOC(aij->nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(aij->nsends,aij->send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);

  aij->insertmode = NOT_SET_VALUES;
  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
  if (aij->assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!aij->assembled && mode == FINAL_ASSEMBLY) {
    /* build the scatter context and ghost vector used by MatMult etc. */
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
    aij->assembled = 1;
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  return 0;
}
254 
255 static int MatZeroEntries_MPIAIJ(Mat A)
256 {
257   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
258   int        ierr;
259   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
260   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
261   return 0;
262 }
263 
264 /* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
266    This is a BUG. The only way to fix it seems to be to access
267    aij->A and aij->B directly and not through the MatZeroRows()
268    routine.
269 */
/*
   Zeroes the (globally numbered) rows listed in is, optionally placing
   diag on the diagonal of locally owned rows.  Each processor may list
   any rows; the row indices are routed to their owning processors with
   nonblocking sends/receives, then each processor zaps its own rows in
   the local A and B blocks via MatZeroRows on the sequential matrices.
*/
static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
  ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* linear search of the row-ownership ranges for the owner */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
  MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
  nrecvs = work[rank];
  MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
  nmax = work[rank];
  PETSCFREE(work);

  /* post receives:   */
  rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
  CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
  CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
  CHKPTRQ(send_waits);
  starts = (int *) PETSCMALLOC( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack loop advances starts[], so it is rebuilt before the sends */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
    }
  }
  PETSCFREE(starts);

  base = owners[rank];   /* first globally numbered row owned here */

  /*  wait on receives */
  lens   = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
    /* unpack receives into our local space */
    MPI_Get_count(&recv_status,MPI_INT,&n);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PETSCFREE(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;   /* convert to local row number */
    }
  }
  PETSCFREE(rvalues); PETSCFREE(lens);
  PETSCFREE(owner); PETSCFREE(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);
  PETSCFREE(lrows);
  /* diag only applies to the diagonal block; B rows are zeroed outright */
  ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status));
    CHKPTRQ(send_status);
    MPI_Waitall(nsends,send_waits,send_status);
    PETSCFREE(send_status);
  }
  PETSCFREE(send_waits); PETSCFREE(svalues);

  return 0;
}
393 
/*
   yy = A*xx.  Overlaps communication with computation: the scatter of the
   off-processor (ghost) entries of xx into a->lvec is started, the
   diagonal-block product is computed locally, then once the ghost values
   arrive the off-diagonal block's contribution is added in.
*/
static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  return 0;
}
406 
407 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
408 {
409   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
410   int        ierr;
411   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
412   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
413   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
414   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
415   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
416   return 0;
417 }
418 
419 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
420 {
421   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
422   int        ierr;
423 
424   if (!a->assembled) SETERRQ(1,"MatMulTrans_MPIAIJ:must assemble matrix");
425   /* do nondiagonal part */
426   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
427   /* send it on its way */
428   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
429                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
430   /* do local part */
431   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
432   /* receive remote parts: note this assumes the values are not actually */
433   /* inserted in yy until the next line, which is true for my implementation*/
434   /* but is not perhaps always true. */
435   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
436                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
437   return 0;
438 }
439 
440 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
441 {
442   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
443   int        ierr;
444 
445   if (!a->assembled) SETERRQ(1,"MatMulTransAdd_MPIAIJ:must assemble matrix");
446   /* do nondiagonal part */
447   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
448   /* send it on its way */
449   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
450                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
451   /* do local part */
452   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
453   /* receive remote parts: note this assumes the values are not actually */
454   /* inserted in yy until the next line, which is true for my implementation*/
455   /* but is not perhaps always true. */
456   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
457                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
458   return 0;
459 }
460 
461 /*
462   This only works correctly for square matrices where the subblock A->A is the
463    diagonal block
464 */
465 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
466 {
467   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
468   if (!a->assembled) SETERRQ(1,"MatGetDiag_MPIAIJ:must assemble matrix");
469   return MatGetDiagonal(a->A,v);
470 }
471 
/*
   Destroys the parallel matrix: frees the ownership table, the two
   sequential blocks, and the optional lazily created helpers (colmap,
   garray, ghost vector, scatter context), then the header itself.
*/
static int MatDestroy_MPIAIJ(PetscObject obj)
{
  Mat        mat = (Mat) obj;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;
#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  PETSCFREE(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  /* these are only created on demand, so may be absent */
  if (aij->colmap) PETSCFREE(aij->colmap);
  if (aij->garray) PETSCFREE(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  PETSCFREE(aij);
  PLogObjectDestroy(mat);
  PETSCHEADERDESTROY(mat);
  return 0;
}
492 #include "draw.h"
493 #include "pinclude/pviewer.h"
494 
495 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
496 {
497   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
498   int         ierr;
499 
500   if (aij->size == 1) {
501     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
502   }
503   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
504   return 0;
505 }
506 
/*
   ASCII/draw viewing.  INFO formats print per-processor statistics;
   plain ASCII prints each processor's blocks in sequence; otherwise
   (e.g. draw viewers, or files needing a global picture) the whole
   matrix is gathered onto the first processor and viewed there.
*/
static int MatView_MPIAIJ_ASCIIorDraw(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  PetscObject vobj = (PetscObject) viewer;
  FILE        *fd;

  if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
    ierr = ViewerFileGetFormat_Private(viewer,&format);
    if (format == FILE_FORMAT_INFO_DETAILED) {
      /* per-processor nonzero/memory statistics, printed in rank order */
      int nz,nzalloc,mem;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&nz,&nzalloc,&mem);
      MPIU_Seq_begin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d \n",rank,aij->m,nz,
                nzalloc,mem);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] on diagonal nz %d \n",rank,nz);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&nz,&nzalloc,&mem);
      fprintf(fd,"[%d] off diagonal nz %d \n",rank,nz);
      fflush(fd);
      MPIU_Seq_end(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer);
      return 0;
    }
    else if (format == FILE_FORMAT_INFO) {
      /* nothing to print for the plain INFO format */
      return 0;
    }
  }

  if (vobj->type == ASCII_FILE_VIEWER) {
    /* print each processor's ownership ranges and blocks in rank order */
    ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
    MPIU_Seq_begin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
           aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
           aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    MPIU_Seq_end(mat->comm,1);
  }
  else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    }
    else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqAIJ *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      /* processor 0 gets all M rows; the rest contribute zero rows */
      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr);
      }
      else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);


      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      /* temporarily convert A's column indices to global numbering
         in place; they are restored immediately after the copy */
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      /* B's compressed column indices are mapped to global via garray */
      ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PETSCFREE(ct);
      ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
      if (!rank) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  return 0;
}
605 
/*
   Top-level viewer dispatch for MPIAIJ matrices: routes to the
   ASCII/draw or binary implementations depending on the viewer's
   cookie/type; defaults to stdout when no viewer is supplied.
*/
static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
{
  Mat         mat = (Mat) obj;
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr;
  PetscObject vobj = (PetscObject) viewer;

  if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
  if (!viewer) {
    /* no viewer provided: default to stdout */
    viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
  }
  /* a null draw window means "draw nothing" */
  if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->cookie == DRAW_COOKIE) {
    ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
  }
  else if (vobj->type == BINARY_FILE_VIEWER) {
    return MatView_MPIAIJ_Binary(mat,viewer);
  }
  return 0;
}
632 
633 extern int MatMarkDiag_SeqAIJ(Mat);
634 /*
635     This has to provide several versions.
636 
637      1) per sequential
638      2) a) use only local smoothing updating outer values only once.
639         b) local smoothing updating outer values each inner iteration
640      3) color updating out values betwen colors.
641 */
642 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
643                            double fshift,int its,Vec xx)
644 {
645   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
646   Mat        AA = mat->A, BB = mat->B;
647   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
648   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
649   int        ierr,*idx, *diag;
650   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
651   Vec        tt;
652 
653   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");
654 
655   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
656   xs = x + shift; /* shift by one for index start of 1 */
657   ls = ls + shift;
658   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
659   diag = A->diag;
660   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
661     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
662   }
663   if (flag & SOR_EISENSTAT) {
664     /* Let  A = L + U + D; where L is lower trianglar,
665     U is upper triangular, E is diagonal; This routine applies
666 
667             (L + E)^{-1} A (U + E)^{-1}
668 
669     to a vector efficiently using Eisenstat's trick. This is for
670     the case of SSOR preconditioner, so E is D/omega where omega
671     is the relaxation factor.
672     */
673     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
674     PLogObjectParent(matin,tt);
675     VecGetArray(tt,&t);
676     scale = (2.0/omega) - 1.0;
677     /*  x = (E + U)^{-1} b */
678     VecSet(&zero,mat->lvec);
679     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
680                               mat->Mvctx); CHKERRQ(ierr);
681     for ( i=m-1; i>-1; i-- ) {
682       n    = A->i[i+1] - diag[i] - 1;
683       idx  = A->j + diag[i] + !shift;
684       v    = A->a + diag[i] + !shift;
685       sum  = b[i];
686       SPARSEDENSEMDOT(sum,xs,v,idx,n);
687       d    = fshift + A->a[diag[i]+shift];
688       n    = B->i[i+1] - B->i[i];
689       idx  = B->j + B->i[i] + shift;
690       v    = B->a + B->i[i] + shift;
691       SPARSEDENSEMDOT(sum,ls,v,idx,n);
692       x[i] = omega*(sum/d);
693     }
694     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
695                             mat->Mvctx); CHKERRQ(ierr);
696 
697     /*  t = b - (2*E - D)x */
698     v = A->a;
699     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
700 
701     /*  t = (E + L)^{-1}t */
702     ts = t + shift; /* shifted by one for index start of a or mat->j*/
703     diag = A->diag;
704     VecSet(&zero,mat->lvec);
705     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
706                                                  mat->Mvctx); CHKERRQ(ierr);
707     for ( i=0; i<m; i++ ) {
708       n    = diag[i] - A->i[i];
709       idx  = A->j + A->i[i] + shift;
710       v    = A->a + A->i[i] + shift;
711       sum  = t[i];
712       SPARSEDENSEMDOT(sum,ts,v,idx,n);
713       d    = fshift + A->a[diag[i]+shift];
714       n    = B->i[i+1] - B->i[i];
715       idx  = B->j + B->i[i] + shift;
716       v    = B->a + B->i[i] + shift;
717       SPARSEDENSEMDOT(sum,ls,v,idx,n);
718       t[i] = omega*(sum/d);
719     }
720     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
721                                                     mat->Mvctx); CHKERRQ(ierr);
722     /*  x = x + t */
723     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
724     VecDestroy(tt);
725     return 0;
726   }
727 
728 
729   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
730     if (flag & SOR_ZERO_INITIAL_GUESS) {
731       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
732     }
733     else {
734       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
735       CHKERRQ(ierr);
736       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
737       CHKERRQ(ierr);
738     }
739     while (its--) {
740       /* go down through the rows */
741       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
742                               mat->Mvctx); CHKERRQ(ierr);
743       for ( i=0; i<m; i++ ) {
744         n    = A->i[i+1] - A->i[i];
745         idx  = A->j + A->i[i] + shift;
746         v    = A->a + A->i[i] + shift;
747         sum  = b[i];
748         SPARSEDENSEMDOT(sum,xs,v,idx,n);
749         d    = fshift + A->a[diag[i]+shift];
750         n    = B->i[i+1] - B->i[i];
751         idx  = B->j + B->i[i] + shift;
752         v    = B->a + B->i[i] + shift;
753         SPARSEDENSEMDOT(sum,ls,v,idx,n);
754         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
755       }
756       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
757                             mat->Mvctx); CHKERRQ(ierr);
758       /* come up through the rows */
759       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
760                               mat->Mvctx); CHKERRQ(ierr);
761       for ( i=m-1; i>-1; i-- ) {
762         n    = A->i[i+1] - A->i[i];
763         idx  = A->j + A->i[i] + shift;
764         v    = A->a + A->i[i] + shift;
765         sum  = b[i];
766         SPARSEDENSEMDOT(sum,xs,v,idx,n);
767         d    = fshift + A->a[diag[i]+shift];
768         n    = B->i[i+1] - B->i[i];
769         idx  = B->j + B->i[i] + shift;
770         v    = B->a + B->i[i] + shift;
771         SPARSEDENSEMDOT(sum,ls,v,idx,n);
772         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
773       }
774       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
775                             mat->Mvctx); CHKERRQ(ierr);
776     }
777   }
778   else if (flag & SOR_FORWARD_SWEEP){
779     if (flag & SOR_ZERO_INITIAL_GUESS) {
780       VecSet(&zero,mat->lvec);
781       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
782                               mat->Mvctx); CHKERRQ(ierr);
783       for ( i=0; i<m; i++ ) {
784         n    = diag[i] - A->i[i];
785         idx  = A->j + A->i[i] + shift;
786         v    = A->a + A->i[i] + shift;
787         sum  = b[i];
788         SPARSEDENSEMDOT(sum,xs,v,idx,n);
789         d    = fshift + A->a[diag[i]+shift];
790         n    = B->i[i+1] - B->i[i];
791         idx  = B->j + B->i[i] + shift;
792         v    = B->a + B->i[i] + shift;
793         SPARSEDENSEMDOT(sum,ls,v,idx,n);
794         x[i] = omega*(sum/d);
795       }
796       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
797                             mat->Mvctx); CHKERRQ(ierr);
798       its--;
799     }
800     while (its--) {
801       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
802       CHKERRQ(ierr);
803       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
804       CHKERRQ(ierr);
805       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
806                               mat->Mvctx); CHKERRQ(ierr);
807       for ( i=0; i<m; i++ ) {
808         n    = A->i[i+1] - A->i[i];
809         idx  = A->j + A->i[i] + shift;
810         v    = A->a + A->i[i] + shift;
811         sum  = b[i];
812         SPARSEDENSEMDOT(sum,xs,v,idx,n);
813         d    = fshift + A->a[diag[i]+shift];
814         n    = B->i[i+1] - B->i[i];
815         idx  = B->j + B->i[i] + shift;
816         v    = B->a + B->i[i] + shift;
817         SPARSEDENSEMDOT(sum,ls,v,idx,n);
818         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
819       }
820       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
821                             mat->Mvctx); CHKERRQ(ierr);
822     }
823   }
824   else if (flag & SOR_BACKWARD_SWEEP){
825     if (flag & SOR_ZERO_INITIAL_GUESS) {
826       VecSet(&zero,mat->lvec);
827       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
828                               mat->Mvctx); CHKERRQ(ierr);
829       for ( i=m-1; i>-1; i-- ) {
830         n    = A->i[i+1] - diag[i] - 1;
831         idx  = A->j + diag[i] + !shift;
832         v    = A->a + diag[i] + !shift;
833         sum  = b[i];
834         SPARSEDENSEMDOT(sum,xs,v,idx,n);
835         d    = fshift + A->a[diag[i]+shift];
836         n    = B->i[i+1] - B->i[i];
837         idx  = B->j + B->i[i] + shift;
838         v    = B->a + B->i[i] + shift;
839         SPARSEDENSEMDOT(sum,ls,v,idx,n);
840         x[i] = omega*(sum/d);
841       }
842       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
843                             mat->Mvctx); CHKERRQ(ierr);
844       its--;
845     }
846     while (its--) {
847       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
848                             mat->Mvctx); CHKERRQ(ierr);
849       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
850                             mat->Mvctx); CHKERRQ(ierr);
851       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
852                               mat->Mvctx); CHKERRQ(ierr);
853       for ( i=m-1; i>-1; i-- ) {
854         n    = A->i[i+1] - A->i[i];
855         idx  = A->j + A->i[i] + shift;
856         v    = A->a + A->i[i] + shift;
857         sum  = b[i];
858         SPARSEDENSEMDOT(sum,xs,v,idx,n);
859         d    = fshift + A->a[diag[i]+shift];
860         n    = B->i[i+1] - B->i[i];
861         idx  = B->j + B->i[i] + shift;
862         v    = B->a + B->i[i] + shift;
863         SPARSEDENSEMDOT(sum,ls,v,idx,n);
864         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
865       }
866       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
867                             mat->Mvctx); CHKERRQ(ierr);
868     }
869   }
870   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
871     if (flag & SOR_ZERO_INITIAL_GUESS) {
872       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
873     }
874     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
875     CHKERRQ(ierr);
876     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
877     CHKERRQ(ierr);
878     while (its--) {
879       /* go down through the rows */
880       for ( i=0; i<m; i++ ) {
881         n    = A->i[i+1] - A->i[i];
882         idx  = A->j + A->i[i] + shift;
883         v    = A->a + A->i[i] + shift;
884         sum  = b[i];
885         SPARSEDENSEMDOT(sum,xs,v,idx,n);
886         d    = fshift + A->a[diag[i]+shift];
887         n    = B->i[i+1] - B->i[i];
888         idx  = B->j + B->i[i] + shift;
889         v    = B->a + B->i[i] + shift;
890         SPARSEDENSEMDOT(sum,ls,v,idx,n);
891         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
892       }
893       /* come up through the rows */
894       for ( i=m-1; i>-1; i-- ) {
895         n    = A->i[i+1] - A->i[i];
896         idx  = A->j + A->i[i] + shift;
897         v    = A->a + A->i[i] + shift;
898         sum  = b[i];
899         SPARSEDENSEMDOT(sum,xs,v,idx,n);
900         d    = fshift + A->a[diag[i]+shift];
901         n    = B->i[i+1] - B->i[i];
902         idx  = B->j + B->i[i] + shift;
903         v    = B->a + B->i[i] + shift;
904         SPARSEDENSEMDOT(sum,ls,v,idx,n);
905         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
906       }
907     }
908   }
909   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
910     if (flag & SOR_ZERO_INITIAL_GUESS) {
911       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
912     }
913     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
914     CHKERRQ(ierr);
915     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
916     CHKERRQ(ierr);
917     while (its--) {
918       for ( i=0; i<m; i++ ) {
919         n    = A->i[i+1] - A->i[i];
920         idx  = A->j + A->i[i] + shift;
921         v    = A->a + A->i[i] + shift;
922         sum  = b[i];
923         SPARSEDENSEMDOT(sum,xs,v,idx,n);
924         d    = fshift + A->a[diag[i]+shift];
925         n    = B->i[i+1] - B->i[i];
926         idx  = B->j + B->i[i] + shift;
927         v    = B->a + B->i[i] + shift;
928         SPARSEDENSEMDOT(sum,ls,v,idx,n);
929         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
930       }
931     }
932   }
933   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
934     if (flag & SOR_ZERO_INITIAL_GUESS) {
935       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
936     }
937     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
938                             mat->Mvctx); CHKERRQ(ierr);
939     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
940                             mat->Mvctx); CHKERRQ(ierr);
941     while (its--) {
942       for ( i=m-1; i>-1; i-- ) {
943         n    = A->i[i+1] - A->i[i];
944         idx  = A->j + A->i[i] + shift;
945         v    = A->a + A->i[i] + shift;
946         sum  = b[i];
947         SPARSEDENSEMDOT(sum,xs,v,idx,n);
948         d    = fshift + A->a[diag[i]+shift];
949         n    = B->i[i+1] - B->i[i];
950         idx  = B->j + B->i[i] + shift;
951         v    = B->a + B->i[i] + shift;
952         SPARSEDENSEMDOT(sum,ls,v,idx,n);
953         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
954       }
955     }
956   }
957   return 0;
958 }
959 
960 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
961                              int *nzalloc,int *mem)
962 {
963   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
964   Mat        A = mat->A, B = mat->B;
965   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
966 
967   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
968   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
969   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
970   if (flag == MAT_LOCAL) {
971     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
972   } else if (flag == MAT_GLOBAL_MAX) {
973     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
974     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
975   } else if (flag == MAT_GLOBAL_SUM) {
976     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
977     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
978   }
979   return 0;
980 }
981 
982 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
983 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
984 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
985 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
986 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
987 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
988 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
989 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
990 
991 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
992 {
993   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
994 
995   if (op == NO_NEW_NONZERO_LOCATIONS ||
996       op == YES_NEW_NONZERO_LOCATIONS ||
997       op == COLUMNS_SORTED ||
998       op == ROW_ORIENTED) {
999         MatSetOption(a->A,op);
1000         MatSetOption(a->B,op);
1001   }
1002   else if (op == ROWS_SORTED ||
1003            op == SYMMETRIC_MATRIX ||
1004            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
1005            op == YES_NEW_DIAGONALS)
1006     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
1007   else if (op == COLUMN_ORIENTED)
1008     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");}
1009   else if (op == NO_NEW_DIAGONALS)
1010     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
1011   else
1012     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
1013   return 0;
1014 }
1015 
1016 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1017 {
1018   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1019   *m = mat->M; *n = mat->N;
1020   return 0;
1021 }
1022 
1023 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1024 {
1025   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1026   *m = mat->m; *n = mat->N;
1027   return 0;
1028 }
1029 
1030 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1031 {
1032   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1033   *m = mat->rstart; *n = mat->rend;
1034   return 0;
1035 }
1036 
1037 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1038 {
1039   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1040   Scalar     *vworkA, *vworkB, **pvA, **pvB;
1041   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1042   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1043 
1044   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows")
1045   lrow = row - rstart;
1046 
1047   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1048   if (!v)   {pvA = 0; pvB = 0;}
1049   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1050   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1051   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1052   nztot = nzA + nzB;
1053 
1054   if (v  || idx) {
1055     if (nztot) {
1056       /* Sort by increasing column numbers, assuming A and B already sorted */
1057       int imark;
1058       if (mat->assembled) {
1059         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1060       }
1061       if (v) {
1062         *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
1063         for ( i=0; i<nzB; i++ ) {
1064           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1065           else break;
1066         }
1067         imark = i;
1068         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1069         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1070       }
1071       if (idx) {
1072         *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1073         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1074         for ( i=0; i<nzB; i++ ) {
1075           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1076           else break;
1077         }
1078         imark = i;
1079         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1080         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
1081       }
1082     }
1083     else {*idx = 0; *v=0;}
1084   }
1085   *nz = nztot;
1086   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1087   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1088   return 0;
1089 }
1090 
1091 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1092 {
1093   if (idx) PETSCFREE(*idx);
1094   if (v) PETSCFREE(*v);
1095   return 0;
1096 }
1097 
/*
   MatNorm_MPIAIJ - Computes the Frobenius, one, or infinity norm of a
   parallel AIJ matrix from the local diagonal (A) and off-diagonal (B)
   blocks, reducing across all processors.  The two norm (largest
   singular value) is not supported.

   Note: the one-norm path allocates two work arrays of length N (the
   global column count), which does not scale with processor count.
*/
static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
  if (aij->size == 1) {
    /* uniprocessor: defer entirely to the sequential routine */
    ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squared magnitudes of all local entries (A then B),
         globally summed, then square-rooted */
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
      *norm = sqrt(*norm);
    }
    else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      /* tmp accumulates per-global-column absolute sums locally;
         tmp2 receives the global sums */
      tmp  = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
      PetscZero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      /* A stores local column numbers: global column = cstart + j + shift */
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
#if defined(PETSC_COMPLEX)
        tmp[cstart + *jj++ + shift] += abs(*v++);
#else
        tmp[cstart + *jj++ + shift] += fabs(*v++);
#endif
      }
      /* B stores compressed column numbers: global column via garray */
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
#if defined(PETSC_COMPLEX)
        tmp[garray[*jj++ + shift]] += abs(*v++);
#else
        tmp[garray[*jj++ + shift]] += fabs(*v++);
#endif
      }
      MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PETSCFREE(tmp); PETSCFREE(tmp2);
    }
    else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      /* rows are distributed, so each processor takes the max over its
         own rows (A part plus B part), then a global max is formed */
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
#if defined(PETSC_COMPLEX)
          sum += abs(*v); v++;
#else
          sum += fabs(*v); v++;
#endif
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
#if defined(PETSC_COMPLEX)
          sum += abs(*v); v++;
#else
          sum += fabs(*v); v++;
#endif
        }
        if (sum > ntemp) ntemp = sum;
      }
      MPI_Allreduce((void*)&ntemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
    }
    else {
      SETERRQ(1,"MatNorm_MPIAIJ:No support for two norm");
    }
  }
  return 0;
}
1189 
1190 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1191 {
1192   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1193   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1194   int        ierr,shift = Aloc->indexshift;
1195   Mat        B;
1196   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1197   Scalar     *array;
1198 
1199   if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
1200   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
1201   CHKERRQ(ierr);
1202 
1203   /* copy over the A part */
1204   Aloc = (Mat_SeqAIJ*) a->A->data;
1205   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1206   row = a->rstart;
1207   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1208   for ( i=0; i<m; i++ ) {
1209     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1210     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1211   }
1212   aj = Aloc->j;
1213   for ( i=0; i<ai[m]|+shift; i++ ) {aj[i] -= a->cstart + shift;}
1214 
1215   /* copy over the B part */
1216   Aloc = (Mat_SeqAIJ*) a->B->data;
1217   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1218   row = a->rstart;
1219   ct = cols = (int *) PETSCMALLOC( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1220   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1221   for ( i=0; i<m; i++ ) {
1222     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1223     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1224   }
1225   PETSCFREE(ct);
1226   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1227   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1228   if (matout) {
1229     *matout = B;
1230   } else {
1231     /* This isn't really an in-place transpose .... but free data structures from a */
1232     PETSCFREE(a->rowners);
1233     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1234     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1235     if (a->colmap) PETSCFREE(a->colmap);
1236     if (a->garray) PETSCFREE(a->garray);
1237     if (a->lvec) VecDestroy(a->lvec);
1238     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1239     PETSCFREE(a);
1240     PetscMemcpy(A,B,sizeof(struct _Mat));
1241     PETSCHEADERDESTROY(B);
1242   }
1243   return 0;
1244 }
1245 
1246 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1247 static int MatCopyPrivate_MPIAIJ(Mat,Mat *,int);
1248 
1249 /* -------------------------------------------------------------------*/
/* Function table for the MPIAIJ matrix type.  The initializers are
   positional: their order must match the slots of struct _MatOps
   exactly, with 0 marking operations this format does not support. */
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
       MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
       MatLUFactor_MPIAIJ,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,0,
       MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       MatGetReordering_MPIAIJ,
       MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       MatILUFactorSymbolic_MPIAIJ,0,
       0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};
1269 
1270 /*@C
1271    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1272    (the default parallel PETSc format).
1273 
1274    Input Parameters:
1275 .  comm - MPI communicator
1276 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1277 .  n - number of local columns (or PETSC_DECIDE to have calculated
1278            if N is given)
1279 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1280 .  N - number of global columns (or PETSC_DECIDE to have calculated
1281            if n is given)
1282 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1283            (same for all local rows)
.  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
1285            or null (possibly different for each row).  You must leave room
1286            for the diagonal entry even if it is zero.
1287 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1288            submatrix (same for all local rows).
.  o_nnz - number of nonzeros per row in off-diagonal portion of local
1290            submatrix or null (possibly different for each row).
1291 
1292    Output Parameter:
1293 .  newmat - the matrix
1294 
1295    Notes:
1296    The AIJ format (also called the Yale sparse matrix format or
1297    compressed row storage), is fully compatible with standard Fortran 77
1298    storage.  That is, the stored row and column indices can begin at
1299    either one (as in Fortran) or zero.  See the users manual for details.
1300 
1301    The user MUST specify either the local or global matrix dimensions
1302    (possibly both).
1303 
1304    Storage Information:
1305    For a square global matrix we define each processor's diagonal portion
1306    to be its local rows and the corresponding columns (a square submatrix);
1307    each processor's off-diagonal portion encompasses the remainder of the
1308    local matrix (a rectangular submatrix).
1309 
1310    The user can specify preallocated storage for the diagonal part of
1311    the local submatrix with either d_nz or d_nnz (not both). Set both
1312    d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
1313    Likewise, specify preallocated storage for the off-diagonal part of
1314    the local submatrix with o_nz or o_nnz (not both).
1315 
1316 .keywords: matrix, aij, compressed row, sparse, parallel
1317 
1318 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1319 @*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
                    int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
{
  Mat          mat;
  Mat_MPIAIJ   *a;
  int          ierr, i,sum[2],work[2];

  *newmat         = 0;
  PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->destroy    = MatDestroy_MPIAIJ;
  mat->view       = MatView_MPIAIJ;
  mat->factor     = 0;

  a->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&a->rank);
  MPI_Comm_size(comm,&a->size);

  /* per-row preallocation arrays require knowing the local row count */
  if (m == PETSC_DECIDE && (d_nnz || o_nnz))
    SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");

  /* fill in global dimensions left to PETSc by summing the local ones;
     note this MPI_Allreduce is collective, so all processors must agree
     on which dimensions are PETSC_DECIDE */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  /* fill in local dimensions left to PETSc: the first M%size (N%size)
     processors receive one extra row (column) */
  if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
  if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
  a->m = m;
  a->n = n;
  a->N = N;
  a->M = M;

  /* build local table of row and column ownerships */
  a->rowners = (int *) PETSCMALLOC(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
                       sizeof(Mat_MPIAIJ));
  /* cowners lives inside the same allocation, offset by size+1 entries */
  a->cowners = a->rowners + a->size + 1;
  MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
  a->rowners[0] = 0;
  /* prefix sum: rowners[i] = first global row owned by processor i */
  for ( i=2; i<=a->size; i++ ) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
  a->cowners[0] = 0;
  for ( i=2; i<=a->size; i++ ) {
    a->cowners[i] += a->cowners[i-1];
  }
  a->cstart = a->cowners[a->rank];
  a->cend   = a->cowners[a->rank+1];

  /* A holds the m x n diagonal block; B holds the off-diagonal block,
     created with the full global width N (its columns are presumably
     renumbered to a compressed local numbering later -- see
     garray/colmap -- TODO confirm against the assembly routines) */
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
  a->colmap    = 0;
  a->garray    = 0;

  /* stuff used for matrix vector multiply */
  a->lvec      = 0;
  a->Mvctx     = 0;
  a->assembled = 0;

  *newmat = mat;
  return 0;
}
1394 
1395 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
1396 {
1397   Mat        mat;
1398   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1399   int        ierr, len;
1400 
1401   if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix");
1402   *newmat       = 0;
1403   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1404   PLogObjectCreate(mat);
1405   mat->data     = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a);
1406   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1407   mat->destroy  = MatDestroy_MPIAIJ;
1408   mat->view     = MatView_MPIAIJ;
1409   mat->factor   = matin->factor;
1410 
1411   a->m          = oldmat->m;
1412   a->n          = oldmat->n;
1413   a->M          = oldmat->M;
1414   a->N          = oldmat->N;
1415 
1416   a->assembled  = 1;
1417   a->rstart     = oldmat->rstart;
1418   a->rend       = oldmat->rend;
1419   a->cstart     = oldmat->cstart;
1420   a->cend       = oldmat->cend;
1421   a->size       = oldmat->size;
1422   a->rank       = oldmat->rank;
1423   a->insertmode = NOT_SET_VALUES;
1424 
1425   a->rowners = (int *) PETSCMALLOC((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
1426   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1427   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1428   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1429   if (oldmat->colmap) {
1430     a->colmap = (int *) PETSCMALLOC((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1431     PLogObjectMemory(mat,(a->N)*sizeof(int));
1432     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1433   } else a->colmap = 0;
1434   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
1435     a->garray = (int *) PETSCMALLOC(len*sizeof(int)); CHKPTRQ(a->garray);
1436     PLogObjectMemory(mat,len*sizeof(int));
1437     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1438   } else a->garray = 0;
1439 
1440   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1441   PLogObjectParent(mat,a->lvec);
1442   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1443   PLogObjectParent(mat,a->Mvctx);
1444   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1445   PLogObjectParent(mat,a->A);
1446   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1447   PLogObjectParent(mat,a->B);
1448   *newmat = mat;
1449   return 0;
1450 }
1451 
1452 #include "sysio.h"
1453 
/*
   MatLoad_MPIAIJorMPIRow - Loads a matrix stored in PETSc binary format
   from a viewer, creating either an MPIAIJ or an MPIRow matrix.

   Processor 0 reads the whole file -- header, row lengths, column
   indices, then numerical values -- keeping its own slice and shipping
   every other processor its portion with point-to-point messages.  Rows
   are partitioned the same way MatCreateMPIAIJ() partitions them, and
   the row lengths are pre-split into diagonal/off-diagonal counts so
   the matrix can be exactly preallocated.
*/
int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*svals;
  PetscObject  vobj = (PetscObject) bview;
  MPI_Comm     comm = vobj->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)bview)->tag;

  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  /* only processor 0 touches the file */
  if (!rank) {
    ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
    ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
  }

  /* everyone needs the global dimensions */
  MPI_Bcast(header+1,3,MPI_INT,0,comm);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  rowners = (int *) PETSCMALLOC((size+2)*sizeof(int)); CHKPTRQ(rowners);
  MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
  rowners[0] = 0;
  /* prefix sum: rowners[i] = first global row owned by processor i */
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
    sndcounts = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array for the scatter */
    MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
    PETSCFREE(sndcounts);
  }
  else {
    MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscZero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PETSCFREE(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PETSCMAX(maxnz,procsnz[i]);
    }
    cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
      MPI_Send(cols,nz,MPI_INT,i,tag,comm);
    }
    PETSCFREE(cols);
  }
  else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices*/
    MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
    MPI_Get_count(&status,MPI_INT,&maxnz);
    /* sanity check: message length must equal the sum of our row lengths */
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  PetscZero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      /* columns outside our row range go in the off-diagonal block;
         NOTE(review): this compares columns against the ROW ownership
         range, which assumes the square row/column partitioning used
         here -- confirm if the partitionings ever differ */
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  /* temporarily make ourlens the diagonal-block counts only */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  if (type == MATMPIAIJ) {
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  }
  else if (type == MATMPIROW) {
    ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  }
  A = *newmat;
  MatSetOption(A,COLUMNS_SORTED);
  /* restore ourlens to the full per-row lengths for the insertion loops */
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for ( i=1; i<size; i++ ) {
      nz = procsnz[i];
      ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
      MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
    }
    PETSCFREE(procsnz);
  }
  else {
    /* receive numeric values */
    vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values*/
    MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
    MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
    if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners);

  ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
  return 0;
}
1624