xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 8965ea79aaf4087e157537e1c4a1c2d61645c0b7)
1 #ifndef lint
2 static char vcid[] = "$Id: mpiaij.c,v 1.90 1995/10/17 02:21:15 curfman Exp curfman $";
3 #endif
4 
5 #include "mpidense.h"
6 #include "vec/vecimpl.h"
7 #include "inline/spops.h"
8 
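/*
   MatSetValues for the parallel AIJ format: entries that fall in locally
   owned rows go directly into either the diagonal block (aij->A) or the
   off-diagonal block (aij->B); entries destined for rows owned by other
   processors are stashed and shipped to their owners during the
   MatAssemblyBegin/MatAssemblyEnd pair below.
*/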
9 static int MatSetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,
10                                int *idxn,Scalar *v,InsertMode addv)
11 {
12   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
13   Mat_SeqAIJ *C = (Mat_SeqAIJ*) aij->A->data;
14   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
15   int        cstart = aij->cstart, cend = aij->cend,row,col;
16   int        shift = C->indexshift;
17 
18   if (aij->insertmode != NOT_SET_VALUES && aij->insertmode != addv) {
19     SETERRQ(1,"MatSetValues_MPIAIJ:Cannot mix inserts and adds");
20   }
21   aij->insertmode = addv;
22   for ( i=0; i<m; i++ ) {
23     if (idxm[i] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative row");
24     if (idxm[i] >= aij->M) SETERRQ(1,"MatSetValues_MPIAIJ:Row too large");
25     if (idxm[i] >= rstart && idxm[i] < rend) {
26       row = idxm[i] - rstart;
27       for ( j=0; j<n; j++ ) {
28         if (idxn[j] < 0) SETERRQ(1,"MatSetValues_MPIAIJ:Negative column");
29         if (idxn[j] >= aij->N) SETERRQ(1,"MatSetValues_MPIAIJ:Col too large");
30         if (idxn[j] >= cstart && idxn[j] < cend){
31           col = idxn[j] - cstart;
32           ierr = MatSetValues(aij->A,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
33         }
34         else {
35           if (aij->assembled) {
36             if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
37             col = aij->colmap[idxn[j]] + shift;
38             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
39               ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
40               col =  idxn[j];
41             }
42           }
43           else col = idxn[j];
44           ierr = MatSetValues(aij->B,1,&row,1,&col,v+i*n+j,addv);CHKERRQ(ierr);
45         }
46       }
47     }
48     else {
49       ierr = StashValues_Private(&aij->stash,idxm[i],n,idxn,v+i*n,addv);CHKERRQ(ierr);
50     }
51   }
52   return 0;
53 }
54 
55 static int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
56 {
57   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
58   MPI_Comm    comm = mat->comm;
59   int         size = aij->size, *owners = aij->rowners;
60   int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
61   MPI_Request *send_waits,*recv_waits;
62   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
63   InsertMode  addv;
64   Scalar      *rvalues,*svalues;
65 
66   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
67   MPI_Allreduce(&aij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
68   if (addv == (ADD_VALUES|INSERT_VALUES)) {
69     SETERRQ(1,"MatAssemblyBegin_MPIAIJ:Some processors inserted others added");
70   }
71   aij->insertmode = addv; /* in case this processor had no cache */
72 
73   /*  first count number of contributors to each processor */
74   nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
75   PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
76   owner = (int *) PETSCMALLOC( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
77   for ( i=0; i<aij->stash.n; i++ ) {
78     idx = aij->stash.idx[i];
79     for ( j=0; j<size; j++ ) {
80       if (idx >= owners[j] && idx < owners[j+1]) {
81         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
82       }
83     }
84   }
85   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
86 
87   /* inform other processors of number of messages and max length*/
88   work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
89   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
90   nreceives = work[rank];
91   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
92   nmax = work[rank];
93   PETSCFREE(work);
94 
95   /* post receives:
96        1) each message will consist of ordered triples
97      (global row, global column, value); the indices are stored as Scalars
98      to simplify the message passing.
99        2) since we don't know how long each individual message is we
100      allocate the largest needed buffer for each receive. Potentially
101      this is a lot of wasted space.
102 
103 
104        This could be done better.
105   */
106   rvalues = (Scalar *) PETSCMALLOC(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
107   CHKPTRQ(rvalues);
108   recv_waits = (MPI_Request *) PETSCMALLOC((nreceives+1)*sizeof(MPI_Request));
109   CHKPTRQ(recv_waits);
110   for ( i=0; i<nreceives; i++ ) {
111     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
112               comm,recv_waits+i);
113   }
114 
115   /* do sends:
116       1) starts[i] gives the starting index in svalues for stuff going to
117          the ith processor
118   */
119   svalues = (Scalar *) PETSCMALLOC(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
120   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
121   CHKPTRQ(send_waits);
122   starts = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(starts);
123   starts[0] = 0;
124   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
125   for ( i=0; i<aij->stash.n; i++ ) {
126     svalues[3*starts[owner[i]]]       = (Scalar)  aij->stash.idx[i];
127     svalues[3*starts[owner[i]]+1]     = (Scalar)  aij->stash.idy[i];
128     svalues[3*(starts[owner[i]]++)+2] =  aij->stash.array[i];
129   }
130   PETSCFREE(owner);
131   starts[0] = 0;
132   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
133   count = 0;
134   for ( i=0; i<size; i++ ) {
135     if (procs[i]) {
136       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
137                 comm,send_waits+count++);
138     }
139   }
140   PETSCFREE(starts); PETSCFREE(nprocs);
141 
142   /* Free cache space */
143   ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
144 
145   aij->svalues    = svalues;    aij->rvalues    = rvalues;
146   aij->nsends     = nsends;     aij->nrecvs     = nreceives;
147   aij->send_waits = send_waits; aij->recv_waits = recv_waits;
148   aij->rmax       = nmax;
149 
150   return 0;
151 }
152 extern int MatSetUpMultiply_MPIAIJ(Mat);
153 
154 static int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
155 {
156   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
157   Mat_SeqAIJ *C = (Mat_SeqAIJ *) aij->A->data;
158   MPI_Status  *send_status,recv_status;
159   int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
160   int         row,col,other_disassembled,shift = C->indexshift;
161   Scalar      *values,val;
162   InsertMode  addv = aij->insertmode;
163 
164   /*  wait on receives */
165   while (count) {
166     MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);
167     /* unpack receives into our local space */
168     values = aij->rvalues + 3*imdex*aij->rmax;
169     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
170     n = n/3;
171     for ( i=0; i<n; i++ ) {
172       row = (int) PETSCREAL(values[3*i]) - aij->rstart;
173       col = (int) PETSCREAL(values[3*i+1]);
174       val = values[3*i+2];
175       if (col >= aij->cstart && col < aij->cend) {
176         col -= aij->cstart;
177         MatSetValues(aij->A,1,&row,1,&col,&val,addv);
178       }
179       else {
180         if (aij->assembled) {
181           if (!aij->colmap) {ierr = CreateColmap_Private(mat);CHKERRQ(ierr);}
182           col = aij->colmap[col] + shift;
183           if (col < 0  && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
184             ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
185             col = (int) PETSCREAL(values[3*i+1]);
186           }
187         }
188         MatSetValues(aij->B,1,&row,1,&col,&val,addv);
189       }
190     }
191     count--;
192   }
193   PETSCFREE(aij->recv_waits); PETSCFREE(aij->rvalues);
194 
195   /* wait on sends */
196   if (aij->nsends) {
197     send_status = (MPI_Status *) PETSCMALLOC(aij->nsends*sizeof(MPI_Status));
198     CHKPTRQ(send_status);
199     MPI_Waitall(aij->nsends,aij->send_waits,send_status);
200     PETSCFREE(send_status);
201   }
202   PETSCFREE(aij->send_waits); PETSCFREE(aij->svalues);
203 
204   aij->insertmode = NOT_SET_VALUES;
205   ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
206   ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);
207 
208   /* determine if any processor has disassembled, if so we must
209      also disassemble ourselves, in order that we may reassemble. */
210   MPI_Allreduce(&aij->assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
211   if (aij->assembled && !other_disassembled) {
212     ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
213   }
214 
215   if (!aij->assembled && mode == FINAL_ASSEMBLY) {
216     ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
217     aij->assembled = 1;
218   }
219   ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
220   ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);
221 
222   return 0;
223 }
224 
225 static int MatZeroEntries_MPIAIJ(Mat A)
226 {
227   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
228   int        ierr;
229   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
230   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
231   return 0;
232 }
233 
234 /* the code does not do the diagonal entries correctly unless the
235    matrix is square and the column and row ownerships are identical.
236    This is a BUG. The only way to fix it seems to be to access
237    aij->A and aij->B directly and not through the MatZeroRows()
238    routine.
239 */
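/*
   Overview of the algorithm below: each processor determines, for every row
   index it was handed, which processor owns that row and mails the index
   there; the owning processors gather the incoming indices into a local
   index set (lrows) and then apply the sequential MatZeroRows() to their
   diagonal (l->A) and off-diagonal (l->B) blocks.
*/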
240 static int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
241 {
242   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
243   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
244   int            *procs,*nprocs,j,found,idx,nsends,*work;
245   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
246   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
247   int            *lens,imdex,*lrows,*values;
248   MPI_Comm       comm = A->comm;
249   MPI_Request    *send_waits,*recv_waits;
250   MPI_Status     recv_status,*send_status;
251   IS             istmp;
252 
253   if (!l->assembled) SETERRQ(1,"MatZeroRows_MPIAIJ:Must assemble matrix");
254   ierr = ISGetLocalSize(is,&N); CHKERRQ(ierr);
255   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
256 
257   /*  first count number of contributors to each processor */
258   nprocs = (int *) PETSCMALLOC( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
259   PetscZero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
260   owner = (int *) PETSCMALLOC((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
261   for ( i=0; i<N; i++ ) {
262     idx = rows[i];
263     found = 0;
264     for ( j=0; j<size; j++ ) {
265       if (idx >= owners[j] && idx < owners[j+1]) {
266         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
267       }
268     }
269     if (!found) SETERRQ(1,"MatZeroRows_MPIAIJ:Index out of range");
270   }
271   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
272 
273   /* inform other processors of number of messages and max length*/
274   work = (int *) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(work);
275   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
276   nrecvs = work[rank];
277   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
278   nmax = work[rank];
279   PETSCFREE(work);
280 
281   /* post receives:   */
282   rvalues = (int *) PETSCMALLOC((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
283   CHKPTRQ(rvalues);
284   recv_waits = (MPI_Request *) PETSCMALLOC((nrecvs+1)*sizeof(MPI_Request));
285   CHKPTRQ(recv_waits);
286   for ( i=0; i<nrecvs; i++ ) {
287     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
288   }
289 
290   /* do sends:
291       1) starts[i] gives the starting index in svalues for stuff going to
292          the ith processor
293   */
294   svalues = (int *) PETSCMALLOC( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
295   send_waits = (MPI_Request *) PETSCMALLOC( (nsends+1)*sizeof(MPI_Request));
296   CHKPTRQ(send_waits);
297   starts = (int *) PETSCMALLOC( (size+1)*sizeof(int) ); CHKPTRQ(starts);
298   starts[0] = 0;
299   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
300   for ( i=0; i<N; i++ ) {
301     svalues[starts[owner[i]]++] = rows[i];
302   }
303   ISRestoreIndices(is,&rows);
304 
305   starts[0] = 0;
306   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
307   count = 0;
308   for ( i=0; i<size; i++ ) {
309     if (procs[i]) {
310       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
311     }
312   }
313   PETSCFREE(starts);
314 
315   base = owners[rank];
316 
317   /*  wait on receives */
318   lens   = (int *) PETSCMALLOC( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
319   source = lens + nrecvs;
320   count  = nrecvs; slen = 0;
321   while (count) {
322     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
323     /* unpack receives into our local space */
324     MPI_Get_count(&recv_status,MPI_INT,&n);
325     source[imdex]  = recv_status.MPI_SOURCE;
326     lens[imdex]  = n;
327     slen += n;
328     count--;
329   }
330   PETSCFREE(recv_waits);
331 
332   /* move the data into the send scatter */
333   lrows = (int *) PETSCMALLOC( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
334   count = 0;
335   for ( i=0; i<nrecvs; i++ ) {
336     values = rvalues + i*nmax;
337     for ( j=0; j<lens[i]; j++ ) {
338       lrows[count++] = values[j] - base;
339     }
340   }
341   PETSCFREE(rvalues); PETSCFREE(lens);
342   PETSCFREE(owner); PETSCFREE(nprocs);
343 
344   /* actually zap the local rows */
345   ierr = ISCreateSeq(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
346   PLogObjectParent(A,istmp);
347   PETSCFREE(lrows);
348   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
349   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
350   ierr = ISDestroy(istmp); CHKERRQ(ierr);
351 
352   /* wait on sends */
353   if (nsends) {
354     send_status = (MPI_Status *) PETSCMALLOC(nsends*sizeof(MPI_Status));
355     CHKPTRQ(send_status);
356     MPI_Waitall(nsends,send_waits,send_status);
357     PETSCFREE(send_status);
358   }
359   PETSCFREE(send_waits); PETSCFREE(svalues);
360 
361   return 0;
362 }
363 
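/*
   Matrix-vector products: the locally owned rows are stored as [A  B], where
   A holds the columns of this processor's diagonal block and B holds the
   remaining (off-processor) columns in compressed form (see garray).  Thus

       y_local = A*x_local + B*x_ghost,

   where x_ghost (a->lvec) is filled by the VecScatter; the scatter is
   overlapped with the local product A*x_local.
*/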
364 static int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
365 {
366   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
367   int        ierr;
368 
369   if (!a->assembled) SETERRQ(1,"MatMult_MPIAIJ:must assemble matrix");
370   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
371   ierr = MatMult(a->A,xx,yy); CHKERRQ(ierr);
372   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
373   ierr = MatMultAdd(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
374   return 0;
375 }
376 
377 static int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
378 {
379   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
380   int        ierr;
381   if (!a->assembled) SETERRQ(1,"MatMultAdd_MPIAIJ:must assemble matrix");
382   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
383   ierr = MatMultAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
384   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_ALL,a->Mvctx);CHKERRQ(ierr);
385   ierr = MatMultAdd(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
386   return 0;
387 }
388 
389 static int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
390 {
391   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
392   int        ierr;
393 
394   if (!a->assembled) SETERRQ(1,"MatMultTrans_MPIAIJ:must assemble matrix");
395   /* do nondiagonal part */
396   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
397   /* send it on its way */
398   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,
399                 (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
400   /* do local part */
401   ierr = MatMultTrans(a->A,xx,yy); CHKERRQ(ierr);
402   /* receive remote parts: note this assumes the values are not actually */
403   /* inserted in yy until the next line, which is true for my implementation*/
404   /* but is not perhaps always true. */
405   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,
406                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
407   return 0;
408 }
409 
410 static int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
411 {
412   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
413   int        ierr;
414 
415   if (!a->assembled) SETERRQ(1,"MatMultTransAdd_MPIAIJ:must assemble matrix");
416   /* do nondiagonal part */
417   ierr = MatMultTrans(a->B,xx,a->lvec); CHKERRQ(ierr);
418   /* send it on its way */
419   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,
420                  (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
421   /* do local part */
422   ierr = MatMultTransAdd(a->A,xx,yy,zz); CHKERRQ(ierr);
423   /* receive remote parts: note this assumes the values are not actually */
424   /* inserted in zz until the next line, which is true for my implementation*/
425   /* but is not perhaps always true. */
426   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,
427                   (ScatterMode)(SCATTER_ALL|SCATTER_REVERSE),a->Mvctx); CHKERRQ(ierr);
428   return 0;
429 }
430 
431 /*
432   This only works correctly for square matrices where the subblock A->A is the
433    diagonal block
434 */
435 static int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
436 {
437   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
438   if (!a->assembled) SETERRQ(1,"MatGetDiagonal_MPIAIJ:must assemble matrix");
439   return MatGetDiagonal(a->A,v);
440 }
441 
442 static int MatDestroy_MPIAIJ(PetscObject obj)
443 {
444   Mat        mat = (Mat) obj;
445   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
446   int        ierr;
447 #if defined(PETSC_LOG)
448   PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N);
449 #endif
450   PETSCFREE(aij->rowners);
451   ierr = MatDestroy(aij->A); CHKERRQ(ierr);
452   ierr = MatDestroy(aij->B); CHKERRQ(ierr);
453   if (aij->colmap) PETSCFREE(aij->colmap);
454   if (aij->garray) PETSCFREE(aij->garray);
455   if (aij->lvec)   VecDestroy(aij->lvec);
456   if (aij->Mvctx)  VecScatterCtxDestroy(aij->Mvctx);
457   PETSCFREE(aij);
458   PLogObjectDestroy(mat);
459   PETSCHEADERDESTROY(mat);
460   return 0;
461 }
462 #include "draw.h"
463 #include "pinclude/pviewer.h"
464 
465 static int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
466 {
467   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
468   int         ierr;
469 
470   if (aij->size == 1) {
471     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
472   }
473   else SETERRQ(1,"MatView_MPIAIJ_Binary:Only uniprocessor output supported");
474   return 0;
475 }
476 
477 static int MatView_MPIAIJ_ASCIIorDraw(Mat mat,Viewer viewer)
478 {
479   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
480   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
481   int         ierr, format,shift = C->indexshift;
482   PetscObject vobj = (PetscObject) viewer;
483   FILE        *fd;
484 
485   if (vobj->type == ASCII_FILE_VIEWER || vobj->type == ASCII_FILES_VIEWER) {
486     ierr = ViewerFileGetFormat_Private(viewer,&format);
487     if (format == FILE_FORMAT_INFO) {
488       return 0;
489     }
490   }
491 
492   if (vobj->type == ASCII_FILE_VIEWER) {
493     ierr = ViewerFileGetPointer_Private(viewer,&fd); CHKERRQ(ierr);
494     MPIU_Seq_begin(mat->comm,1);
495     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
496            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
497            aij->cend);
498     ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
499     ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
500     fflush(fd);
501     MPIU_Seq_end(mat->comm,1);
502   }
503   else {
504     int size = aij->size, rank = aij->rank;
505     if (size == 1) {
506       ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
507     }
508     else {
509       /* assemble the entire matrix onto first processor. */
510       Mat         A;
511       Mat_SeqAIJ *Aloc;
512       int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
513       Scalar      *a;
514 
515       if (!rank) {
516         ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,0,0,0,&A);CHKERRQ(ierr);
517       }
518       else {
519         ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,0,0,0,&A);CHKERRQ(ierr);
520       }
521       PLogObjectParent(mat,A);
522 
523 
524       /* copy over the A part */
525       Aloc = (Mat_SeqAIJ*) aij->A->data;
526       m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
527       row = aij->rstart;
528       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
529       for ( i=0; i<m; i++ ) {
530         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
531         row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
532       }
533       aj = Aloc->j;
534       for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
535 
536       /* copy over the B part */
537       Aloc = (Mat_SeqAIJ*) aij->B->data;
538       m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
539       row = aij->rstart;
540       ct = cols = (int *) PETSCMALLOC( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
541       for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
542       for ( i=0; i<m; i++ ) {
543         ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
544         row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
545       }
546       PETSCFREE(ct);
547       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
548       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
549       if (!rank) {
550         ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
551       }
552       ierr = MatDestroy(A); CHKERRQ(ierr);
553     }
554   }
555   return 0;
556 }
557 
558 static int MatView_MPIAIJ(PetscObject obj,Viewer viewer)
559 {
560   Mat         mat = (Mat) obj;
561   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
562   int         ierr;
563   PetscObject vobj = (PetscObject) viewer;
564 
565   if (!aij->assembled) SETERRQ(1,"MatView_MPIAIJ:must assemble matrix");
566   if (!viewer) {
567     viewer = STDOUT_VIEWER_SELF; vobj = (PetscObject) viewer;
568   }
569   if (vobj->cookie == DRAW_COOKIE && vobj->type == NULLWINDOW) return 0;
570   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILE_VIEWER) {
571     ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
572   }
573   else if (vobj->cookie == VIEWER_COOKIE && vobj->type == ASCII_FILES_VIEWER) {
574     ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
575   }
576   else if (vobj->cookie == DRAW_COOKIE) {
577     ierr = MatView_MPIAIJ_ASCIIorDraw(mat,viewer); CHKERRQ(ierr);
578   }
579   else if (vobj->type == BINARY_FILE_VIEWER) {
580     return MatView_MPIAIJ_Binary(mat,viewer);
581   }
582   return 0;
583 }
584 
585 extern int MatMarkDiag_SeqAIJ(Mat);
586 /*
587     This has to provide several versions.
588 
589      1) as in the sequential case
590      2) a) use only local smoothing, updating outer values only once.
591         b) local smoothing, updating outer values each inner iteration.
592      3) coloring, updating outer values between colors.
593 */
594 static int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
595                            double fshift,int its,Vec xx)
596 {
597   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
598   Mat        AA = mat->A, BB = mat->B;
599   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
600   Scalar     zero = 0.0,*b,*x,*xs,*ls,d,*v,sum,scale,*t,*ts;
601   int        ierr,*idx, *diag;
602   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
603   Vec        tt;
604 
605   if (!mat->assembled) SETERRQ(1,"MatRelax_MPIAIJ:must assemble matrix");
606 
607   VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
608   xs = x + shift; /* shift by one for index start of 1 */
609   ls = ls + shift;
610   if (!A->diag) {if ((ierr = MatMarkDiag_SeqAIJ(AA))) return ierr;}
611   diag = A->diag;
612   if (flag == SOR_APPLY_UPPER || flag == SOR_APPLY_LOWER) {
613     SETERRQ(1,"MatRelax_MPIAIJ:Option not supported");
614   }
615   if (flag & SOR_EISENSTAT) {
616     /* Let  A = L + D + U, where L is strictly lower triangular,
617     U is strictly upper triangular, and D is diagonal. This routine applies
618 
619             (L + E)^{-1} A (U + E)^{-1}
620 
621     to a vector efficiently using Eisenstat's trick. This is for
622     the case of SSOR preconditioner, so E is D/omega where omega
623     is the relaxation factor.
624     */
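    /* A sketch of why the three steps below apply (L + E)^{-1} A (U + E)^{-1}
       (using the splitting in the comment above, with E = D/omega):

           A = (L + E) + (U + E) + (D - 2E),

       so for a given vector b

           (L+E)^{-1} A (U+E)^{-1} b
              = (U+E)^{-1} b + (L+E)^{-1} ( b + (D - 2E)(U+E)^{-1} b ).

       Step 1 computes x = (U+E)^{-1} b, step 2 forms t = b - (2E - D) x,
       step 3 overwrites t with (L+E)^{-1} t, and the result is x + t.
    */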
625     ierr = VecDuplicate(xx,&tt); CHKERRQ(ierr);
626     PLogObjectParent(matin,tt);
627     VecGetArray(tt,&t);
628     scale = (2.0/omega) - 1.0;
629     /*  x = (E + U)^{-1} b */
630     VecSet(&zero,mat->lvec);
631     ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
632                               mat->Mvctx); CHKERRQ(ierr);
633     for ( i=m-1; i>-1; i-- ) {
634       n    = A->i[i+1] - diag[i] - 1;
635       idx  = A->j + diag[i] + !shift;
636       v    = A->a + diag[i] + !shift;
637       sum  = b[i];
638       SPARSEDENSEMDOT(sum,xs,v,idx,n);
639       d    = fshift + A->a[diag[i]+shift];
640       n    = B->i[i+1] - B->i[i];
641       idx  = B->j + B->i[i] + shift;
642       v    = B->a + B->i[i] + shift;
643       SPARSEDENSEMDOT(sum,ls,v,idx,n);
644       x[i] = omega*(sum/d);
645     }
646     ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
647                             mat->Mvctx); CHKERRQ(ierr);
648 
649     /*  t = b - (2*E - D)x */
650     v = A->a;
651     for ( i=0; i<m; i++ ) { t[i] = b[i] - scale*(v[*diag++ + shift])*x[i]; }
652 
653     /*  t = (E + L)^{-1}t */
654     ts = t + shift; /* shifted by one for index start of a or mat->j*/
655     diag = A->diag;
656     VecSet(&zero,mat->lvec);
657     ierr = VecPipelineBegin(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
658                                                  mat->Mvctx); CHKERRQ(ierr);
659     for ( i=0; i<m; i++ ) {
660       n    = diag[i] - A->i[i];
661       idx  = A->j + A->i[i] + shift;
662       v    = A->a + A->i[i] + shift;
663       sum  = t[i];
664       SPARSEDENSEMDOT(sum,ts,v,idx,n);
665       d    = fshift + A->a[diag[i]+shift];
666       n    = B->i[i+1] - B->i[i];
667       idx  = B->j + B->i[i] + shift;
668       v    = B->a + B->i[i] + shift;
669       SPARSEDENSEMDOT(sum,ls,v,idx,n);
670       t[i] = omega*(sum/d);
671     }
672     ierr = VecPipelineEnd(tt,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
673                                                     mat->Mvctx); CHKERRQ(ierr);
674     /*  x = x + t */
675     for ( i=0; i<m; i++ ) { x[i] += t[i]; }
676     VecDestroy(tt);
677     return 0;
678   }
679 
680 
681   if ((flag & SOR_SYMMETRIC_SWEEP) == SOR_SYMMETRIC_SWEEP){
682     if (flag & SOR_ZERO_INITIAL_GUESS) {
683       VecSet(&zero,mat->lvec); VecSet(&zero,xx);
684     }
685     else {
686       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
687       CHKERRQ(ierr);
688       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
689       CHKERRQ(ierr);
690     }
691     while (its--) {
692       /* go down through the rows */
693       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
694                               mat->Mvctx); CHKERRQ(ierr);
695       for ( i=0; i<m; i++ ) {
696         n    = A->i[i+1] - A->i[i];
697         idx  = A->j + A->i[i] + shift;
698         v    = A->a + A->i[i] + shift;
699         sum  = b[i];
700         SPARSEDENSEMDOT(sum,xs,v,idx,n);
701         d    = fshift + A->a[diag[i]+shift];
702         n    = B->i[i+1] - B->i[i];
703         idx  = B->j + B->i[i] + shift;
704         v    = B->a + B->i[i] + shift;
705         SPARSEDENSEMDOT(sum,ls,v,idx,n);
706         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
707       }
708       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
709                             mat->Mvctx); CHKERRQ(ierr);
710       /* come up through the rows */
711       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
712                               mat->Mvctx); CHKERRQ(ierr);
713       for ( i=m-1; i>-1; i-- ) {
714         n    = A->i[i+1] - A->i[i];
715         idx  = A->j + A->i[i] + shift;
716         v    = A->a + A->i[i] + shift;
717         sum  = b[i];
718         SPARSEDENSEMDOT(sum,xs,v,idx,n);
719         d    = fshift + A->a[diag[i]+shift];
720         n    = B->i[i+1] - B->i[i];
721         idx  = B->j + B->i[i] + shift;
722         v    = B->a + B->i[i] + shift;
723         SPARSEDENSEMDOT(sum,ls,v,idx,n);
724         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
725       }
726       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
727                             mat->Mvctx); CHKERRQ(ierr);
728     }
729   }
730   else if (flag & SOR_FORWARD_SWEEP){
731     if (flag & SOR_ZERO_INITIAL_GUESS) {
732       VecSet(&zero,mat->lvec);
733       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
734                               mat->Mvctx); CHKERRQ(ierr);
735       for ( i=0; i<m; i++ ) {
736         n    = diag[i] - A->i[i];
737         idx  = A->j + A->i[i] + shift;
738         v    = A->a + A->i[i] + shift;
739         sum  = b[i];
740         SPARSEDENSEMDOT(sum,xs,v,idx,n);
741         d    = fshift + A->a[diag[i]+shift];
742         n    = B->i[i+1] - B->i[i];
743         idx  = B->j + B->i[i] + shift;
744         v    = B->a + B->i[i] + shift;
745         SPARSEDENSEMDOT(sum,ls,v,idx,n);
746         x[i] = omega*(sum/d);
747       }
748       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
749                             mat->Mvctx); CHKERRQ(ierr);
750       its--;
751     }
752     while (its--) {
753       ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
754       CHKERRQ(ierr);
755       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_UP,mat->Mvctx);
756       CHKERRQ(ierr);
757       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
758                               mat->Mvctx); CHKERRQ(ierr);
759       for ( i=0; i<m; i++ ) {
760         n    = A->i[i+1] - A->i[i];
761         idx  = A->j + A->i[i] + shift;
762         v    = A->a + A->i[i] + shift;
763         sum  = b[i];
764         SPARSEDENSEMDOT(sum,xs,v,idx,n);
765         d    = fshift + A->a[diag[i]+shift];
766         n    = B->i[i+1] - B->i[i];
767         idx  = B->j + B->i[i] + shift;
768         v    = B->a + B->i[i] + shift;
769         SPARSEDENSEMDOT(sum,ls,v,idx,n);
770         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
771       }
772       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_DOWN,
773                             mat->Mvctx); CHKERRQ(ierr);
774     }
775   }
776   else if (flag & SOR_BACKWARD_SWEEP){
777     if (flag & SOR_ZERO_INITIAL_GUESS) {
778       VecSet(&zero,mat->lvec);
779       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
780                               mat->Mvctx); CHKERRQ(ierr);
781       for ( i=m-1; i>-1; i-- ) {
782         n    = A->i[i+1] - diag[i] - 1;
783         idx  = A->j + diag[i] + !shift;
784         v    = A->a + diag[i] + !shift;
785         sum  = b[i];
786         SPARSEDENSEMDOT(sum,xs,v,idx,n);
787         d    = fshift + A->a[diag[i]+shift];
788         n    = B->i[i+1] - B->i[i];
789         idx  = B->j + B->i[i] + shift;
790         v    = B->a + B->i[i] + shift;
791         SPARSEDENSEMDOT(sum,ls,v,idx,n);
792         x[i] = omega*(sum/d);
793       }
794       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
795                             mat->Mvctx); CHKERRQ(ierr);
796       its--;
797     }
798     while (its--) {
799       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
800                             mat->Mvctx); CHKERRQ(ierr);
801       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_DOWN,
802                             mat->Mvctx); CHKERRQ(ierr);
803       ierr = VecPipelineBegin(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
804                               mat->Mvctx); CHKERRQ(ierr);
805       for ( i=m-1; i>-1; i-- ) {
806         n    = A->i[i+1] - A->i[i];
807         idx  = A->j + A->i[i] + shift;
808         v    = A->a + A->i[i] + shift;
809         sum  = b[i];
810         SPARSEDENSEMDOT(sum,xs,v,idx,n);
811         d    = fshift + A->a[diag[i]+shift];
812         n    = B->i[i+1] - B->i[i];
813         idx  = B->j + B->i[i] + shift;
814         v    = B->a + B->i[i] + shift;
815         SPARSEDENSEMDOT(sum,ls,v,idx,n);
816         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
817       }
818       ierr = VecPipelineEnd(xx,mat->lvec,INSERT_VALUES,PIPELINE_UP,
819                             mat->Mvctx); CHKERRQ(ierr);
820     }
821   }
822   else if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
823     if (flag & SOR_ZERO_INITIAL_GUESS) {
824       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
825     }
826     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
827     CHKERRQ(ierr);
828     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
829     CHKERRQ(ierr);
830     while (its--) {
831       /* go down through the rows */
832       for ( i=0; i<m; i++ ) {
833         n    = A->i[i+1] - A->i[i];
834         idx  = A->j + A->i[i] + shift;
835         v    = A->a + A->i[i] + shift;
836         sum  = b[i];
837         SPARSEDENSEMDOT(sum,xs,v,idx,n);
838         d    = fshift + A->a[diag[i]+shift];
839         n    = B->i[i+1] - B->i[i];
840         idx  = B->j + B->i[i] + shift;
841         v    = B->a + B->i[i] + shift;
842         SPARSEDENSEMDOT(sum,ls,v,idx,n);
843         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
844       }
845       /* come up through the rows */
846       for ( i=m-1; i>-1; i-- ) {
847         n    = A->i[i+1] - A->i[i];
848         idx  = A->j + A->i[i] + shift;
849         v    = A->a + A->i[i] + shift;
850         sum  = b[i];
851         SPARSEDENSEMDOT(sum,xs,v,idx,n);
852         d    = fshift + A->a[diag[i]+shift];
853         n    = B->i[i+1] - B->i[i];
854         idx  = B->j + B->i[i] + shift;
855         v    = B->a + B->i[i] + shift;
856         SPARSEDENSEMDOT(sum,ls,v,idx,n);
857         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
858       }
859     }
860   }
861   else if (flag & SOR_LOCAL_FORWARD_SWEEP){
862     if (flag & SOR_ZERO_INITIAL_GUESS) {
863       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
864     }
865     ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
866     CHKERRQ(ierr);
867     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,mat->Mvctx);
868     CHKERRQ(ierr);
869     while (its--) {
870       for ( i=0; i<m; i++ ) {
871         n    = A->i[i+1] - A->i[i];
872         idx  = A->j + A->i[i] + shift;
873         v    = A->a + A->i[i] + shift;
874         sum  = b[i];
875         SPARSEDENSEMDOT(sum,xs,v,idx,n);
876         d    = fshift + A->a[diag[i]+shift];
877         n    = B->i[i+1] - B->i[i];
878         idx  = B->j + B->i[i] + shift;
879         v    = B->a + B->i[i] + shift;
880         SPARSEDENSEMDOT(sum,ls,v,idx,n);
881         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
882       }
883     }
884   }
885   else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
886     if (flag & SOR_ZERO_INITIAL_GUESS) {
887       return MatRelax(mat->A,bb,omega,flag,fshift,its,xx);
888     }
889     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
890                             mat->Mvctx); CHKERRQ(ierr);
891     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_ALL,
892                             mat->Mvctx); CHKERRQ(ierr);
893     while (its--) {
894       for ( i=m-1; i>-1; i-- ) {
895         n    = A->i[i+1] - A->i[i];
896         idx  = A->j + A->i[i] + shift;
897         v    = A->a + A->i[i] + shift;
898         sum  = b[i];
899         SPARSEDENSEMDOT(sum,xs,v,idx,n);
900         d    = fshift + A->a[diag[i]+shift];
901         n    = B->i[i+1] - B->i[i];
902         idx  = B->j + B->i[i] + shift;
903         v    = B->a + B->i[i] + shift;
904         SPARSEDENSEMDOT(sum,ls,v,idx,n);
905         x[i] = (1. - omega)*x[i] + omega*(sum/d + x[i]);
906       }
907     }
908   }
909   return 0;
910 }
911 
912 static int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,int *nz,
913                              int *nzalloc,int *mem)
914 {
915   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
916   Mat        A = mat->A, B = mat->B;
917   int        ierr, isend[3], irecv[3], nzA, nzallocA, memA;
918 
919   ierr = MatGetInfo(A,MAT_LOCAL,&nzA,&nzallocA,&memA); CHKERRQ(ierr);
920   ierr = MatGetInfo(B,MAT_LOCAL,&isend[0],&isend[1],&isend[2]); CHKERRQ(ierr);
921   isend[0] += nzA; isend[1] += nzallocA; isend[2] += memA;
922   if (flag == MAT_LOCAL) {
923     *nz = isend[0]; *nzalloc = isend[1]; *mem = isend[2];
924   } else if (flag == MAT_GLOBAL_MAX) {
925     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_MAX,matin->comm);
926     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
927   } else if (flag == MAT_GLOBAL_SUM) {
928     MPI_Allreduce( isend, irecv,3,MPI_INT,MPI_SUM,matin->comm);
929     *nz = irecv[0]; *nzalloc = irecv[1]; *mem = irecv[2];
930   }
931   return 0;
932 }
933 
934 extern int MatLUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,Mat*);
935 extern int MatLUFactorNumeric_MPIAIJ(Mat,Mat*);
936 extern int MatLUFactor_MPIAIJ(Mat,IS,IS,double);
937 extern int MatILUFactorSymbolic_MPIAIJ(Mat,IS,IS,double,int,Mat *);
938 extern int MatSolve_MPIAIJ(Mat,Vec,Vec);
939 extern int MatSolveAdd_MPIAIJ(Mat,Vec,Vec,Vec);
940 extern int MatSolveTrans_MPIAIJ(Mat,Vec,Vec);
941 extern int MatSolveTransAdd_MPIAIJ(Mat,Vec,Vec,Vec);
942 
943 static int MatSetOption_MPIAIJ(Mat A,MatOption op)
944 {
945   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
946 
947   if (op == NO_NEW_NONZERO_LOCATIONS ||
948       op == YES_NEW_NONZERO_LOCATIONS ||
949       op == COLUMNS_SORTED ||
950       op == ROW_ORIENTED) {
951         MatSetOption(a->A,op);
952         MatSetOption(a->B,op);
953   }
954   else if (op == ROWS_SORTED ||
955            op == SYMMETRIC_MATRIX ||
956            op == STRUCTURALLY_SYMMETRIC_MATRIX ||
957            op == YES_NEW_DIAGONALS)
958     PLogInfo((PetscObject)A,"Info:MatSetOption_MPIAIJ:Option ignored\n");
959   else if (op == COLUMN_ORIENTED)
960     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:COLUMN_ORIENTED");}
961   else if (op == NO_NEW_DIAGONALS)
962     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:NO_NEW_DIAGONALS");}
963   else
964     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_MPIAIJ:unknown option");}
965   return 0;
966 }
967 
968 static int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
969 {
970   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
971   *m = mat->M; *n = mat->N;
972   return 0;
973 }
974 
975 static int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
976 {
977   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
978   *m = mat->m; *n = mat->n;
979   return 0;
980 }
981 
982 static int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
983 {
984   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
985   *m = mat->rstart; *n = mat->rend;
986   return 0;
987 }
988 
989 static int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
990 {
991   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
992   Scalar     *vworkA, *vworkB, **pvA, **pvB;
993   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
994   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
995 
996   if (row < rstart || row >= rend) SETERRQ(1,"MatGetRow_MPIAIJ:only local rows")
997   lrow = row - rstart;
998 
999   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1000   if (!v)   {pvA = 0; pvB = 0;}
1001   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1002   ierr = MatGetRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1003   ierr = MatGetRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1004   nztot = nzA + nzB;
1005 
1006   if (v  || idx) {
1007     if (nztot) {
1008       /* Sort by increasing column numbers, assuming A and B already sorted */
1009       int imark;
1010       if (mat->assembled) {
1011         for (i=0; i<nzB; i++) cworkB[i] = mat->garray[cworkB[i]];
1012       }
1013       if (v) {
1014         *v = (Scalar *) PETSCMALLOC( (nztot)*sizeof(Scalar) ); CHKPTRQ(*v);
1015         for ( i=0; i<nzB; i++ ) {
1016           if (cworkB[i] < cstart)   (*v)[i] = vworkB[i];
1017           else break;
1018         }
1019         imark = i;
1020         for ( i=0; i<nzA; i++ )     (*v)[imark+i] = vworkA[i];
1021         for ( i=imark; i<nzB; i++ ) (*v)[nzA+i] = vworkB[i];
1022       }
1023       if (idx) {
1024         *idx = (int *) PETSCMALLOC( (nztot)*sizeof(int) ); CHKPTRQ(*idx);
1025         for (i=0; i<nzA; i++) cworkA[i] += cstart;
1026         for ( i=0; i<nzB; i++ ) {
1027           if (cworkB[i] < cstart)   (*idx)[i] = cworkB[i];
1028           else break;
1029         }
1030         imark = i;
1031         for ( i=0; i<nzA; i++ )     (*idx)[imark+i] = cworkA[i];
1032         for ( i=imark; i<nzB; i++ ) (*idx)[nzA+i] = cworkB[i];
1033       }
1034     }
1035     else {if (idx) *idx = 0; if (v) *v = 0;}
1036   }
1037   *nz = nztot;
1038   ierr = MatRestoreRow(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1039   ierr = MatRestoreRow(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1040   return 0;
1041 }
1042 
1043 static int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1044 {
1045   if (idx) PETSCFREE(*idx);
1046   if (v) PETSCFREE(*v);
1047   return 0;
1048 }
1049 
1050 static int MatNorm_MPIAIJ(Mat mat,MatNormType type,double *norm)
1051 {
1052   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1053   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1054   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1055   double     sum = 0.0;
1056   Scalar     *v;
1057 
1058   if (!aij->assembled) SETERRQ(1,"MatNorm_MPIAIJ:Must assemble matrix");
1059   if (aij->size == 1) {
1060     ierr =  MatNorm(aij->A,type,norm); CHKERRQ(ierr);
1061   } else {
1062     if (type == NORM_FROBENIUS) {
1063       v = amat->a;
1064       for (i=0; i<amat->nz; i++ ) {
1065 #if defined(PETSC_COMPLEX)
1066         sum += real(conj(*v)*(*v)); v++;
1067 #else
1068         sum += (*v)*(*v); v++;
1069 #endif
1070       }
1071       v = bmat->a;
1072       for (i=0; i<bmat->nz; i++ ) {
1073 #if defined(PETSC_COMPLEX)
1074         sum += real(conj(*v)*(*v)); v++;
1075 #else
1076         sum += (*v)*(*v); v++;
1077 #endif
1078       }
1079       MPI_Allreduce((void*)&sum,(void*)norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
1080       *norm = sqrt(*norm);
1081     }
1082     else if (type == NORM_1) { /* max column norm */
1083       double *tmp, *tmp2;
1084       int    *jj, *garray = aij->garray;
1085       tmp  = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp);
1086       tmp2 = (double *) PETSCMALLOC( aij->N*sizeof(double) ); CHKPTRQ(tmp2);
1087       PetscZero(tmp,aij->N*sizeof(double));
1088       *norm = 0.0;
1089       v = amat->a; jj = amat->j;
1090       for ( j=0; j<amat->nz; j++ ) {
1091 #if defined(PETSC_COMPLEX)
1092         tmp[cstart + *jj++ + shift] += abs(*v++);
1093 #else
1094         tmp[cstart + *jj++ + shift] += fabs(*v++);
1095 #endif
1096       }
1097       v = bmat->a; jj = bmat->j;
1098       for ( j=0; j<bmat->nz; j++ ) {
1099 #if defined(PETSC_COMPLEX)
1100         tmp[garray[*jj++ + shift]] += abs(*v++);
1101 #else
1102         tmp[garray[*jj++ + shift]] += fabs(*v++);
1103 #endif
1104       }
1105       MPI_Allreduce((void*)tmp,(void*)tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);
1106       for ( j=0; j<aij->N; j++ ) {
1107         if (tmp2[j] > *norm) *norm = tmp2[j];
1108       }
1109       PETSCFREE(tmp); PETSCFREE(tmp2);
1110     }
1111     else if (type == NORM_INFINITY) { /* max row norm */
1112       double normtemp = 0.0;
1113       for ( j=0; j<amat->m; j++ ) {
1114         v = amat->a + amat->i[j] + shift;
1115         sum = 0.0;
1116         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1117 #if defined(PETSC_COMPLEX)
1118           sum += abs(*v); v++;
1119 #else
1120           sum += fabs(*v); v++;
1121 #endif
1122         }
1123         v = bmat->a + bmat->i[j] + shift;
1124         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1125 #if defined(PETSC_COMPLEX)
1126           sum += abs(*v); v++;
1127 #else
1128           sum += fabs(*v); v++;
1129 #endif
1130         }
1131         if (sum > normtemp) normtemp = sum;
1132       }
1133       MPI_Allreduce((void*)&normtemp,(void*)norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);
1134     }
1135     else {
1136       SETERRQ(1,"MatNorm_MPIAIJ:No support for the two norm");
1137     }
1138   }
1139   return 0;
1140 }
1141 
1142 static int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1143 {
1144   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1145   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1146   int        ierr,shift = Aloc->indexshift;
1147   Mat        B;
1148   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1149   Scalar     *array;
1150 
1151   if (!matout && M != N) SETERRQ(1,"MatTranspose_MPIAIJ:Square only in-place");
1152   ierr = MatCreateMPIAIJ(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,0,0,0,0,&B);
1153   CHKERRQ(ierr);
1154 
1155   /* copy over the A part */
1156   Aloc = (Mat_SeqAIJ*) a->A->data;
1157   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1158   row = a->rstart;
1159   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1160   for ( i=0; i<m; i++ ) {
1161     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1162     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1163   }
1164   aj = Aloc->j;
1165   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1166 
1167   /* copy over the B part */
1168   Aloc = (Mat_SeqAIJ*) a->B->data;
1169   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1170   row = a->rstart;
1171   ct = cols = (int *) PETSCMALLOC( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
1172   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1173   for ( i=0; i<m; i++ ) {
1174     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1175     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1176   }
1177   PETSCFREE(ct);
1178   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1179   ierr = MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1180   if (matout) {
1181     *matout = B;
1182   } else {
1183     /* This isn't really an in-place transpose, but we free the data structures of a */
1184     PETSCFREE(a->rowners);
1185     ierr = MatDestroy(a->A); CHKERRQ(ierr);
1186     ierr = MatDestroy(a->B); CHKERRQ(ierr);
1187     if (a->colmap) PETSCFREE(a->colmap);
1188     if (a->garray) PETSCFREE(a->garray);
1189     if (a->lvec) VecDestroy(a->lvec);
1190     if (a->Mvctx) VecScatterCtxDestroy(a->Mvctx);
1191     PETSCFREE(a);
1192     PetscMemcpy(A,B,sizeof(struct _Mat));
1193     PETSCHEADERDESTROY(B);
1194   }
1195   return 0;
1196 }
1197 
1198 extern int MatConvert_MPIAIJ(Mat,MatType,Mat *);
1199 static int MatCopyPrivate_MPIAIJ(Mat,Mat *);
1200 
1201 /* -------------------------------------------------------------------*/
1202 static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
1203        MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
1204        MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
1205        MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
1206        MatSolve_MPIAIJ,MatSolveAdd_MPIAIJ,
1207        MatSolveTrans_MPIAIJ,MatSolveTransAdd_MPIAIJ,
1208        MatLUFactor_MPIAIJ,0,
1209        MatRelax_MPIAIJ,
1210        MatTranspose_MPIAIJ,
1211        MatGetInfo_MPIAIJ,0,
1212        MatGetDiagonal_MPIAIJ,0,MatNorm_MPIAIJ,
1213        MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
1214        0,
1215        MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
1216        MatGetReordering_MPIAIJ,
1217        MatLUFactorSymbolic_MPIAIJ,MatLUFactorNumeric_MPIAIJ,0,0,
1218        MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
1219        MatILUFactorSymbolic_MPIAIJ,0,
1220        0,0,MatConvert_MPIAIJ,0,0,MatCopyPrivate_MPIAIJ};
1221 
1222 /*@C
1223    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1224    (the default parallel PETSc format).
1225 
1226    Input Parameters:
1227 .  comm - MPI communicator
1228 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1229 .  n - number of local columns (or PETSC_DECIDE to have it calculated
1230            if N is given)
1231 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1232 .  N - number of global columns (or PETSC_DECIDE to have it calculated
1233            if n is given)
1234 .  d_nz - number of nonzeros per row in diagonal portion of local submatrix
1235            (same for all local rows)
1236 .  d_nnz - number of nonzeros per row in diagonal portion of local submatrix
1237            or null (possibly different for each row).  You must leave room
1238            for the diagonal entry even if it is zero.
1239 .  o_nz - number of nonzeros per row in off-diagonal portion of local
1240            submatrix (same for all local rows).
1241 .  o_nnz - number of nonzeros per row in off-diagonal portion of local
1242            submatrix or null (possibly different for each row).
1243 
1244    Output Parameter:
1245 .  newmat - the matrix
1246 
1247    Notes:
1248    The AIJ format (also called the Yale sparse matrix format or
1249    compressed row storage), is fully compatible with standard Fortran 77
1250    storage.  That is, the stored row and column indices begin at
1251    one, not zero.  See the users manual for further details.
1252 
1253    The user MUST specify either the local or global matrix dimensions
1254    (possibly both).
1255 
1256    Storage Information:
1257    For a square global matrix we define each processor's diagonal portion
1258    to be its local rows and the corresponding columns (a square submatrix);
1259    each processor's off-diagonal portion encompasses the remainder of the
1260    local matrix (a rectangular submatrix).
1261 
1262    The user can specify preallocated storage for the diagonal part of
1263    the local submatrix with either d_nz or d_nnz (not both). Set both
1264    d_nz and d_nnz to zero for PETSc to control dynamic memory allocation.
1265    Likewise, specify preallocated storage for the off-diagonal part of
1266    the local submatrix with o_nz or o_nnz (not both).
1267 
1268 .keywords: matrix, aij, compressed row, sparse, parallel
1269 
1270 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1271 @*/
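/*
   Example usage (a minimal sketch, not part of the original source; error
   checking via CHKERRQ()/CHKERRA() is omitted and the preallocation numbers
   are illustrative only):

       Mat    A;
       int    ierr, i, j, rstart, rend;
       Scalar v = 1.0;

       ierr = MatCreateMPIAIJ(MPI_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                              100,100,5,0,2,0,&A);
       ierr = MatGetOwnershipRange(A,&rstart,&rend);
       for ( i=rstart; i<rend; i++ ) {
         j = i;
         ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);
       }
       ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY);
       ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY);
*/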
1272 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,
1273                     int d_nz,int *d_nnz, int o_nz,int *o_nnz,Mat *newmat)
1274 {
1275   Mat          mat;
1276   Mat_MPIAIJ   *a;
1277   int          ierr, i,sum[2],work[2];
1278 
1279   *newmat         = 0;
1280   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,comm);
1281   PLogObjectCreate(mat);
1282   mat->data       = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a);
1283   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1284   mat->destroy    = MatDestroy_MPIAIJ;
1285   mat->view       = MatView_MPIAIJ;
1286   mat->factor     = 0;
1287 
1288   a->insertmode = NOT_SET_VALUES;
1289   MPI_Comm_rank(comm,&a->rank);
1290   MPI_Comm_size(comm,&a->size);
1291 
1292   if (m == PETSC_DECIDE && (d_nnz || o_nnz))
1293     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSc decide rows but set d_nnz or o_nnz");
1294 
1295   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1296     work[0] = m; work[1] = n;
1297     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
1298     if (M == PETSC_DECIDE) M = sum[0];
1299     if (N == PETSC_DECIDE) N = sum[1];
1300   }
1301   if (m == PETSC_DECIDE) {m = M/a->size + ((M % a->size) > a->rank);}
1302   if (n == PETSC_DECIDE) {n = N/a->size + ((N % a->size) > a->rank);}
1303   a->m = m;
1304   a->n = n;
1305   a->N = N;
1306   a->M = M;
1307 
1308   /* build local table of row and column ownerships */
1309   a->rowners = (int *) PETSCMALLOC(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1310   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+
1311                        sizeof(Mat_MPIAIJ));
1312   a->cowners = a->rowners + a->size + 1;
1313   MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1314   a->rowners[0] = 0;
1315   for ( i=2; i<=a->size; i++ ) {
1316     a->rowners[i] += a->rowners[i-1];
1317   }
1318   a->rstart = a->rowners[a->rank];
1319   a->rend   = a->rowners[a->rank+1];
1320   MPI_Allgather(&n,1,MPI_INT,a->cowners+1,1,MPI_INT,comm);
1321   a->cowners[0] = 0;
1322   for ( i=2; i<=a->size; i++ ) {
1323     a->cowners[i] += a->cowners[i-1];
1324   }
1325   a->cstart = a->cowners[a->rank];
1326   a->cend   = a->cowners[a->rank+1];
1327 
1328   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,n,d_nz,d_nnz,&a->A); CHKERRQ(ierr);
1329   PLogObjectParent(mat,a->A);
1330   ierr = MatCreateSeqAIJ(MPI_COMM_SELF,m,N,o_nz,o_nnz,&a->B); CHKERRQ(ierr);
1331   PLogObjectParent(mat,a->B);
1332 
1333   /* build cache for off array entries formed */
1334   ierr = StashBuild_Private(&a->stash); CHKERRQ(ierr);
1335   a->colmap    = 0;
1336   a->garray    = 0;
1337 
1338   /* stuff used for matrix vector multiply */
1339   a->lvec      = 0;
1340   a->Mvctx     = 0;
1341   a->assembled = 0;
1342 
1343   *newmat = mat;
1344   return 0;
1345 }
1346 
1347 static int MatCopyPrivate_MPIAIJ(Mat matin,Mat *newmat)
1348 {
1349   Mat        mat;
1350   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1351   int        ierr, len;
1352 
1353   if (!oldmat->assembled) SETERRQ(1,"MatCopyPrivate_MPIAIJ:Must assemble matrix");
1354   *newmat       = 0;
1355   PETSCHEADERCREATE(mat,_Mat,MAT_COOKIE,MATMPIAIJ,matin->comm);
1356   PLogObjectCreate(mat);
1357   mat->data     = (void *) (a = PETSCNEW(Mat_MPIAIJ)); CHKPTRQ(a);
1358   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
1359   mat->destroy  = MatDestroy_MPIAIJ;
1360   mat->view     = MatView_MPIAIJ;
1361   mat->factor   = matin->factor;
1362 
1363   a->m          = oldmat->m;
1364   a->n          = oldmat->n;
1365   a->M          = oldmat->M;
1366   a->N          = oldmat->N;
1367 
1368   a->assembled  = 1;
1369   a->rstart     = oldmat->rstart;
1370   a->rend       = oldmat->rend;
1371   a->cstart     = oldmat->cstart;
1372   a->cend       = oldmat->cend;
1373   a->size       = oldmat->size;
1374   a->rank       = oldmat->rank;
1375   a->insertmode = NOT_SET_VALUES;
1376 
1377   a->rowners = (int *) PETSCMALLOC((a->size+1)*sizeof(int)); CHKPTRQ(a->rowners);
1378   PLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIAIJ));
1379   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1380   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
1381   if (oldmat->colmap) {
1382     a->colmap = (int *) PETSCMALLOC((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1383     PLogObjectMemory(mat,(a->N)*sizeof(int));
1384     PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
1385   } else a->colmap = 0;
1386   if (oldmat->garray && (len = ((Mat_SeqAIJ *) (oldmat->B->data))->n)) {
1387     a->garray = (int *) PETSCMALLOC(len*sizeof(int)); CHKPTRQ(a->garray);
1388     PLogObjectMemory(mat,len*sizeof(int));
1389     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
1390   } else a->garray = 0;
1391 
1392   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
1393   PLogObjectParent(mat,a->lvec);
1394   ierr =  VecScatterCtxCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
1395   PLogObjectParent(mat,a->Mvctx);
1396   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
1397   PLogObjectParent(mat,a->A);
1398   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
1399   PLogObjectParent(mat,a->B);
1400   *newmat = mat;
1401   return 0;
1402 }
1403 
1404 #include "sysio.h"
1405 
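/*
   MatLoad_MPIAIJorMPIRow - Loads a matrix stored in PETSc binary format.

   A sketch of the file layout this routine assumes (inferred from the reads
   below; the fourth header entry is read but not used here):

      int    header[4]     first entry must be MAT_COOKIE, then M (rows), N (columns)
      int    rowlengths[M] number of nonzeros in each row
      int    cols[...]     column indices of all nonzeros, row by row
      Scalar vals[...]     numerical values of all nonzeros, in the same order

   Processor 0 reads the file and ships each of the other processors its
   block of rows.
*/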
1406 int MatLoad_MPIAIJorMPIRow(Viewer bview,MatType type,Mat *newmat)
1407 {
1408   Mat          A;
1409   int          i, nz, ierr, j,rstart, rend, fd;
1410   Scalar       *vals,*svals;
1411   PetscObject  vobj = (PetscObject) bview;
1412   MPI_Comm     comm = vobj->comm;
1413   MPI_Status   status;
1414   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1415   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1416   int          tag = ((PetscObject)bview)->tag;
1417 
1418   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
1419   if (!rank) {
1420     ierr = ViewerFileGetDescriptor_Private(bview,&fd); CHKERRQ(ierr);
1421     ierr = SYRead(fd,(char *)header,4,SYINT); CHKERRQ(ierr);
1422     if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:not matrix object");
1423   }
1424 
1425   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1426   M = header[1]; N = header[2];
1427   /* determine ownership of all rows */
1428   m = M/size + ((M % size) > rank);
1429   rowners = (int *) PETSCMALLOC((size+2)*sizeof(int)); CHKPTRQ(rowners);
1430   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1431   rowners[0] = 0;
1432   for ( i=2; i<=size; i++ ) {
1433     rowners[i] += rowners[i-1];
1434   }
1435   rstart = rowners[rank];
1436   rend   = rowners[rank+1];
1437 
1438   /* distribute row lengths to all processors */
1439   ourlens = (int*) PETSCMALLOC( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
1440   offlens = ourlens + (rend-rstart);
1441   if (!rank) {
1442     rowlengths = (int*) PETSCMALLOC( M*sizeof(int) ); CHKPTRQ(rowlengths);
1443     ierr = SYRead(fd,rowlengths,M,SYINT); CHKERRQ(ierr);
1444     sndcounts = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(sndcounts);
1445     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
1446     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1447     PETSCFREE(sndcounts);
1448   }
1449   else {
1450     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);
1451   }
1452 
1453   if (!rank) {
1454     /* calculate the number of nonzeros on each processor */
1455     procsnz = (int*) PETSCMALLOC( size*sizeof(int) ); CHKPTRQ(procsnz);
1456     PetscZero(procsnz,size*sizeof(int));
1457     for ( i=0; i<size; i++ ) {
1458       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
1459         procsnz[i] += rowlengths[j];
1460       }
1461     }
1462     PETSCFREE(rowlengths);
1463 
1464     /* determine max buffer needed and allocate it */
1465     maxnz = 0;
1466     for ( i=0; i<size; i++ ) {
1467       maxnz = PETSCMAX(maxnz,procsnz[i]);
1468     }
1469     cols = (int *) PETSCMALLOC( maxnz*sizeof(int) ); CHKPTRQ(cols);
1470 
1471     /* read in my part of the matrix column indices  */
1472     nz = procsnz[0];
1473     mycols = (int *) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
1474     ierr = SYRead(fd,mycols,nz,SYINT); CHKERRQ(ierr);
1475 
1476     /* read in everyone else's (column indices) and ship them off */
1477     for ( i=1; i<size; i++ ) {
1478       nz = procsnz[i];
1479       ierr = SYRead(fd,cols,nz,SYINT); CHKERRQ(ierr);
1480       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1481     }
1482     PETSCFREE(cols);
1483   }
1484   else {
1485     /* determine buffer space needed for message */
1486     nz = 0;
1487     for ( i=0; i<m; i++ ) {
1488       nz += ourlens[i];
1489     }
1490     mycols = (int*) PETSCMALLOC( nz*sizeof(int) ); CHKPTRQ(mycols);
1491 
1492     /* receive message of column indices*/
1493     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1494     MPI_Get_count(&status,MPI_INT,&maxnz);
1495     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1496   }
1497 
1498   /* loop over local rows, determining number of off diagonal entries */
1499   PetscZero(offlens,m*sizeof(int));
1500   jj = 0;
1501   for ( i=0; i<m; i++ ) {
1502     for ( j=0; j<ourlens[i]; j++ ) {
1503       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1504       jj++;
1505     }
1506   }
1507 
1508   /* create our matrix */
1509   for ( i=0; i<m; i++ ) {
1510     ourlens[i] -= offlens[i];
1511   }
1512   if (type == MATMPIAIJ) {
1513     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1514   }
1515   else if (type == MATMPIROW) {
1516     ierr = MatCreateMPIRow(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1517   }
1518   A = *newmat;
1519   MatSetOption(A,COLUMNS_SORTED);
1520   for ( i=0; i<m; i++ ) {
1521     ourlens[i] += offlens[i];
1522   }
1523 
1524   if (!rank) {
1525     vals = (Scalar *) PETSCMALLOC( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);
1526 
1527     /* read in my part of the matrix numerical values  */
1528     nz = procsnz[0];
1529     ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1530 
1531     /* insert into matrix */
1532     jj      = rstart;
1533     smycols = mycols;
1534     svals   = vals;
1535     for ( i=0; i<m; i++ ) {
1536       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1537       smycols += ourlens[i];
1538       svals   += ourlens[i];
1539       jj++;
1540     }
1541 
1542     /* read in other processors and ship out */
1543     for ( i=1; i<size; i++ ) {
1544       nz = procsnz[i];
1545       ierr = SYRead(fd,vals,nz,SYSCALAR); CHKERRQ(ierr);
1546       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1547     }
1548     PETSCFREE(procsnz);
1549   }
1550   else {
1551     /* receive numeric values */
1552     vals = (Scalar*) PETSCMALLOC( nz*sizeof(Scalar) ); CHKPTRQ(vals);
1553 
1554     /* receive message of values*/
1555     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1556     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1557     if (maxnz != nz) SETERRQ(1,"MatLoad_MPIAIJorMPIRow:something is wrong with file");
1558 
1559     /* insert into matrix */
1560     jj      = rstart;
1561     smycols = mycols;
1562     svals   = vals;
1563     for ( i=0; i<m; i++ ) {
1564       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1565       smycols += ourlens[i];
1566       svals   += ourlens[i];
1567       jj++;
1568     }
1569   }
1570   PETSCFREE(ourlens); PETSCFREE(vals); PETSCFREE(mycols); PETSCFREE(rowners);
1571 
1572   ierr = MatAssemblyBegin(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1573   ierr = MatAssemblyEnd(A,FINAL_ASSEMBLY); CHKERRQ(ierr);
1574   return 0;
1575 }
1576