xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision b59f628eff0cd0541d0457de20841eec6c6bc428)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatDenseGetLocalMatrix"
12 /*@
13 
14       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
15               matrix that represents the operator. For sequential matrices it returns itself.
16 
17     Input Parameter:
18 .      A - the Seq or MPI dense matrix
19 
20     Output Parameter:
21 .      B - the inner matrix
22 
23     Level: intermediate
24 
25 @*/
26 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
27 {
28   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
29   PetscErrorCode ierr;
30   PetscBool      flg;
31 
32   PetscFunctionBegin;
33   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else *B = A;
36   PetscFunctionReturn(0);
37 }
38 
39 #undef __FUNCT__
40 #define __FUNCT__ "MatGetRow_MPIDense"
41 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
42 {
43   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
44   PetscErrorCode ierr;
45   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
46 
47   PetscFunctionBegin;
48   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
49   lrow = row - rstart;
50   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "MatRestoreRow_MPIDense"
56 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
57 {
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
62   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
68 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
69 {
70   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
71   PetscErrorCode ierr;
72   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
73   PetscScalar    *array;
74   MPI_Comm       comm;
75   Mat            B;
76 
77   PetscFunctionBegin;
78   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
79 
80   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
81   if (!B) {
82     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
83     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
84     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
85     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
86     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
87     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
88     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
89     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
90     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
91     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
92     *a   = B;
93     ierr = MatDestroy(&B);CHKERRQ(ierr);
94   } else *a = B;
95   PetscFunctionReturn(0);
96 }
97 
98 #undef __FUNCT__
99 #define __FUNCT__ "MatSetValues_MPIDense"
100 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
101 {
102   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
103   PetscErrorCode ierr;
104   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
105   PetscBool      roworiented = A->roworiented;
106 
107   PetscFunctionBegin;
108   for (i=0; i<m; i++) {
109     if (idxm[i] < 0) continue;
110     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
111     if (idxm[i] >= rstart && idxm[i] < rend) {
112       row = idxm[i] - rstart;
113       if (roworiented) {
114         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
115       } else {
116         for (j=0; j<n; j++) {
117           if (idxn[j] < 0) continue;
118           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
119           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
120         }
121       }
122     } else if (!A->donotstash) {
123       mat->assembled = PETSC_FALSE;
124       if (roworiented) {
125         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
126       } else {
127         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
128       }
129     }
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 #undef __FUNCT__
135 #define __FUNCT__ "MatGetValues_MPIDense"
136 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
137 {
138   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
141 
142   PetscFunctionBegin;
143   for (i=0; i<m; i++) {
144     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
145     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
146     if (idxm[i] >= rstart && idxm[i] < rend) {
147       row = idxm[i] - rstart;
148       for (j=0; j<n; j++) {
149         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
150         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
151         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
152       }
153     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
154   }
155   PetscFunctionReturn(0);
156 }
157 
158 #undef __FUNCT__
159 #define __FUNCT__ "MatDenseGetArray_MPIDense"
160 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
161 {
162   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
163   PetscErrorCode ierr;
164 
165   PetscFunctionBegin;
166   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
172 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
173 {
174   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
175   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
176   PetscErrorCode ierr;
177   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
178   const PetscInt *irow,*icol;
179   PetscScalar    *av,*bv,*v = lmat->v;
180   Mat            newmat;
181   IS             iscol_local;
182 
183   PetscFunctionBegin;
184   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
185   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
186   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
187   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
188   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
189   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
190 
191   /* No parallel redistribution currently supported! Should really check each index set
192      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
193      original matrix! */
194 
195   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
196   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
197 
198   /* Check submatrix call */
199   if (scall == MAT_REUSE_MATRIX) {
200     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
201     /* Really need to test rows and column sizes! */
202     newmat = *B;
203   } else {
204     /* Create and fill new matrix */
205     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
206     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
207     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
208     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
209   }
210 
211   /* Now extract the data pointers and do the copy, column at a time */
212   newmatd = (Mat_MPIDense*)newmat->data;
213   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
214 
215   for (i=0; i<Ncols; i++) {
216     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
217     for (j=0; j<nrows; j++) {
218       *bv++ = av[irow[j] - rstart];
219     }
220   }
221 
222   /* Assemble the matrices so that the correct flags are set */
223   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
224   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
225 
226   /* Free work space */
227   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
228   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
229   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
230   *B   = newmat;
231   PetscFunctionReturn(0);
232 }
233 
234 #undef __FUNCT__
235 #define __FUNCT__ "MatDenseRestoreArray_MPIDense"
236 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
237 {
238   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
239   PetscErrorCode ierr;
240 
241   PetscFunctionBegin;
242   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 #undef __FUNCT__
247 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
248 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
249 {
250   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
251   MPI_Comm       comm;
252   PetscErrorCode ierr;
253   PetscInt       nstash,reallocs;
254   InsertMode     addv;
255 
256   PetscFunctionBegin;
257   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
258   /* make sure all processors are either in INSERTMODE or ADDMODE */
259   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
260   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
261   mat->insertmode = addv; /* in case this processor had no cache */
262 
263   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
264   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
265   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
266   PetscFunctionReturn(0);
267 }
268 
269 #undef __FUNCT__
270 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
271 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
272 {
273   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
274   PetscErrorCode ierr;
275   PetscInt       i,*row,*col,flg,j,rstart,ncols;
276   PetscMPIInt    n;
277   PetscScalar    *val;
278   InsertMode     addv=mat->insertmode;
279 
280   PetscFunctionBegin;
281   /*  wait on receives */
282   while (1) {
283     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
284     if (!flg) break;
285 
286     for (i=0; i<n;) {
287       /* Now identify the consecutive vals belonging to the same row */
288       for (j=i,rstart=row[j]; j<n; j++) {
289         if (row[j] != rstart) break;
290       }
291       if (j < n) ncols = j-i;
292       else       ncols = n-i;
293       /* Now assemble all these values with a single function call */
294       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
295       i    = j;
296     }
297   }
298   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
299 
300   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
301   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
302 
303   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
304     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
305   }
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatZeroEntries_MPIDense"
311 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
312 {
313   PetscErrorCode ierr;
314   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
315 
316   PetscFunctionBegin;
317   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
318   PetscFunctionReturn(0);
319 }
320 
321 /* the code does not do the diagonal entries correctly unless the
322    matrix is square and the column and row owerships are identical.
323    This is a BUG. The only way to fix it seems to be to access
324    mdn->A and mdn->B directly and not through the MatZeroRows()
325    routine.
326 */
327 #undef __FUNCT__
328 #define __FUNCT__ "MatZeroRows_MPIDense"
329 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
330 {
331   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
332   PetscErrorCode    ierr;
333   PetscInt          i,*owners = A->rmap->range;
334   PetscInt          *sizes,j,idx,nsends;
335   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
336   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
337   PetscInt          *lens,*lrows,*values;
338   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
339   MPI_Comm          comm;
340   MPI_Request       *send_waits,*recv_waits;
341   MPI_Status        recv_status,*send_status;
342   PetscBool         found;
343   const PetscScalar *xx;
344   PetscScalar       *bb;
345 
346   PetscFunctionBegin;
347   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
348   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
349   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
350   /*  first count number of contributors to each processor */
351   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
352   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
353   for (i=0; i<N; i++) {
354     idx   = rows[i];
355     found = PETSC_FALSE;
356     for (j=0; j<size; j++) {
357       if (idx >= owners[j] && idx < owners[j+1]) {
358         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
359       }
360     }
361     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
362   }
363   nsends = 0;
364   for (i=0; i<size; i++) nsends += sizes[2*i+1];
365 
366   /* inform other processors of number of messages and max length*/
367   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
368 
369   /* post receives:   */
370   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
371   ierr = PetscMalloc1((nrecvs+1),&recv_waits);CHKERRQ(ierr);
372   for (i=0; i<nrecvs; i++) {
373     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
374   }
375 
376   /* do sends:
377       1) starts[i] gives the starting index in svalues for stuff going to
378          the ith processor
379   */
380   ierr = PetscMalloc1((N+1),&svalues);CHKERRQ(ierr);
381   ierr = PetscMalloc1((nsends+1),&send_waits);CHKERRQ(ierr);
382   ierr = PetscMalloc1((size+1),&starts);CHKERRQ(ierr);
383 
384   starts[0] = 0;
385   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
386   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
387 
388   starts[0] = 0;
389   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
390   count = 0;
391   for (i=0; i<size; i++) {
392     if (sizes[2*i+1]) {
393       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
394     }
395   }
396   ierr = PetscFree(starts);CHKERRQ(ierr);
397 
398   base = owners[rank];
399 
400   /*  wait on receives */
401   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
402   count = nrecvs;
403   slen  = 0;
404   while (count) {
405     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
406     /* unpack receives into our local space */
407     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
408 
409     source[imdex] = recv_status.MPI_SOURCE;
410     lens[imdex]   = n;
411     slen += n;
412     count--;
413   }
414   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
415 
416   /* move the data into the send scatter */
417   ierr  = PetscMalloc1((slen+1),&lrows);CHKERRQ(ierr);
418   count = 0;
419   for (i=0; i<nrecvs; i++) {
420     values = rvalues + i*nmax;
421     for (j=0; j<lens[i]; j++) {
422       lrows[count++] = values[j] - base;
423     }
424   }
425   ierr = PetscFree(rvalues);CHKERRQ(ierr);
426   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
427   ierr = PetscFree(owner);CHKERRQ(ierr);
428   ierr = PetscFree(sizes);CHKERRQ(ierr);
429 
430   /* fix right hand side if needed */
431   if (x && b) {
432     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
433     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
434     for (i=0; i<slen; i++) {
435       bb[lrows[i]] = diag*xx[lrows[i]];
436     }
437     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
438     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
439   }
440 
441   /* actually zap the local rows */
442   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
443   if (diag != 0.0) {
444     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
445     PetscInt     m   = ll->lda, i;
446 
447     for (i=0; i<slen; i++) {
448       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
449     }
450   }
451   ierr = PetscFree(lrows);CHKERRQ(ierr);
452 
453   /* wait on sends */
454   if (nsends) {
455     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
456     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
457     ierr = PetscFree(send_status);CHKERRQ(ierr);
458   }
459   ierr = PetscFree(send_waits);CHKERRQ(ierr);
460   ierr = PetscFree(svalues);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "MatMult_MPIDense"
466 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
467 {
468   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
469   PetscErrorCode ierr;
470 
471   PetscFunctionBegin;
472   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
473   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
474   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatMultAdd_MPIDense"
480 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
481 {
482   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
483   PetscErrorCode ierr;
484 
485   PetscFunctionBegin;
486   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
487   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
488   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
489   PetscFunctionReturn(0);
490 }
491 
492 #undef __FUNCT__
493 #define __FUNCT__ "MatMultTranspose_MPIDense"
494 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
495 {
496   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
497   PetscErrorCode ierr;
498   PetscScalar    zero = 0.0;
499 
500   PetscFunctionBegin;
501   ierr = VecSet(yy,zero);CHKERRQ(ierr);
502   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
503   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
504   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
510 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
511 {
512   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
513   PetscErrorCode ierr;
514 
515   PetscFunctionBegin;
516   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
517   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
518   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
519   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
520   PetscFunctionReturn(0);
521 }
522 
523 #undef __FUNCT__
524 #define __FUNCT__ "MatGetDiagonal_MPIDense"
525 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
526 {
527   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
528   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
529   PetscErrorCode ierr;
530   PetscInt       len,i,n,m = A->rmap->n,radd;
531   PetscScalar    *x,zero = 0.0;
532 
533   PetscFunctionBegin;
534   ierr = VecSet(v,zero);CHKERRQ(ierr);
535   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
536   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
537   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
538   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
539   radd = A->rmap->rstart*m;
540   for (i=0; i<len; i++) {
541     x[i] = aloc->v[radd + i*m + i];
542   }
543   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatDestroy_MPIDense"
549 PetscErrorCode MatDestroy_MPIDense(Mat mat)
550 {
551   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
552   PetscErrorCode ierr;
553 
554   PetscFunctionBegin;
555 #if defined(PETSC_USE_LOG)
556   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
557 #endif
558   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
559   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
560   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
561   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
562 
563   ierr = PetscFree(mat->data);CHKERRQ(ierr);
564   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
565 
566   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
567   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
568 #if defined(PETSC_HAVE_ELEMENTAL)
569   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
570 #endif
571   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
572   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
573   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
574   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
575   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
576   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
577   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
578   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatView_MPIDense_Binary"
584 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
585 {
586   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
587   PetscErrorCode    ierr;
588   PetscViewerFormat format;
589   int               fd;
590   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
591   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
592   PetscScalar       *work,*v,*vv;
593   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
594 
595   PetscFunctionBegin;
596   if (mdn->size == 1) {
597     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
598   } else {
599     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
600     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
601     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
602 
603     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
604     if (format == PETSC_VIEWER_NATIVE) {
605 
606       if (!rank) {
607         /* store the matrix as a dense matrix */
608         header[0] = MAT_FILE_CLASSID;
609         header[1] = mat->rmap->N;
610         header[2] = N;
611         header[3] = MATRIX_BINARY_FORMAT_DENSE;
612         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
613 
614         /* get largest work array needed for transposing array */
615         mmax = mat->rmap->n;
616         for (i=1; i<size; i++) {
617           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
618         }
619         ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);
620 
621         /* write out local array, by rows */
622         m = mat->rmap->n;
623         v = a->v;
624         for (j=0; j<N; j++) {
625           for (i=0; i<m; i++) {
626             work[j + i*N] = *v++;
627           }
628         }
629         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
630         /* get largest work array to receive messages from other processes, excludes process zero */
631         mmax = 0;
632         for (i=1; i<size; i++) {
633           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
634         }
635         ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
636         for (k = 1; k < size; k++) {
637           v    = vv;
638           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
639           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
640 
641           for (j = 0; j < N; j++) {
642             for (i = 0; i < m; i++) {
643               work[j + i*N] = *v++;
644             }
645           }
646           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
647         }
648         ierr = PetscFree(work);CHKERRQ(ierr);
649         ierr = PetscFree(vv);CHKERRQ(ierr);
650       } else {
651         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
652       }
653     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
654   }
655   PetscFunctionReturn(0);
656 }
657 
658 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
659 #include <petscdraw.h>
660 #undef __FUNCT__
661 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
662 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
663 {
664   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
665   PetscErrorCode    ierr;
666   PetscMPIInt       rank = mdn->rank;
667   PetscViewerType   vtype;
668   PetscBool         iascii,isdraw;
669   PetscViewer       sviewer;
670   PetscViewerFormat format;
671 
672   PetscFunctionBegin;
673   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
674   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
675   if (iascii) {
676     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
677     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
678     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
679       MatInfo info;
680       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
681       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
682       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
683       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
684       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
685       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
686       PetscFunctionReturn(0);
687     } else if (format == PETSC_VIEWER_ASCII_INFO) {
688       PetscFunctionReturn(0);
689     }
690   } else if (isdraw) {
691     PetscDraw draw;
692     PetscBool isnull;
693 
694     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
695     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
696     if (isnull) PetscFunctionReturn(0);
697   }
698 
699   {
700     /* assemble the entire matrix onto first processor. */
701     Mat         A;
702     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
703     PetscInt    *cols;
704     PetscScalar *vals;
705 
706     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
707     if (!rank) {
708       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
709     } else {
710       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
711     }
712     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
713     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
714     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
715     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
716 
717     /* Copy the matrix ... This isn't the most efficient means,
718        but it's quick for now */
719     A->insertmode = INSERT_VALUES;
720 
721     row = mat->rmap->rstart;
722     m   = mdn->A->rmap->n;
723     for (i=0; i<m; i++) {
724       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
725       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
726       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
727       row++;
728     }
729 
730     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
731     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
732     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
733     if (!rank) {
734         ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
735     }
736     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
737     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
738     ierr = MatDestroy(&A);CHKERRQ(ierr);
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatView_MPIDense"
745 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
746 {
747   PetscErrorCode ierr;
748   PetscBool      iascii,isbinary,isdraw,issocket;
749 
750   PetscFunctionBegin;
751   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
752   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
753   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
754   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
755 
756   if (iascii || issocket || isdraw) {
757     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
758   } else if (isbinary) {
759     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
760   }
761   PetscFunctionReturn(0);
762 }
763 
764 #undef __FUNCT__
765 #define __FUNCT__ "MatGetInfo_MPIDense"
766 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
767 {
768   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
769   Mat            mdn  = mat->A;
770   PetscErrorCode ierr;
771   PetscReal      isend[5],irecv[5];
772 
773   PetscFunctionBegin;
774   info->block_size = 1.0;
775 
776   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
777 
778   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
779   isend[3] = info->memory;  isend[4] = info->mallocs;
780   if (flag == MAT_LOCAL) {
781     info->nz_used      = isend[0];
782     info->nz_allocated = isend[1];
783     info->nz_unneeded  = isend[2];
784     info->memory       = isend[3];
785     info->mallocs      = isend[4];
786   } else if (flag == MAT_GLOBAL_MAX) {
787     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
788 
789     info->nz_used      = irecv[0];
790     info->nz_allocated = irecv[1];
791     info->nz_unneeded  = irecv[2];
792     info->memory       = irecv[3];
793     info->mallocs      = irecv[4];
794   } else if (flag == MAT_GLOBAL_SUM) {
795     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
796 
797     info->nz_used      = irecv[0];
798     info->nz_allocated = irecv[1];
799     info->nz_unneeded  = irecv[2];
800     info->memory       = irecv[3];
801     info->mallocs      = irecv[4];
802   }
803   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
804   info->fill_ratio_needed = 0;
805   info->factor_mallocs    = 0;
806   PetscFunctionReturn(0);
807 }
808 
809 #undef __FUNCT__
810 #define __FUNCT__ "MatSetOption_MPIDense"
811 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
812 {
813   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
814   PetscErrorCode ierr;
815 
816   PetscFunctionBegin;
817   switch (op) {
818   case MAT_NEW_NONZERO_LOCATIONS:
819   case MAT_NEW_NONZERO_LOCATION_ERR:
820   case MAT_NEW_NONZERO_ALLOCATION_ERR:
821     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
822     break;
823   case MAT_ROW_ORIENTED:
824     a->roworiented = flg;
825 
826     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
827     break;
828   case MAT_NEW_DIAGONALS:
829   case MAT_KEEP_NONZERO_PATTERN:
830   case MAT_USE_HASH_TABLE:
831     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
832     break;
833   case MAT_IGNORE_OFF_PROC_ENTRIES:
834     a->donotstash = flg;
835     break;
836   case MAT_SYMMETRIC:
837   case MAT_STRUCTURALLY_SYMMETRIC:
838   case MAT_HERMITIAN:
839   case MAT_SYMMETRY_ETERNAL:
840   case MAT_IGNORE_LOWER_TRIANGULAR:
841     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
842     break;
843   default:
844     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
845   }
846   PetscFunctionReturn(0);
847 }
848 
849 
850 #undef __FUNCT__
851 #define __FUNCT__ "MatDiagonalScale_MPIDense"
852 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
853 {
854   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
855   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
856   PetscScalar    *l,*r,x,*v;
857   PetscErrorCode ierr;
858   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
859 
860   PetscFunctionBegin;
861   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
862   if (ll) {
863     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
864     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
865     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
866     for (i=0; i<m; i++) {
867       x = l[i];
868       v = mat->v + i;
869       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
870     }
871     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
872     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
873   }
874   if (rr) {
875     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
876     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
877     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
879     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
880     for (i=0; i<n; i++) {
881       x = r[i];
882       v = mat->v + i*m;
883       for (j=0; j<m; j++) (*v++) *= x;
884     }
885     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
886     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
887   }
888   PetscFunctionReturn(0);
889 }
890 
891 #undef __FUNCT__
892 #define __FUNCT__ "MatNorm_MPIDense"
893 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
894 {
895   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
896   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
897   PetscErrorCode ierr;
898   PetscInt       i,j;
899   PetscReal      sum = 0.0;
900   PetscScalar    *v  = mat->v;
901 
902   PetscFunctionBegin;
903   if (mdn->size == 1) {
904     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
905   } else {
906     if (type == NORM_FROBENIUS) {
907       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
908         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
909       }
910       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
911       *nrm = PetscSqrtReal(*nrm);
912       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
913     } else if (type == NORM_1) {
914       PetscReal *tmp,*tmp2;
915       ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
916       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
917       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
918       *nrm = 0.0;
919       v    = mat->v;
920       for (j=0; j<mdn->A->cmap->n; j++) {
921         for (i=0; i<mdn->A->rmap->n; i++) {
922           tmp[j] += PetscAbsScalar(*v);  v++;
923         }
924       }
925       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
926       for (j=0; j<A->cmap->N; j++) {
927         if (tmp2[j] > *nrm) *nrm = tmp2[j];
928       }
929       ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr);
930       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
931     } else if (type == NORM_INFINITY) { /* max row norm */
932       PetscReal ntemp;
933       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
934       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
935     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
936   }
937   PetscFunctionReturn(0);
938 }
939 
940 #undef __FUNCT__
941 #define __FUNCT__ "MatTranspose_MPIDense"
942 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
943 {
944   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
945   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
946   Mat            B;
947   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
948   PetscErrorCode ierr;
949   PetscInt       j,i;
950   PetscScalar    *v;
951 
952   PetscFunctionBegin;
953   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
954   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
955     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
956     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
957     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
958     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
959   } else {
960     B = *matout;
961   }
962 
963   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
964   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
965   for (i=0; i<m; i++) rwork[i] = rstart + i;
966   for (j=0; j<n; j++) {
967     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
968     v   += m;
969   }
970   ierr = PetscFree(rwork);CHKERRQ(ierr);
971   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
972   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
973   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
974     *matout = B;
975   } else {
976     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
977   }
978   PetscFunctionReturn(0);
979 }
980 
981 
982 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
983 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
984 
985 #undef __FUNCT__
986 #define __FUNCT__ "MatSetUp_MPIDense"
987 PetscErrorCode MatSetUp_MPIDense(Mat A)
988 {
989   PetscErrorCode ierr;
990 
991   PetscFunctionBegin;
992   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatAXPY_MPIDense"
998 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
999 {
1000   PetscErrorCode ierr;
1001   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1002 
1003   PetscFunctionBegin;
1004   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1005   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #undef __FUNCT__
1010 #define __FUNCT__ "MatConjugate_MPIDense"
1011 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1012 {
1013   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1014   PetscErrorCode ierr;
1015 
1016   PetscFunctionBegin;
1017   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "MatRealPart_MPIDense"
1023 PetscErrorCode MatRealPart_MPIDense(Mat A)
1024 {
1025   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1026   PetscErrorCode ierr;
1027 
1028   PetscFunctionBegin;
1029   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1030   PetscFunctionReturn(0);
1031 }
1032 
1033 #undef __FUNCT__
1034 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1035 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1036 {
1037   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1038   PetscErrorCode ierr;
1039 
1040   PetscFunctionBegin;
1041   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1048 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1049 {
1050   PetscErrorCode ierr;
1051   PetscInt       i,n;
1052   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1053   PetscReal      *work;
1054 
1055   PetscFunctionBegin;
1056   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1057   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1058   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1059   if (type == NORM_2) {
1060     for (i=0; i<n; i++) work[i] *= work[i];
1061   }
1062   if (type == NORM_INFINITY) {
1063     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1064   } else {
1065     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1066   }
1067   ierr = PetscFree(work);CHKERRQ(ierr);
1068   if (type == NORM_2) {
1069     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1070   }
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 #undef __FUNCT__
1075 #define __FUNCT__ "MatSetRandom_MPIDense"
1076 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1077 {
1078   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1079   PetscErrorCode ierr;
1080   PetscScalar    *a;
1081   PetscInt       m,n,i;
1082 
1083   PetscFunctionBegin;
1084   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1085   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1086   for (i=0; i<m*n; i++) {
1087     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1088   }
1089   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 /* -------------------------------------------------------------------*/
1094 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1095                                         MatGetRow_MPIDense,
1096                                         MatRestoreRow_MPIDense,
1097                                         MatMult_MPIDense,
1098                                 /*  4*/ MatMultAdd_MPIDense,
1099                                         MatMultTranspose_MPIDense,
1100                                         MatMultTransposeAdd_MPIDense,
1101                                         0,
1102                                         0,
1103                                         0,
1104                                 /* 10*/ 0,
1105                                         0,
1106                                         0,
1107                                         0,
1108                                         MatTranspose_MPIDense,
1109                                 /* 15*/ MatGetInfo_MPIDense,
1110                                         MatEqual_MPIDense,
1111                                         MatGetDiagonal_MPIDense,
1112                                         MatDiagonalScale_MPIDense,
1113                                         MatNorm_MPIDense,
1114                                 /* 20*/ MatAssemblyBegin_MPIDense,
1115                                         MatAssemblyEnd_MPIDense,
1116                                         MatSetOption_MPIDense,
1117                                         MatZeroEntries_MPIDense,
1118                                 /* 24*/ MatZeroRows_MPIDense,
1119                                         0,
1120                                         0,
1121                                         0,
1122                                         0,
1123                                 /* 29*/ MatSetUp_MPIDense,
1124                                         0,
1125                                         0,
1126                                         0,
1127                                         0,
1128                                 /* 34*/ MatDuplicate_MPIDense,
1129                                         0,
1130                                         0,
1131                                         0,
1132                                         0,
1133                                 /* 39*/ MatAXPY_MPIDense,
1134                                         MatGetSubMatrices_MPIDense,
1135                                         0,
1136                                         MatGetValues_MPIDense,
1137                                         0,
1138                                 /* 44*/ 0,
1139                                         MatScale_MPIDense,
1140                                         0,
1141                                         0,
1142                                         0,
1143                                 /* 49*/ MatSetRandom_MPIDense,
1144                                         0,
1145                                         0,
1146                                         0,
1147                                         0,
1148                                 /* 54*/ 0,
1149                                         0,
1150                                         0,
1151                                         0,
1152                                         0,
1153                                 /* 59*/ MatGetSubMatrix_MPIDense,
1154                                         MatDestroy_MPIDense,
1155                                         MatView_MPIDense,
1156                                         0,
1157                                         0,
1158                                 /* 64*/ 0,
1159                                         0,
1160                                         0,
1161                                         0,
1162                                         0,
1163                                 /* 69*/ 0,
1164                                         0,
1165                                         0,
1166                                         0,
1167                                         0,
1168                                 /* 74*/ 0,
1169                                         0,
1170                                         0,
1171                                         0,
1172                                         0,
1173                                 /* 79*/ 0,
1174                                         0,
1175                                         0,
1176                                         0,
1177                                 /* 83*/ MatLoad_MPIDense,
1178                                         0,
1179                                         0,
1180                                         0,
1181                                         0,
1182                                         0,
1183                                 /* 89*/
1184                                         0,
1185                                         0,
1186                                         0,
1187                                         0,
1188                                         0,
1189                                 /* 94*/ 0,
1190                                         0,
1191                                         0,
1192                                         0,
1193                                         0,
1194                                 /* 99*/ 0,
1195                                         0,
1196                                         0,
1197                                         MatConjugate_MPIDense,
1198                                         0,
1199                                 /*104*/ 0,
1200                                         MatRealPart_MPIDense,
1201                                         MatImaginaryPart_MPIDense,
1202                                         0,
1203                                         0,
1204                                 /*109*/ 0,
1205                                         0,
1206                                         0,
1207                                         0,
1208                                         0,
1209                                 /*114*/ 0,
1210                                         0,
1211                                         0,
1212                                         0,
1213                                         0,
1214                                 /*119*/ 0,
1215                                         0,
1216                                         0,
1217                                         0,
1218                                         0,
1219                                 /*124*/ 0,
1220                                         MatGetColumnNorms_MPIDense,
1221                                         0,
1222                                         0,
1223                                         0,
1224                                 /*129*/ 0,
1225                                         0,
1226                                         0,
1227                                         0,
1228                                         0,
1229                                 /*134*/ 0,
1230                                         0,
1231                                         0,
1232                                         0,
1233                                         0,
1234                                 /*139*/ 0,
1235                                         0,
1236                                         0
1237 };
1238 
1239 #undef __FUNCT__
1240 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1241 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1242 {
1243   Mat_MPIDense   *a;
1244   PetscErrorCode ierr;
1245 
1246   PetscFunctionBegin;
1247   mat->preallocated = PETSC_TRUE;
1248   /* Note:  For now, when data is specified above, this assumes the user correctly
1249    allocates the local dense storage space.  We should add error checking. */
1250 
1251   a       = (Mat_MPIDense*)mat->data;
1252   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1253   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1254   a->nvec = mat->cmap->n;
1255 
1256   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1257   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1258   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1259   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1260   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 #undef __FUNCT__
1265 #define __FUNCT__ "MatConvert_MPIDense_Elemental"
1266 PETSC_EXTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1267 {
1268   Mat            mat_elemental;
1269   PetscErrorCode ierr;
1270   PetscScalar    *array,*v_colwise;
1271   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,j,k,*rows,*cols;
1272 
1273   PetscFunctionBegin;
1274   ierr = PetscMalloc3(m*N,&v_colwise,m,&rows,N,&cols);CHKERRQ(ierr);
1275   ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr);
1276   /* convert column-wise array into row-wise v_colwise, see MatSetValues_Elemental() */
1277   k = 0;
1278   for (j=0; j<N; j++) {
1279     cols[j] = j;
1280     for (i=0; i<m; i++) {
1281       v_colwise[j*m+i] = array[k++];
1282     }
1283   }
1284   for (i=0; i<m; i++) {
1285     rows[i] = rstart + i;
1286   }
1287   ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr);
1288 
1289   ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1290   ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1291   ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1292   ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1293 
1294   /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1295   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v_colwise,ADD_VALUES);CHKERRQ(ierr);
1296   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1297   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1298   ierr = PetscFree3(v_colwise,rows,cols);CHKERRQ(ierr);
1299 
1300   if (reuse == MAT_REUSE_MATRIX) {
1301     ierr = MatHeaderReplace(A,mat_elemental);CHKERRQ(ierr);
1302   } else {
1303     *newmat = mat_elemental;
1304   }
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatCreate_MPIDense"
1310 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1311 {
1312   Mat_MPIDense   *a;
1313   PetscErrorCode ierr;
1314 
1315   PetscFunctionBegin;
1316   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1317   mat->data = (void*)a;
1318   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1319 
1320   mat->insertmode = NOT_SET_VALUES;
1321   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1322   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1323 
1324   /* build cache for off array entries formed */
1325   a->donotstash = PETSC_FALSE;
1326 
1327   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1328 
1329   /* stuff used for matrix vector multiply */
1330   a->lvec        = 0;
1331   a->Mvctx       = 0;
1332   a->roworiented = PETSC_TRUE;
1333 
1334   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1335   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1336 #if defined(PETSC_HAVE_ELEMENTAL)
1337   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1338 #endif
1339   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1340   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1341   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1342   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1343   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1344 
1345   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1346   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1347   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1348   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 /*MC
1353    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1354 
1355    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1356    and MATMPIDENSE otherwise.
1357 
1358    Options Database Keys:
1359 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1360 
1361   Level: beginner
1362 
1363 
1364 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1365 M*/
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1369 /*@C
1370    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1371 
1372    Not collective
1373 
1374    Input Parameters:
1375 .  B - the matrix
1376 -  data - optional location of matrix data.  Set data=NULL for PETSc
1377    to control all matrix memory allocation.
1378 
1379    Notes:
1380    The dense format is fully compatible with standard Fortran 77
1381    storage by columns.
1382 
1383    The data input variable is intended primarily for Fortran programmers
1384    who wish to allocate their own matrix memory space.  Most users should
1385    set data=NULL.
1386 
1387    Level: intermediate
1388 
1389 .keywords: matrix,dense, parallel
1390 
1391 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1392 @*/
1393 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1394 {
1395   PetscErrorCode ierr;
1396 
1397   PetscFunctionBegin;
1398   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatCreateDense"
1404 /*@C
1405    MatCreateDense - Creates a parallel matrix in dense format.
1406 
1407    Collective on MPI_Comm
1408 
1409    Input Parameters:
1410 +  comm - MPI communicator
1411 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1412 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1413 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1414 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1415 -  data - optional location of matrix data.  Set data=NULL (NULL_SCALAR for Fortran users) for PETSc
1416    to control all matrix memory allocation.
1417 
1418    Output Parameter:
1419 .  A - the matrix
1420 
1421    Notes:
1422    The dense format is fully compatible with standard Fortran 77
1423    storage by columns.
1424 
1425    The data input variable is intended primarily for Fortran programmers
1426    who wish to allocate their own matrix memory space.  Most users should
1427    set data=NULL (NULL_SCALAR for Fortran users).
1428 
1429    The user MUST specify either the local or global matrix dimensions
1430    (possibly both).
1431 
1432    Level: intermediate
1433 
1434 .keywords: matrix,dense, parallel
1435 
1436 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1437 @*/
1438 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1439 {
1440   PetscErrorCode ierr;
1441   PetscMPIInt    size;
1442 
1443   PetscFunctionBegin;
1444   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1445   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1446   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1447   if (size > 1) {
1448     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1449     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1450   } else {
1451     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1452     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1453   }
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatDuplicate_MPIDense"
1459 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1460 {
1461   Mat            mat;
1462   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1463   PetscErrorCode ierr;
1464 
1465   PetscFunctionBegin;
1466   *newmat = 0;
1467   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1468   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1469   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1470   a       = (Mat_MPIDense*)mat->data;
1471   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1472 
1473   mat->factortype   = A->factortype;
1474   mat->assembled    = PETSC_TRUE;
1475   mat->preallocated = PETSC_TRUE;
1476 
1477   a->size         = oldmat->size;
1478   a->rank         = oldmat->rank;
1479   mat->insertmode = NOT_SET_VALUES;
1480   a->nvec         = oldmat->nvec;
1481   a->donotstash   = oldmat->donotstash;
1482 
1483   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1484   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1485 
1486   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1487   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1488   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1489 
1490   *newmat = mat;
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1496 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1497 {
1498   PetscErrorCode ierr;
1499   PetscMPIInt    rank,size;
1500   PetscInt       *rowners,i,m,nz,j;
1501   PetscScalar    *array,*vals,*vals_ptr;
1502 
1503   PetscFunctionBegin;
1504   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1505   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1506 
1507   /* determine ownership of all rows */
1508   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1509   else m = newmat->rmap->n;
1510   ierr       = PetscMalloc1((size+2),&rowners);CHKERRQ(ierr);
1511   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1512   rowners[0] = 0;
1513   for (i=2; i<=size; i++) {
1514     rowners[i] += rowners[i-1];
1515   }
1516 
1517   if (!sizesset) {
1518     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1519   }
1520   ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1521   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1522 
1523   if (!rank) {
1524     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1525 
1526     /* read in my part of the matrix numerical values  */
1527     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1528 
1529     /* insert into matrix-by row (this is why cannot directly read into array */
1530     vals_ptr = vals;
1531     for (i=0; i<m; i++) {
1532       for (j=0; j<N; j++) {
1533         array[i + j*m] = *vals_ptr++;
1534       }
1535     }
1536 
1537     /* read in other processors and ship out */
1538     for (i=1; i<size; i++) {
1539       nz   = (rowners[i+1] - rowners[i])*N;
1540       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1541       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1542     }
1543   } else {
1544     /* receive numeric values */
1545     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1546 
1547     /* receive message of values*/
1548     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1549 
1550     /* insert into matrix-by row (this is why cannot directly read into array */
1551     vals_ptr = vals;
1552     for (i=0; i<m; i++) {
1553       for (j=0; j<N; j++) {
1554         array[i + j*m] = *vals_ptr++;
1555       }
1556     }
1557   }
1558   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1559   ierr = PetscFree(rowners);CHKERRQ(ierr);
1560   ierr = PetscFree(vals);CHKERRQ(ierr);
1561   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1562   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatLoad_MPIDense"
1568 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1569 {
1570   PetscScalar    *vals,*svals;
1571   MPI_Comm       comm;
1572   MPI_Status     status;
1573   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1574   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1575   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1576   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1577   int            fd;
1578   PetscErrorCode ierr;
1579 
1580   PetscFunctionBegin;
1581   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1582   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1583   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1584   if (!rank) {
1585     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1586     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1587     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1588   }
1589   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1590 
1591   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1592   M    = header[1]; N = header[2]; nz = header[3];
1593 
1594   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1595   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1596   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1597 
1598   /* If global sizes are set, check if they are consistent with that given in the file */
1599   if (sizesset) {
1600     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1601   }
1602   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
1603   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
1604 
1605   /*
1606        Handle case where matrix is stored on disk as a dense matrix
1607   */
1608   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1609     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
1610     PetscFunctionReturn(0);
1611   }
1612 
1613   /* determine ownership of all rows */
1614   if (newmat->rmap->n < 0) {
1615     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1616   } else {
1617     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1618   }
1619   ierr       = PetscMalloc1((size+2),&rowners);CHKERRQ(ierr);
1620   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1621   rowners[0] = 0;
1622   for (i=2; i<=size; i++) {
1623     rowners[i] += rowners[i-1];
1624   }
1625   rstart = rowners[rank];
1626   rend   = rowners[rank+1];
1627 
1628   /* distribute row lengths to all processors */
1629   ierr = PetscMalloc2(rend-rstart,&ourlens,rend-rstart,&offlens);CHKERRQ(ierr);
1630   if (!rank) {
1631     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1632     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1633     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1634     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1635     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1636     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1637   } else {
1638     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1639   }
1640 
1641   if (!rank) {
1642     /* calculate the number of nonzeros on each processor */
1643     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1644     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1645     for (i=0; i<size; i++) {
1646       for (j=rowners[i]; j< rowners[i+1]; j++) {
1647         procsnz[i] += rowlengths[j];
1648       }
1649     }
1650     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1651 
1652     /* determine max buffer needed and allocate it */
1653     maxnz = 0;
1654     for (i=0; i<size; i++) {
1655       maxnz = PetscMax(maxnz,procsnz[i]);
1656     }
1657     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1658 
1659     /* read in my part of the matrix column indices  */
1660     nz   = procsnz[0];
1661     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1662     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1663 
1664     /* read in every one elses and ship off */
1665     for (i=1; i<size; i++) {
1666       nz   = procsnz[i];
1667       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1668       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1669     }
1670     ierr = PetscFree(cols);CHKERRQ(ierr);
1671   } else {
1672     /* determine buffer space needed for message */
1673     nz = 0;
1674     for (i=0; i<m; i++) {
1675       nz += ourlens[i];
1676     }
1677     ierr = PetscMalloc1((nz+1),&mycols);CHKERRQ(ierr);
1678 
1679     /* receive message of column indices*/
1680     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1681     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1682     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1683   }
1684 
1685   /* loop over local rows, determining number of off diagonal entries */
1686   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1687   jj   = 0;
1688   for (i=0; i<m; i++) {
1689     for (j=0; j<ourlens[i]; j++) {
1690       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1691       jj++;
1692     }
1693   }
1694 
1695   /* create our matrix */
1696   for (i=0; i<m; i++) ourlens[i] -= offlens[i];
1697 
1698   if (!sizesset) {
1699     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1700   }
1701   ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1702   for (i=0; i<m; i++) ourlens[i] += offlens[i];
1703 
1704   if (!rank) {
1705     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1706 
1707     /* read in my part of the matrix numerical values  */
1708     nz   = procsnz[0];
1709     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1710 
1711     /* insert into matrix */
1712     jj      = rstart;
1713     smycols = mycols;
1714     svals   = vals;
1715     for (i=0; i<m; i++) {
1716       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1717       smycols += ourlens[i];
1718       svals   += ourlens[i];
1719       jj++;
1720     }
1721 
1722     /* read in other processors and ship out */
1723     for (i=1; i<size; i++) {
1724       nz   = procsnz[i];
1725       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1726       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1727     }
1728     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1729   } else {
1730     /* receive numeric values */
1731     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
1732 
1733     /* receive message of values*/
1734     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1735     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1736     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1737 
1738     /* insert into matrix */
1739     jj      = rstart;
1740     smycols = mycols;
1741     svals   = vals;
1742     for (i=0; i<m; i++) {
1743       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1744       smycols += ourlens[i];
1745       svals   += ourlens[i];
1746       jj++;
1747     }
1748   }
1749   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
1750   ierr = PetscFree(vals);CHKERRQ(ierr);
1751   ierr = PetscFree(mycols);CHKERRQ(ierr);
1752   ierr = PetscFree(rowners);CHKERRQ(ierr);
1753 
1754   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1755   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1756   PetscFunctionReturn(0);
1757 }
1758 
1759 #undef __FUNCT__
1760 #define __FUNCT__ "MatEqual_MPIDense"
1761 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1762 {
1763   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1764   Mat            a,b;
1765   PetscBool      flg;
1766   PetscErrorCode ierr;
1767 
1768   PetscFunctionBegin;
1769   a    = matA->A;
1770   b    = matB->A;
1771   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1772   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1773   PetscFunctionReturn(0);
1774 }
1775 
1776