xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 3dd83b3843bef2e44404f8ac40307e9aa6509644)
1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatDenseGetLocalMatrix"
13 /*@
14 
15       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix that
16               stores the locally owned rows; for a MATSEQDENSE matrix, returns the matrix itself.
17 
18     Input Parameter:
19 .      A - the Seq or MPI dense matrix
20 
21     Output Parameter:
22 .      B - the inner matrix
23 
24     Level: intermediate
25 
26 @*/
27 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
28 {
29   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
30   PetscErrorCode ierr;
31   PetscBool      flg;
32 
33   PetscFunctionBegin;
34   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
35   if (flg) *B = mat->A;
36   else *B = A;
37   PetscFunctionReturn(0);
38 }
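/*
   A minimal usage sketch for MatDenseGetLocalMatrix() (hypothetical caller; assumes
   A is an assembled MATMPIDENSE or MATSEQDENSE matrix).  No new reference is taken,
   so the caller must not destroy the returned matrix:

       Mat      Aloc;
       PetscInt m,n;

       ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
       ierr = MatGetSize(Aloc,&m,&n);CHKERRQ(ierr);    local rows by global columns for MATMPIDENSE
*/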
39 
40 #undef __FUNCT__
41 #define __FUNCT__ "MatGetRow_MPIDense"
42 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
43 {
44   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
45   PetscErrorCode ierr;
46   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
47 
48   PetscFunctionBegin;
49   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
50   lrow = row - rstart;
51   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
52   PetscFunctionReturn(0);
53 }
54 
55 #undef __FUNCT__
56 #define __FUNCT__ "MatRestoreRow_MPIDense"
57 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
58 {
59   PetscErrorCode ierr;
60 
61   PetscFunctionBegin;
62   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
63   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
69 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
70 {
71   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
72   PetscErrorCode ierr;
73   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
74   PetscScalar    *array;
75   MPI_Comm       comm;
76   Mat            B;
77 
78   PetscFunctionBegin;
79   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
80 
81   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
82   if (!B) {
83     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
84     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
85     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
86     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
87     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
88     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
89     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
90     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
91     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
92     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
93     *a   = B;
94     ierr = MatDestroy(&B);CHKERRQ(ierr);
95   } else *a = B;
96   PetscFunctionReturn(0);
97 }
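/*
   Usage sketch (hypothetical; assumes A is a square, assembled MATMPIDENSE matrix).
   The block is created once and cached on A via PetscObjectCompose() above, so
   repeated calls return the same sequential matrix; the caller must not destroy it:

       Mat a;

       ierr = MatGetDiagonalBlock(A,&a);CHKERRQ(ierr);
*/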
98 
99 #undef __FUNCT__
100 #define __FUNCT__ "MatSetValues_MPIDense"
101 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
102 {
103   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
104   PetscErrorCode ierr;
105   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
106   PetscBool      roworiented = A->roworiented;
107 
108   PetscFunctionBegin;
109   for (i=0; i<m; i++) {
110     if (idxm[i] < 0) continue;
111     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
112     if (idxm[i] >= rstart && idxm[i] < rend) {
113       row = idxm[i] - rstart;
114       if (roworiented) {
115         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
116       } else {
117         for (j=0; j<n; j++) {
118           if (idxn[j] < 0) continue;
119           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
120           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
121         }
122       }
123     } else if (!A->donotstash) {
124       mat->assembled = PETSC_FALSE;
125       if (roworiented) {
126         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
127       } else {
128         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
129       }
130     }
131   }
132   PetscFunctionReturn(0);
133 }
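/*
   Assembly sketch (hypothetical caller).  Entries destined for rows owned by another
   process are stashed above and routed to their owners by the assembly routines, so
   the same insertion loop works no matter which process generates an entry:

       PetscInt    i,j;        hypothetical global indices
       PetscScalar v;

       ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/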
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "MatGetValues_MPIDense"
137 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
138 {
139   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
140   PetscErrorCode ierr;
141   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
142 
143   PetscFunctionBegin;
144   for (i=0; i<m; i++) {
145     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
146     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
147     if (idxm[i] >= rstart && idxm[i] < rend) {
148       row = idxm[i] - rstart;
149       for (j=0; j<n; j++) {
150         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
151         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
152         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
153       }
154     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
155   }
156   PetscFunctionReturn(0);
157 }
158 
159 #undef __FUNCT__
160 #define __FUNCT__ "MatDenseGetArray_MPIDense"
161 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
162 {
163   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
164   PetscErrorCode ierr;
165 
166   PetscFunctionBegin;
167   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 #undef __FUNCT__
172 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
173 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
174 {
175   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
176   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
177   PetscErrorCode ierr;
178   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
179   const PetscInt *irow,*icol;
180   PetscScalar    *av,*bv,*v = lmat->v;
181   Mat            newmat;
182   IS             iscol_local;
183 
184   PetscFunctionBegin;
185   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
186   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
187   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
188   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
189   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
190   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
191 
192   /* No parallel redistribution currently supported! Should really check each index set
193      to confirm that it is OK.  ... Currently supports only a submatrix with the same row
194      partitioning as the original matrix! */
195 
196   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
197   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
198 
199   /* Check submatrix call */
200   if (scall == MAT_REUSE_MATRIX) {
201     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
202     /* Really need to test row and column sizes! */
203     newmat = *B;
204   } else {
205     /* Create and fill new matrix */
206     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
207     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
208     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
209     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
210   }
211 
212   /* Now extract the data pointers and do the copy, column at a time */
213   newmatd = (Mat_MPIDense*)newmat->data;
214   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
215 
216   for (i=0; i<Ncols; i++) {
217     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
218     for (j=0; j<nrows; j++) {
219       *bv++ = av[irow[j] - rstart];
220     }
221   }
222 
223   /* Assemble the matrices so that the correct flags are set */
224   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
225   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
226 
227   /* Free work space */
228   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
229   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
230   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
231   *B   = newmat;
232   PetscFunctionReturn(0);
233 }
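/*
   Usage sketch (hypothetical; subject to the restriction noted above that the row
   index set must follow the row partitioning of the original matrix):

       IS  isrow,iscol;
       Mat B;

       ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
*/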
234 
235 #undef __FUNCT__
236 #define __FUNCT__ "MatDenseRestoreArray_MPIDense"
237 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
238 {
239   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
240   PetscErrorCode ierr;
241 
242   PetscFunctionBegin;
243   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
249 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
250 {
251   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
252   MPI_Comm       comm;
253   PetscErrorCode ierr;
254   PetscInt       nstash,reallocs;
255   InsertMode     addv;
256 
257   PetscFunctionBegin;
258   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
259   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
260   ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
261   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
262   mat->insertmode = addv; /* in case this processor had no cache */
263 
264   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
265   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
266   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
272 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
273 {
274   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
275   PetscErrorCode ierr;
276   PetscInt       i,*row,*col,flg,j,rstart,ncols;
277   PetscMPIInt    n;
278   PetscScalar    *val;
279 
280   PetscFunctionBegin;
281   /*  wait on receives */
282   while (1) {
283     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
284     if (!flg) break;
285 
286     for (i=0; i<n;) {
287       /* Now identify the consecutive vals belonging to the same row */
288       for (j=i,rstart=row[j]; j<n; j++) {
289         if (row[j] != rstart) break;
290       }
291       if (j < n) ncols = j-i;
292       else       ncols = n-i;
293       /* Now assemble all these values with a single function call */
294       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
295       i    = j;
296     }
297   }
298   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
299 
300   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
301   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
302 
303   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
304     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
305   }
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatZeroEntries_MPIDense"
311 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
312 {
313   PetscErrorCode ierr;
314   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
315 
316   PetscFunctionBegin;
317   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
318   PetscFunctionReturn(0);
319 }
320 
321 /* the code does not do the diagonal entries correctly unless the
322    matrix is square and the column and row ownerships are identical.
323    This is a BUG. The only way to fix it seems to be to access
324    the local matrix mdn->A directly and not go through the MatZeroRows()
325    routine.
326 */
327 #undef __FUNCT__
328 #define __FUNCT__ "MatZeroRows_MPIDense"
329 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
330 {
331   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
332   PetscErrorCode    ierr;
333   PetscInt          i,*owners = A->rmap->range;
334   PetscInt          *sizes,j,idx,nsends;
335   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
336   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
337   PetscInt          *lens,*lrows,*values;
338   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
339   MPI_Comm          comm;
340   MPI_Request       *send_waits,*recv_waits;
341   MPI_Status        recv_status,*send_status;
342   PetscBool         found;
343   const PetscScalar *xx;
344   PetscScalar       *bb;
345 
346   PetscFunctionBegin;
347   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
348   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
349   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
350   /*  first count number of contributors to each processor */
351   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
352   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note */
353   for (i=0; i<N; i++) {
354     idx   = rows[i];
355     found = PETSC_FALSE;
356     for (j=0; j<size; j++) {
357       if (idx >= owners[j] && idx < owners[j+1]) {
358         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
359       }
360     }
361     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
362   }
363   nsends = 0;
364   for (i=0; i<size; i++) nsends += sizes[2*i+1];
365 
366   /* inform other processors of number of messages and max length*/
367   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
368 
369   /* post receives:   */
370   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
371   ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
372   for (i=0; i<nrecvs; i++) {
373     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
374   }
375 
376   /* do sends:
377       1) starts[i] gives the starting index in svalues for stuff going to
378          the ith processor
379   */
380   ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
381   ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
382   ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);
383 
384   starts[0] = 0;
385   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
386   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
387 
388   starts[0] = 0;
389   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
390   count = 0;
391   for (i=0; i<size; i++) {
392     if (sizes[2*i+1]) {
393       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
394     }
395   }
396   ierr = PetscFree(starts);CHKERRQ(ierr);
397 
398   base = owners[rank];
399 
400   /*  wait on receives */
401   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
402   count = nrecvs;
403   slen  = 0;
404   while (count) {
405     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
406     /* unpack receives into our local space */
407     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
408 
409     source[imdex] = recv_status.MPI_SOURCE;
410     lens[imdex]   = n;
411     slen += n;
412     count--;
413   }
414   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
415 
416   /* move the data into the send scatter */
417   ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
418   count = 0;
419   for (i=0; i<nrecvs; i++) {
420     values = rvalues + i*nmax;
421     for (j=0; j<lens[i]; j++) {
422       lrows[count++] = values[j] - base;
423     }
424   }
425   ierr = PetscFree(rvalues);CHKERRQ(ierr);
426   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
427   ierr = PetscFree(owner);CHKERRQ(ierr);
428   ierr = PetscFree(sizes);CHKERRQ(ierr);
429 
430   /* fix right hand side if needed */
431   if (x && b) {
432     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
433     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
434     for (i=0; i<slen; i++) {
435       bb[lrows[i]] = diag*xx[lrows[i]];
436     }
437     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
438     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
439   }
440 
441   /* actually zap the local rows */
442   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
443   if (diag != 0.0) {
444     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
445     PetscInt     m   = ll->lda, i;
446 
447     for (i=0; i<slen; i++) {
448       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
449     }
450   }
451   ierr = PetscFree(lrows);CHKERRQ(ierr);
452 
453   /* wait on sends */
454   if (nsends) {
455     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
456     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
457     ierr = PetscFree(send_status);CHKERRQ(ierr);
458   }
459   ierr = PetscFree(send_waits);CHKERRQ(ierr);
460   ierr = PetscFree(svalues);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
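/*
   Usage sketch (hypothetical).  Rows may be listed by global index on any process;
   the rendezvous above ships each index to its owner.  With a nonzero diag and
   vectors x and b supplied, b is fixed up so the zeroed rows still satisfy
   b[row] = diag*x[row]:

       PetscInt rows[] = {0,5};   hypothetical global row indices

       ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/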
463 
464 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
465 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
466 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
467 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
468 
469 #undef __FUNCT__
470 #define __FUNCT__ "MatMult_MPIDense"
471 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
472 {
473   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
474   PetscErrorCode ierr;
475 
476   PetscFunctionBegin;
477   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatMultAdd_MPIDense"
485 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
486 {
487   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
488   PetscErrorCode ierr;
489 
490   PetscFunctionBegin;
491   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
492   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
493   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
494   PetscFunctionReturn(0);
495 }
496 
497 #undef __FUNCT__
498 #define __FUNCT__ "MatMultTranspose_MPIDense"
499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
500 {
501   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
502   PetscErrorCode ierr;
503   PetscScalar    zero = 0.0;
504 
505   PetscFunctionBegin;
506   ierr = VecSet(yy,zero);CHKERRQ(ierr);
507   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
508   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
509   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
510   PetscFunctionReturn(0);
511 }
512 
513 #undef __FUNCT__
514 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
515 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
516 {
517   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
518   PetscErrorCode ierr;
519 
520   PetscFunctionBegin;
521   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
522   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
523   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
524   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
525   PetscFunctionReturn(0);
526 }
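/*
   Note on the four products above: each process stores full rows of the matrix, so
   MatMult() scatters x forward into the full-length work vector lvec and then does
   a local product, while MatMultTranspose() does a local transpose product into
   lvec and reverse-scatters with ADD_VALUES, summing each process's partial
   contribution to every entry of the result.
*/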
527 
528 #undef __FUNCT__
529 #define __FUNCT__ "MatGetDiagonal_MPIDense"
530 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
531 {
532   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
533   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
534   PetscErrorCode ierr;
535   PetscInt       len,i,n,m = A->rmap->n,radd;
536   PetscScalar    *x,zero = 0.0;
537 
538   PetscFunctionBegin;
539   ierr = VecSet(v,zero);CHKERRQ(ierr);
540   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
541   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
542   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
543   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
544   radd = A->rmap->rstart*m;
545   for (i=0; i<len; i++) {
546     x[i] = aloc->v[radd + i*m + i];
547   }
548   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
549   PetscFunctionReturn(0);
550 }
551 
552 #undef __FUNCT__
553 #define __FUNCT__ "MatDestroy_MPIDense"
554 PetscErrorCode MatDestroy_MPIDense(Mat mat)
555 {
556   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
557   PetscErrorCode ierr;
558 
559   PetscFunctionBegin;
560 #if defined(PETSC_USE_LOG)
561   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
562 #endif
563   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
564   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
565   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
566   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
567 
568   ierr = PetscFree(mat->data);CHKERRQ(ierr);
569   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
570 
571   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
572   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
573 #if defined(PETSC_HAVE_ELEMENTAL)
574   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
575 #endif
576   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
577   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
578   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
579   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
580   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
581   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
582   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
583   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
584   PetscFunctionReturn(0);
585 }
586 
587 #undef __FUNCT__
588 #define __FUNCT__ "MatView_MPIDense_Binary"
589 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
590 {
591   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
592   PetscErrorCode    ierr;
593   PetscViewerFormat format;
594   int               fd;
595   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
596   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
597   PetscScalar       *work,*v,*vv;
598   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
599 
600   PetscFunctionBegin;
601   if (mdn->size == 1) {
602     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
603   } else {
604     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
605     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
606     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
607 
608     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
609     if (format == PETSC_VIEWER_NATIVE) {
610 
611       if (!rank) {
612         /* store the matrix as a dense matrix */
613         header[0] = MAT_FILE_CLASSID;
614         header[1] = mat->rmap->N;
615         header[2] = N;
616         header[3] = MATRIX_BINARY_FORMAT_DENSE;
617         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
618 
619         /* get largest work array needed for transposing array */
620         mmax = mat->rmap->n;
621         for (i=1; i<size; i++) {
622           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
623         }
624         ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);
625 
626         /* write out local array, by rows */
627         m = mat->rmap->n;
628         v = a->v;
629         for (j=0; j<N; j++) {
630           for (i=0; i<m; i++) {
631             work[j + i*N] = *v++;
632           }
633         }
634         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
635         /* get largest work array to receive messages from other processes, excludes process zero */
636         mmax = 0;
637         for (i=1; i<size; i++) {
638           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
639         }
640         ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
641         for (k = 1; k < size; k++) {
642           v    = vv;
643           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
644           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
645 
646           for (j = 0; j < N; j++) {
647             for (i = 0; i < m; i++) {
648               work[j + i*N] = *v++;
649             }
650           }
651           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
652         }
653         ierr = PetscFree(work);CHKERRQ(ierr);
654         ierr = PetscFree(vv);CHKERRQ(ierr);
655       } else {
656         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
657       }
658     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
659   }
660   PetscFunctionReturn(0);
661 }
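/*
   Usage sketch (hypothetical): as the error message above states, a parallel dense
   matrix can be stored in binary only in the native format:

       PetscViewer viewer;

       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
       ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
       ierr = MatView(mat,viewer);CHKERRQ(ierr);
       ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/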
662 
663 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
664 #include <petscdraw.h>
665 #undef __FUNCT__
666 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
667 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
668 {
669   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
670   PetscErrorCode    ierr;
671   PetscMPIInt       rank = mdn->rank;
672   PetscViewerType   vtype;
673   PetscBool         iascii,isdraw;
674   PetscViewer       sviewer;
675   PetscViewerFormat format;
676 
677   PetscFunctionBegin;
678   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
679   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
680   if (iascii) {
681     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
682     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
683     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
684       MatInfo info;
685       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
686       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
687       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
688       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
689       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
690       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
691       PetscFunctionReturn(0);
692     } else if (format == PETSC_VIEWER_ASCII_INFO) {
693       PetscFunctionReturn(0);
694     }
695   } else if (isdraw) {
696     PetscDraw draw;
697     PetscBool isnull;
698 
699     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
700     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
701     if (isnull) PetscFunctionReturn(0);
702   }
703 
704   {
705     /* assemble the entire matrix onto first processor. */
706     Mat         A;
707     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
708     PetscInt    *cols;
709     PetscScalar *vals;
710 
711     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
712     if (!rank) {
713       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
714     } else {
715       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
716     }
717     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
718     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
719     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
720     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
721 
722     /* Copy the matrix ... This isn't the most efficient means,
723        but it's quick for now */
724     A->insertmode = INSERT_VALUES;
725 
726     row = mat->rmap->rstart;
727     m   = mdn->A->rmap->n;
728     for (i=0; i<m; i++) {
729       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
730       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
731       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
732       row++;
733     }
734 
735     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
736     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
737     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
738     if (!rank) {
739       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
740       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
741     }
742     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
743     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
744     ierr = MatDestroy(&A);CHKERRQ(ierr);
745   }
746   PetscFunctionReturn(0);
747 }
748 
749 #undef __FUNCT__
750 #define __FUNCT__ "MatView_MPIDense"
751 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
752 {
753   PetscErrorCode ierr;
754   PetscBool      iascii,isbinary,isdraw,issocket;
755 
756   PetscFunctionBegin;
757   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
758   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
759   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
760   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
761 
762   if (iascii || issocket || isdraw) {
763     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
764   } else if (isbinary) {
765     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
766   }
767   PetscFunctionReturn(0);
768 }
769 
770 #undef __FUNCT__
771 #define __FUNCT__ "MatGetInfo_MPIDense"
772 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
773 {
774   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
775   Mat            mdn  = mat->A;
776   PetscErrorCode ierr;
777   PetscReal      isend[5],irecv[5];
778 
779   PetscFunctionBegin;
780   info->block_size = 1.0;
781 
782   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
783 
784   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
785   isend[3] = info->memory;  isend[4] = info->mallocs;
786   if (flag == MAT_LOCAL) {
787     info->nz_used      = isend[0];
788     info->nz_allocated = isend[1];
789     info->nz_unneeded  = isend[2];
790     info->memory       = isend[3];
791     info->mallocs      = isend[4];
792   } else if (flag == MAT_GLOBAL_MAX) {
793     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
794 
795     info->nz_used      = irecv[0];
796     info->nz_allocated = irecv[1];
797     info->nz_unneeded  = irecv[2];
798     info->memory       = irecv[3];
799     info->mallocs      = irecv[4];
800   } else if (flag == MAT_GLOBAL_SUM) {
801     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
802 
803     info->nz_used      = irecv[0];
804     info->nz_allocated = irecv[1];
805     info->nz_unneeded  = irecv[2];
806     info->memory       = irecv[3];
807     info->mallocs      = irecv[4];
808   }
809   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
810   info->fill_ratio_needed = 0;
811   info->factor_mallocs    = 0;
812   PetscFunctionReturn(0);
813 }
814 
815 #undef __FUNCT__
816 #define __FUNCT__ "MatSetOption_MPIDense"
817 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
818 {
819   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
820   PetscErrorCode ierr;
821 
822   PetscFunctionBegin;
823   switch (op) {
824   case MAT_NEW_NONZERO_LOCATIONS:
825   case MAT_NEW_NONZERO_LOCATION_ERR:
826   case MAT_NEW_NONZERO_ALLOCATION_ERR:
827     MatCheckPreallocated(A,1);
828     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
829     break;
830   case MAT_ROW_ORIENTED:
831     MatCheckPreallocated(A,1);
832     a->roworiented = flg;
833     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
834     break;
835   case MAT_NEW_DIAGONALS:
836   case MAT_KEEP_NONZERO_PATTERN:
837   case MAT_USE_HASH_TABLE:
838     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
839     break;
840   case MAT_IGNORE_OFF_PROC_ENTRIES:
841     a->donotstash = flg;
842     break;
843   case MAT_SYMMETRIC:
844   case MAT_STRUCTURALLY_SYMMETRIC:
845   case MAT_HERMITIAN:
846   case MAT_SYMMETRY_ETERNAL:
847   case MAT_IGNORE_LOWER_TRIANGULAR:
848     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
849     break;
850   default:
851     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
852   }
853   PetscFunctionReturn(0);
854 }
855 
856 
857 #undef __FUNCT__
858 #define __FUNCT__ "MatDiagonalScale_MPIDense"
859 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
860 {
861   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
862   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
863   PetscScalar    *l,*r,x,*v;
864   PetscErrorCode ierr;
865   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
866 
867   PetscFunctionBegin;
868   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
869   if (ll) {
870     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
871     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D.", s2a, s2);
872     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
873     for (i=0; i<m; i++) {
874       x = l[i];
875       v = mat->v + i;
876       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
877     }
878     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
879     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
880   }
881   if (rr) {
882     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
883     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %D != %D.", s3a, s3);
884     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
887     for (i=0; i<n; i++) {
888       x = r[i];
889       v = mat->v + i*m;
890       for (j=0; j<m; j++) (*v++) *= x;
891     }
892     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
893     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
894   }
895   PetscFunctionReturn(0);
896 }
897 
898 #undef __FUNCT__
899 #define __FUNCT__ "MatNorm_MPIDense"
900 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
901 {
902   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
903   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
904   PetscErrorCode ierr;
905   PetscInt       i,j;
906   PetscReal      sum = 0.0;
907   PetscScalar    *v  = mat->v;
908 
909   PetscFunctionBegin;
910   if (mdn->size == 1) {
911     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
912   } else {
913     if (type == NORM_FROBENIUS) {
914       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
915         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
916       }
917       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
918       *nrm = PetscSqrtReal(*nrm);
919       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
920     } else if (type == NORM_1) {
921       PetscReal *tmp,*tmp2;
922       ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
923       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
924       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
925       *nrm = 0.0;
926       v    = mat->v;
927       for (j=0; j<mdn->A->cmap->n; j++) {
928         for (i=0; i<mdn->A->rmap->n; i++) {
929           tmp[j] += PetscAbsScalar(*v);  v++;
930         }
931       }
932       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
933       for (j=0; j<A->cmap->N; j++) {
934         if (tmp2[j] > *nrm) *nrm = tmp2[j];
935       }
936       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
937       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
938     } else if (type == NORM_INFINITY) { /* max row norm */
939       PetscReal ntemp;
940       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
941       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
942     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
943   }
944   PetscFunctionReturn(0);
945 }
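/*
   The Frobenius branch above reduces the local sums of squares and takes one square
   root at the end, i.e. ||A||_F = sqrt(sum over all processes of sum_ij |a_ij|^2);
   the 1-norm reduces the per-column absolute sums across processes before maximizing
   over columns, and the infinity norm maximizes the local row sums directly.
*/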
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatTranspose_MPIDense"
949 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
950 {
951   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
952   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
953   Mat            B;
954   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
955   PetscErrorCode ierr;
956   PetscInt       j,i;
957   PetscScalar    *v;
958 
959   PetscFunctionBegin;
960   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
961   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
962     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
963     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
964     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
965     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
966   } else {
967     B = *matout;
968   }
969 
970   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
971   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
972   for (i=0; i<m; i++) rwork[i] = rstart + i;
973   for (j=0; j<n; j++) {
974     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
975     v   += m;
976   }
977   ierr = PetscFree(rwork);CHKERRQ(ierr);
978   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
979   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
980   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
981     *matout = B;
982   } else {
983     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
984   }
985   PetscFunctionReturn(0);
986 }
987 
988 
989 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
990 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatSetUp_MPIDense"
994 PetscErrorCode MatSetUp_MPIDense(Mat A)
995 {
996   PetscErrorCode ierr;
997 
998   PetscFunctionBegin;
999   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatAXPY_MPIDense"
1005 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1006 {
1007   PetscErrorCode ierr;
1008   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1009 
1010   PetscFunctionBegin;
1011   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1012   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 #undef __FUNCT__
1017 #define __FUNCT__ "MatConjugate_MPIDense"
1018 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1019 {
1020   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1021   PetscErrorCode ierr;
1022 
1023   PetscFunctionBegin;
1024   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "MatRealPart_MPIDense"
1030 PetscErrorCode MatRealPart_MPIDense(Mat A)
1031 {
1032   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1033   PetscErrorCode ierr;
1034 
1035   PetscFunctionBegin;
1036   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1037   PetscFunctionReturn(0);
1038 }
1039 
1040 #undef __FUNCT__
1041 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1042 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1043 {
1044   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1045   PetscErrorCode ierr;
1046 
1047   PetscFunctionBegin;
1048   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1053 #undef __FUNCT__
1054 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1055 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1056 {
1057   PetscErrorCode ierr;
1058   PetscInt       i,n;
1059   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1060   PetscReal      *work;
1061 
1062   PetscFunctionBegin;
1063   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1064   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1065   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1066   if (type == NORM_2) {
1067     for (i=0; i<n; i++) work[i] *= work[i];
1068   }
1069   if (type == NORM_INFINITY) {
1070     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1071   } else {
1072     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1073   }
1074   ierr = PetscFree(work);CHKERRQ(ierr);
1075   if (type == NORM_2) {
1076     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1077   }
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 #undef __FUNCT__
1082 #define __FUNCT__ "MatSetRandom_MPIDense"
1083 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1084 {
1085   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1086   PetscErrorCode ierr;
1087   PetscScalar    *a;
1088   PetscInt       m,n,i;
1089 
1090   PetscFunctionBegin;
1091   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1092   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1093   for (i=0; i<m*n; i++) {
1094     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1095   }
1096   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatMissingDiagonal_MPIDense"
1104 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1105 {
1106   PetscFunctionBegin;
1107   *missing = PETSC_FALSE;
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 /* -------------------------------------------------------------------*/
1112 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1113                                         MatGetRow_MPIDense,
1114                                         MatRestoreRow_MPIDense,
1115                                         MatMult_MPIDense,
1116                                 /*  4*/ MatMultAdd_MPIDense,
1117                                         MatMultTranspose_MPIDense,
1118                                         MatMultTransposeAdd_MPIDense,
1119                                         0,
1120                                         0,
1121                                         0,
1122                                 /* 10*/ 0,
1123                                         0,
1124                                         0,
1125                                         0,
1126                                         MatTranspose_MPIDense,
1127                                 /* 15*/ MatGetInfo_MPIDense,
1128                                         MatEqual_MPIDense,
1129                                         MatGetDiagonal_MPIDense,
1130                                         MatDiagonalScale_MPIDense,
1131                                         MatNorm_MPIDense,
1132                                 /* 20*/ MatAssemblyBegin_MPIDense,
1133                                         MatAssemblyEnd_MPIDense,
1134                                         MatSetOption_MPIDense,
1135                                         MatZeroEntries_MPIDense,
1136                                 /* 24*/ MatZeroRows_MPIDense,
1137                                         0,
1138                                         0,
1139                                         0,
1140                                         0,
1141                                 /* 29*/ MatSetUp_MPIDense,
1142                                         0,
1143                                         0,
1144                                         0,
1145                                         0,
1146                                 /* 34*/ MatDuplicate_MPIDense,
1147                                         0,
1148                                         0,
1149                                         0,
1150                                         0,
1151                                 /* 39*/ MatAXPY_MPIDense,
1152                                         MatGetSubMatrices_MPIDense,
1153                                         0,
1154                                         MatGetValues_MPIDense,
1155                                         0,
1156                                 /* 44*/ 0,
1157                                         MatScale_MPIDense,
1158                                         MatShift_Basic,
1159                                         0,
1160                                         0,
1161                                 /* 49*/ MatSetRandom_MPIDense,
1162                                         0,
1163                                         0,
1164                                         0,
1165                                         0,
1166                                 /* 54*/ 0,
1167                                         0,
1168                                         0,
1169                                         0,
1170                                         0,
1171                                 /* 59*/ MatGetSubMatrix_MPIDense,
1172                                         MatDestroy_MPIDense,
1173                                         MatView_MPIDense,
1174                                         0,
1175                                         0,
1176                                 /* 64*/ 0,
1177                                         0,
1178                                         0,
1179                                         0,
1180                                         0,
1181                                 /* 69*/ 0,
1182                                         0,
1183                                         0,
1184                                         0,
1185                                         0,
1186                                 /* 74*/ 0,
1187                                         0,
1188                                         0,
1189                                         0,
1190                                         0,
1191                                 /* 79*/ 0,
1192                                         0,
1193                                         0,
1194                                         0,
1195                                 /* 83*/ MatLoad_MPIDense,
1196                                         0,
1197                                         0,
1198                                         0,
1199                                         0,
1200                                         0,
1201 #if defined(PETSC_HAVE_ELEMENTAL)
1202                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1203                                         MatMatMultSymbolic_MPIDense_MPIDense,
1204 #else
1205                                 /* 89*/ 0,
1206                                         0,
1207 #endif
1208                                         MatMatMultNumeric_MPIDense,
1209                                         0,
1210                                         0,
1211                                 /* 94*/ 0,
1212                                         0,
1213                                         0,
1214                                         0,
1215                                         0,
1216                                 /* 99*/ 0,
1217                                         0,
1218                                         0,
1219                                         MatConjugate_MPIDense,
1220                                         0,
1221                                 /*104*/ 0,
1222                                         MatRealPart_MPIDense,
1223                                         MatImaginaryPart_MPIDense,
1224                                         0,
1225                                         0,
1226                                 /*109*/ 0,
1227                                         0,
1228                                         0,
1229                                         0,
1230                                         MatMissingDiagonal_MPIDense,
1231                                 /*114*/ 0,
1232                                         0,
1233                                         0,
1234                                         0,
1235                                         0,
1236                                 /*119*/ 0,
1237                                         0,
1238                                         0,
1239                                         0,
1240                                         0,
1241                                 /*124*/ 0,
1242                                         MatGetColumnNorms_MPIDense,
1243                                         0,
1244                                         0,
1245                                         0,
1246                                 /*129*/ 0,
1247                                         MatTransposeMatMult_MPIDense_MPIDense,
1248                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1249                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1250                                         0,
1251                                 /*134*/ 0,
1252                                         0,
1253                                         0,
1254                                         0,
1255                                         0,
1256                                 /*139*/ 0,
1257                                         0,
1258                                         0
1259 };
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1263 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1264 {
1265   Mat_MPIDense   *a;
1266   PetscErrorCode ierr;
1267 
1268   PetscFunctionBegin;
1269   mat->preallocated = PETSC_TRUE;
1270   /* Note:  For now, when data is provided, this assumes that the user correctly
1271      allocates the local dense storage space.  We should add error checking. */
1272 
1273   a       = (Mat_MPIDense*)mat->data;
1274   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1275   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1276   a->nvec = mat->cmap->n;
1277 
1278   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1279   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1280   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1281   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1282   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 #if defined(PETSC_HAVE_ELEMENTAL)
1287 #undef __FUNCT__
1288 #define __FUNCT__ "MatConvert_MPIDense_Elemental"
1289 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1290 {
1291   Mat            mat_elemental;
1292   PetscErrorCode ierr;
1293   PetscScalar    *v;
1294   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1295 
1296   PetscFunctionBegin;
1297   if (reuse == MAT_REUSE_MATRIX) {
1298     mat_elemental = *newmat;
1299     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1300   } else {
1301     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1302     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1303     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1304     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1305     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1306   }
1307 
1308   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1309   for (i=0; i<N; i++) cols[i] = i;
1310   for (i=0; i<m; i++) rows[i] = rstart + i;
1311 
1312   /* The PETSc-Elemental interface uses axpy for setting off-processor entries; only ADD_VALUES is allowed */
1313   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1314   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1315   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1316   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1317   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1318   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1319 
1320   if (reuse == MAT_INPLACE_MATRIX) {
1321     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1322   } else {
1323     *newmat = mat_elemental;
1324   }
1325   PetscFunctionReturn(0);
1326 }
1327 #endif
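/*
   Conversion sketch (hypothetical; requires PETSc configured with Elemental):

       Mat B;

       ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
*/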
1328 
1329 #undef __FUNCT__
1330 #define __FUNCT__ "MatCreate_MPIDense"
1331 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1332 {
1333   Mat_MPIDense   *a;
1334   PetscErrorCode ierr;
1335 
1336   PetscFunctionBegin;
1337   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1338   mat->data = (void*)a;
1339   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1340 
1341   mat->insertmode = NOT_SET_VALUES;
1342   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1343   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1344 
1345   /* build cache (stash) for off-processor entries formed during assembly */
1346   a->donotstash = PETSC_FALSE;
1347 
1348   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1349 
1350   /* stuff used for matrix vector multiply */
1351   a->lvec        = 0;
1352   a->Mvctx       = 0;
1353   a->roworiented = PETSC_TRUE;
1354 
1355   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1356   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1357 #if defined(PETSC_HAVE_ELEMENTAL)
1358   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1359 #endif
1360   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1361   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1362   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1363   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1364   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1365 
1366   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1367   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1368   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1369   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
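
/*
   The "..._C" methods composed above are the type-specific implementations
   that the generic interfaces locate at run time. A sketch of the dispatch
   pattern (it mirrors MatMPIDenseSetPreallocation() below):

     ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);

   PetscTryMethod() queries the object for the composed function pointer and
   invokes it only if present, so types lacking an implementation are skipped
   without error.
*/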
1372 
1373 /*MC
1374    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1375 
1376    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1377    and MATMPIDENSE otherwise.
1378 
1379    Options Database Keys:
1380 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
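
   For example, a minimal sketch that lets -mat_type dense select this type at run time:
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
.ve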
1381 
1382   Level: beginner
1383 
1384 
1385 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1386 M*/
1387 
1388 #undef __FUNCT__
1389 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1390 /*@C
1391    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1392 
1393    Not collective
1394 
1395    Input Parameters:
1396 +  B - the matrix
1397 -  data - optional location of matrix data.  Set data=NULL for PETSc
1398    to control all matrix memory allocation.
1399 
1400    Notes:
1401    The dense format is fully compatible with standard Fortran 77
1402    storage by columns.
1403 
1404    The data input variable is intended primarily for Fortran programmers
1405    who wish to allocate their own matrix memory space.  Most users should
1406    set data=NULL.
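
   A minimal call sequence sketch, letting PETSc allocate the storage:
.vb
   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPIDENSE);CHKERRQ(ierr);
   ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
.ve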
1407 
1408    Level: intermediate
1409 
1410 .keywords: matrix, dense, parallel
1411 
1412 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1413 @*/
1414 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1415 {
1416   PetscErrorCode ierr;
1417 
1418   PetscFunctionBegin;
1419   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatCreateDense"
1425 /*@C
1426    MatCreateDense - Creates a parallel matrix in dense format.
1427 
1428    Collective on MPI_Comm
1429 
1430    Input Parameters:
1431 +  comm - MPI communicator
1432 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1433 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1434 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1435 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1436 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1437    to control all matrix memory allocation.
1438 
1439    Output Parameter:
1440 .  A - the matrix
1441 
1442    Notes:
1443    The dense format is fully compatible with standard Fortran 77
1444    storage by columns.
1445 
1446    The data input variable is intended primarily for Fortran programmers
1447    who wish to allocate their own matrix memory space.  Most users should
1448    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1449 
1450    The user MUST specify either the local or global matrix dimensions
1451    (possibly both).
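
   For example, a sketch that creates a dense matrix with PETSc-managed
   storage and global dimensions M by N:
.vb
   ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
   ... set entries with MatSetValues() ...
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve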
1452 
1453    Level: intermediate
1454 
1455 .keywords: matrix, dense, parallel
1456 
1457 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1458 @*/
1459 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1460 {
1461   PetscErrorCode ierr;
1462   PetscMPIInt    size;
1463 
1464   PetscFunctionBegin;
1465   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1466   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1467   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1468   if (size > 1) {
1469     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1470     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1471     if (data) {  /* user provided data array, so no need to assemble */
1472       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1473       (*A)->assembled = PETSC_TRUE;
1474     }
1475   } else {
1476     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1477     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1478   }
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 #undef __FUNCT__
1483 #define __FUNCT__ "MatDuplicate_MPIDense"
1484 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1485 {
1486   Mat            mat;
1487   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1488   PetscErrorCode ierr;
1489 
1490   PetscFunctionBegin;
1491   *newmat = 0;
1492   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1493   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1494   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1495   a       = (Mat_MPIDense*)mat->data;
1496   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1497 
1498   mat->factortype   = A->factortype;
1499   mat->assembled    = PETSC_TRUE;
1500   mat->preallocated = PETSC_TRUE;
1501 
1502   a->size         = oldmat->size;
1503   a->rank         = oldmat->rank;
1504   mat->insertmode = NOT_SET_VALUES;
1505   a->nvec         = oldmat->nvec;
1506   a->donotstash   = oldmat->donotstash;
1507 
1508   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1509   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1510 
1511   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1512   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1513   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1514 
1515   *newmat = mat;
1516   PetscFunctionReturn(0);
1517 }
1518 
1519 #undef __FUNCT__
1520 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1521 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1522 {
1523   PetscErrorCode ierr;
1524   PetscMPIInt    rank,size;
1525   const PetscInt *rowners;
1526   PetscInt       i,m,n,nz,j,mMax;
1527   PetscScalar    *array,*vals,*vals_ptr;
1528   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1529 
1530   PetscFunctionBegin;
1531   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1532   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1533 
1534   /* determine ownership of rows and columns */
1535   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1536   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1537 
1538   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1539   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1540     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1541   }
1542   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1543   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1544   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1545   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1546   if (!rank) {
1547     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1548 
1549     /* read in my part of the matrix numerical values  */
1550     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1551 
1552     /* insert into the matrix row by row (the file stores values by row while the dense array is stored by column, so we cannot read directly into array) */
1553     vals_ptr = vals;
1554     for (i=0; i<m; i++) {
1555       for (j=0; j<N; j++) {
1556         array[i + j*m] = *vals_ptr++;
1557       }
1558     }
1559 
1560     /* read in other processors and ship out */
1561     for (i=1; i<size; i++) {
1562       nz   = (rowners[i+1] - rowners[i])*N;
1563       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1564       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1565     }
1566   } else {
1567     /* receive numeric values */
1568     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1569 
1570     /* receive message of values */
1571     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1572 
1573     /* insert into the matrix row by row (the file stores values by row while the dense array is stored by column, so we cannot read directly into array) */
1574     vals_ptr = vals;
1575     for (i=0; i<m; i++) {
1576       for (j=0; j<N; j++) {
1577         array[i + j*m] = *vals_ptr++;
1578       }
1579     }
1580   }
1581   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1582   ierr = PetscFree(vals);CHKERRQ(ierr);
1583   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1584   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatLoad_MPIDense"
1590 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1591 {
1592   Mat_MPIDense   *a;
1593   PetscScalar    *vals,*svals;
1594   MPI_Comm       comm;
1595   MPI_Status     status;
1596   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1597   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1598   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1599   PetscInt       i,nz,j,rstart,rend;
1600   int            fd;
1601   PetscErrorCode ierr;
1602 
1603   PetscFunctionBegin;
1604   /* force binary viewer to load .info file if it has not yet done so */
1605   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1606   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1607   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1608   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1609   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1610   if (!rank) {
1611     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1612     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1613   }
1614   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1615   M    = header[1]; N = header[2]; nz = header[3];
1616 
1617   /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1618   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1619   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1620 
1621   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1622   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1623 
1624   /*
1625        Handle case where matrix is stored on disk as a dense matrix
1626   */
1627   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1628     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1629     PetscFunctionReturn(0);
1630   }
1631 
1632   /* determine ownership of all rows */
1633   if (newmat->rmap->n < 0) {
1634     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1635   } else {
1636     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1637   }
1638   if (newmat->cmap->n < 0) {
1639     n = PETSC_DECIDE;
1640   } else {
1641     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1642   }
1643 
1644   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1645   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1646   rowners[0] = 0;
1647   for (i=2; i<=size; i++) {
1648     rowners[i] += rowners[i-1];
1649   }
1650   rstart = rowners[rank];
1651   rend   = rowners[rank+1];
1652 
1653   /* distribute row lengths to all processors */
1654   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1655   if (!rank) {
1656     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1657     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1658     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1659     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1660     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1661     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1662   } else {
1663     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1664   }
1665 
1666   if (!rank) {
1667     /* calculate the number of nonzeros on each processor */
1668     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1669     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1670     for (i=0; i<size; i++) {
1671       for (j=rowners[i]; j< rowners[i+1]; j++) {
1672         procsnz[i] += rowlengths[j];
1673       }
1674     }
1675     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1676 
1677     /* determine max buffer needed and allocate it */
1678     maxnz = 0;
1679     for (i=0; i<size; i++) {
1680       maxnz = PetscMax(maxnz,procsnz[i]);
1681     }
1682     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1683 
1684     /* read in my part of the matrix column indices  */
1685     nz   = procsnz[0];
1686     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1687     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1688 
1689     /* read in everyone else's and ship them off */
1690     for (i=1; i<size; i++) {
1691       nz   = procsnz[i];
1692       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1693       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1694     }
1695     ierr = PetscFree(cols);CHKERRQ(ierr);
1696   } else {
1697     /* determine buffer space needed for message */
1698     nz = 0;
1699     for (i=0; i<m; i++) {
1700       nz += ourlens[i];
1701     }
1702     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1703 
1704     /* receive message of column indices */
1705     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1706     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1707     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1708   }
1709 
1710   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1711   a = (Mat_MPIDense*)newmat->data;
1712   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1713     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1714   }
1715 
1716   if (!rank) {
1717     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1718 
1719     /* read in my part of the matrix numerical values  */
1720     nz   = procsnz[0];
1721     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1722 
1723     /* insert into matrix */
1724     jj      = rstart;
1725     smycols = mycols;
1726     svals   = vals;
1727     for (i=0; i<m; i++) {
1728       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1729       smycols += ourlens[i];
1730       svals   += ourlens[i];
1731       jj++;
1732     }
1733 
1734     /* read in other processors and ship out */
1735     for (i=1; i<size; i++) {
1736       nz   = procsnz[i];
1737       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1738       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1739     }
1740     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1741   } else {
1742     /* receive numeric values */
1743     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1744 
1745     /* receive message of values */
1746     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1747     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1748     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1749 
1750     /* insert into matrix */
1751     jj      = rstart;
1752     smycols = mycols;
1753     svals   = vals;
1754     for (i=0; i<m; i++) {
1755       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1756       smycols += ourlens[i];
1757       svals   += ourlens[i];
1758       jj++;
1759     }
1760   }
1761   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1762   ierr = PetscFree(vals);CHKERRQ(ierr);
1763   ierr = PetscFree(mycols);CHKERRQ(ierr);
1764   ierr = PetscFree(rowners);CHKERRQ(ierr);
1765 
1766   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1767   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1768   PetscFunctionReturn(0);
1769 }
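
/*
   A usage sketch for the loader above (not part of the library): reading a
   dense matrix from a PETSc binary file; "matrix.dat" is a hypothetical file
   name.

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/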
1770 
1771 #undef __FUNCT__
1772 #define __FUNCT__ "MatEqual_MPIDense"
1773 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1774 {
1775   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1776   Mat            a,b;
1777   PetscBool      flg;
1778   PetscErrorCode ierr;
1779 
1780   PetscFunctionBegin;
1781   a    = matA->A;
1782   b    = matB->A;
1783   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1784   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1785   PetscFunctionReturn(0);
1786 }
1787 
1788 #undef __FUNCT__
1789 #define __FUNCT__ "MatDestroy_MatTransMatMult_MPIDense_MPIDense"
1790 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1791 {
1792   PetscErrorCode        ierr;
1793   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1794   Mat_TransMatMultDense *atb = a->atbdense;
1795 
1796   PetscFunctionBegin;
1797   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1798   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1799   ierr = PetscFree(atb);CHKERRQ(ierr);
1800   PetscFunctionReturn(0);
1801 }
1802 
1803 #undef __FUNCT__
1804 #define __FUNCT__ "MatTransposeMatMultNumeric_MPIDense_MPIDense"
1805 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1806 {
1807   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1808   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1809   Mat_TransMatMultDense *atb = c->atbdense;
1810   PetscErrorCode ierr;
1811   MPI_Comm       comm;
1812   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1813   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1814   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1815   PetscScalar    _DOne=1.0,_DZero=0.0;
1816   PetscBLASInt   am,an,bn,aN;
1817   const PetscInt *ranges;
1818 
1819   PetscFunctionBegin;
1820   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1821   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1822   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1823 
1824   /* compute the local contribution atbarray = aseq^T * bseq (each process forms the full N x cN product of its row blocks) */
1825   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1826   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1827   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1828   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1829   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1830 
1831   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1832   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1833 
1834   /* arrange atbarray into sendbuf */
1835   k = 0;
1836   for (proc=0; proc<size; proc++) {
1837     for (j=0; j<cN; j++) {
1838       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1839     }
1840   }
1841   /* sum the atbarray contributions across processes and scatter each one's row block into the local part of C */
1842   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1843   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1844   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatTransposeMatMultSymbolic_MPIDense_MPIDense"
1850 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1851 {
1852   PetscErrorCode        ierr;
1853   Mat                   Cdense;
1854   MPI_Comm              comm;
1855   PetscMPIInt           size;
1856   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1857   Mat_MPIDense          *c;
1858   Mat_TransMatMultDense *atb;
1859 
1860   PetscFunctionBegin;
1861   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1862   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1863     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1864   }
1865 
1866   /* create matrix product Cdense */
1867   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1868   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1869   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1870   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1871   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1872   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1873   *C   = Cdense;
1874 
1875   /* create data structure for reusing Cdense */
1876   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1877   ierr = PetscNew(&atb);CHKERRQ(ierr);
1878   cM = Cdense->rmap->N;
1879   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1880 
1881   c                    = (Mat_MPIDense*)Cdense->data;
1882   c->atbdense          = atb;
1883   atb->destroy         = Cdense->ops->destroy;
1884   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1885   PetscFunctionReturn(0);
1886 }
1887 
1888 #undef __FUNCT__
1889 #define __FUNCT__ "MatTransposeMatMult_MPIDense_MPIDense"
1890 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1891 {
1892   PetscErrorCode ierr;
1893 
1894   PetscFunctionBegin;
1895   if (scall == MAT_INITIAL_MATRIX) {
1896     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1897   }
1898   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1899   PetscFunctionReturn(0);
1900 }
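
/*
   A usage sketch (assuming A and B are assembled MATMPIDENSE matrices with the
   same row layout): computing C = A^T * B through the routines above.

     Mat C;
     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ...later, with the same layout, refresh C in place...
     ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/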
1901 
1902 #undef __FUNCT__
1903 #define __FUNCT__ "MatDestroy_MatMatMult_MPIDense_MPIDense"
1904 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1905 {
1906   PetscErrorCode   ierr;
1907   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1908   Mat_MatMultDense *ab = a->abdense;
1909 
1910   PetscFunctionBegin;
1911   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1912   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1913   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1914 
1915   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1916   ierr = PetscFree(ab);CHKERRQ(ierr);
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 #if defined(PETSC_HAVE_ELEMENTAL)
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1923 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1924 {
1925   PetscErrorCode   ierr;
1926   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1927   Mat_MatMultDense *ab=c->abdense;
1928 
1929   PetscFunctionBegin;
1930   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1931   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1932   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1933   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 #undef __FUNCT__
1938 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1939 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1940 {
1941   PetscErrorCode   ierr;
1942   Mat              Ae,Be,Ce;
1943   Mat_MPIDense     *c;
1944   Mat_MatMultDense *ab;
1945 
1946   PetscFunctionBegin;
1947   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1948     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
1949   }
1950 
1951   /* convert A and B to Elemental matrices Ae and Be */
1952   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
1953   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
1954 
1955   /* Ce = Ae*Be */
1956   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
1957   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
1958 
1959   /* convert Ce to C */
1960   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
1961 
1962   /* create data structure for reusing Cdense */
1963   ierr = PetscNew(&ab);CHKERRQ(ierr);
1964   c                  = (Mat_MPIDense*)(*C)->data;
1965   c->abdense         = ab;
1966 
1967   ab->Ae             = Ae;
1968   ab->Be             = Be;
1969   ab->Ce             = Ce;
1970   ab->destroy        = (*C)->ops->destroy;
1971   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1972   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 #undef __FUNCT__
1977 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1978 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1979 {
1980   PetscErrorCode ierr;
1981 
1982   PetscFunctionBegin;
1983   if (scall == MAT_INITIAL_MATRIX) { /* the symbolic product includes the numeric product */
1984     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1985     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1986     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1987   } else {
1988     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1989     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1990     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1991   }
1992   PetscFunctionReturn(0);
1993 }
1994 #endif
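
/*
   A usage sketch for the Elemental-backed product above (requires PETSc
   configured with Elemental; A and B are assumed to be assembled MATMPIDENSE
   matrices with compatible dimensions):

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/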
1995