/*
   Basic functions for basic parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
   matrix that represents the operator. For sequential matrices it returns itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
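/*
   Illustrative usage sketch for the accessors above (not part of the build; the
   assembled matrix A and the error-code variable ierr are assumed to come from
   the calling application):

     Mat A;            already assembled MATMPIDENSE (or MATSEQDENSE) matrix
     Mat Aloc,Adiag;

     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);   local rows x global columns block
     ierr = MatGetDiagonalBlock(A,&Adiag);CHKERRQ(ierr);     on-diagonal square block (square A only)

   Both Aloc and Adiag are references owned by A; the caller does not destroy them.
*/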
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetArray_MPIDense"
PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
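/*
   Illustrative sketch of the array accessors above (not part of the build; the
   matrix A and ierr are assumed from the calling code).  On a MATMPIDENSE
   matrix the array covers only the local rows, stored by columns as in the
   sequential case; with the default (PETSc-allocated) storage the leading
   dimension equals the number of local rows:

     PetscScalar *array;
     PetscInt    mlocal,Nglobal;

     ierr = MatGetLocalSize(A,&mlocal,NULL);CHKERRQ(ierr);
     ierr = MatGetSize(A,NULL,&Nglobal);CHKERRQ(ierr);
     ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr);
     local entry (i,j) is array[i + j*mlocal] for 0 <= i < mlocal, 0 <= j < Nglobal
     ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr);
*/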
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     row partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseRestoreArray_MPIDense"
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;
    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIDense"
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIDense"
"MatConjugate_MPIDense" 1017 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1018 { 1019 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1020 PetscErrorCode ierr; 1021 1022 PetscFunctionBegin; 1023 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatRealPart_MPIDense" 1029 PetscErrorCode MatRealPart_MPIDense(Mat A) 1030 { 1031 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1032 PetscErrorCode ierr; 1033 1034 PetscFunctionBegin; 1035 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1036 PetscFunctionReturn(0); 1037 } 1038 1039 #undef __FUNCT__ 1040 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1041 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1042 { 1043 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1044 PetscErrorCode ierr; 1045 1046 PetscFunctionBegin; 1047 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1048 PetscFunctionReturn(0); 1049 } 1050 1051 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1052 #undef __FUNCT__ 1053 #define __FUNCT__ "MatGetColumnNorms_MPIDense" 1054 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1055 { 1056 PetscErrorCode ierr; 1057 PetscInt i,n; 1058 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1059 PetscReal *work; 1060 1061 PetscFunctionBegin; 1062 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1063 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1064 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1065 if (type == NORM_2) { 1066 for (i=0; i<n; i++) work[i] *= work[i]; 1067 } 1068 if (type == NORM_INFINITY) { 1069 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1070 } else { 1071 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1072 } 1073 ierr = PetscFree(work);CHKERRQ(ierr); 1074 if (type == NORM_2) { 1075 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1076 } 1077 PetscFunctionReturn(0); 1078 } 1079 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatSetRandom_MPIDense" 1082 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1083 { 1084 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1085 PetscErrorCode ierr; 1086 PetscScalar *a; 1087 PetscInt m,n,i; 1088 1089 PetscFunctionBegin; 1090 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1091 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1092 for (i=0; i<m*n; i++) { 1093 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1094 } 1095 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatMissingDiagonal_MPIDense" 1103 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1104 { 1105 PetscFunctionBegin; 1106 *missing = PETSC_FALSE; 1107 PetscFunctionReturn(0); 1108 } 1109 1110 /* -------------------------------------------------------------------*/ 1111 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1112 MatGetRow_MPIDense, 1113 MatRestoreRow_MPIDense, 1114 MatMult_MPIDense, 1115 /* 4*/ MatMultAdd_MPIDense, 1116 MatMultTranspose_MPIDense, 1117 MatMultTransposeAdd_MPIDense, 1118 0, 1119 0, 1120 0, 1121 /* 10*/ 0, 1122 0, 1123 0, 1124 0, 1125 MatTranspose_MPIDense, 1126 /* 15*/ MatGetInfo_MPIDense, 1127 MatEqual_MPIDense, 1128 MatGetDiagonal_MPIDense, 1129 MatDiagonalScale_MPIDense, 1130 MatNorm_MPIDense, 1131 /* 20*/ MatAssemblyBegin_MPIDense, 1132 MatAssemblyEnd_MPIDense, 1133 MatSetOption_MPIDense, 1134 
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatGetSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatGetSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0
};

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */
  a = (Mat_MPIDense*)mat->data;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIDense_Elemental"
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off-array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
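/*
   Illustrative sketch (not part of the build): typical creation of a dense
   matrix through the type/preallocation interface above.  The global sizes
   M and N are hypothetical and assumed to be set by the application.

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);                 or -mat_type dense with MatSetFromOptions()
     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);    PETSc allocates the local storage

   Passing NULL for data lets PETSc manage the memory; a user-supplied array
   must already hold the local (rows-by-global-columns) block by columns.
*/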
#undef __FUNCT__
#define __FUNCT__ "MatCreateDense"
/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
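/*
   Illustrative sketch (not part of the build): the one-call form above, letting
   PETSc pick the row distribution.  The global sizes 100 x 50 are hypothetical.

     Mat A;

     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,NULL,&A);CHKERRQ(ierr);
     fill with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd(), use A
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/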
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  const PetscInt *rowners;
  PetscInt       i,m,n,nz,j,mMax;
  PetscScalar    *array,*vals,*vals_ptr;
  Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of rows and columns */
  m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
  n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
  ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
  ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into the matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
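/*
   Illustrative sketch (not part of the build) of storing and reloading a
   parallel dense matrix with the binary viewer, matching the loader above and
   the requirement in MatView_MPIDense_Binary() that the NATIVE format be
   pushed before writing.  The file name "dense.dat" is hypothetical; A is an
   assembled matrix from the calling code.

     PetscViewer viewer;
     Mat         B;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATDENSE);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/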
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  if (newmat->cmap->n < 0) {
    n = PETSC_DECIDE;
  } else {
    ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)newmat->data;
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatTransMatMult_MPIDense_MPIDense"
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a   = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
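
/*
   Overview of the C = A^T * B algorithm used below (added commentary, not
   code): each process multiplies the transpose of its local row block of A
   with its local row block of B using BLAS gemm, producing a full
   (cols of A) x (cols of B) contribution atbarray.  Since C is the sum of
   these contributions over all processes, each contribution is packed by
   destination process (the row block of C that process owns, column-major
   within the block) into sendbuf, and a single MPI_Reduce_scatter both sums
   the contributions and delivers to every process exactly the rows of C it
   owns.
*/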
#undef __FUNCT__
#define __FUNCT__ "MatTransposeMatMultNumeric_MPIDense_MPIDense"
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data,*b=(Mat_MPIDense*)B->data,*c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data,*bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          am,an,bn,aN;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
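
/*
   Illustration of the packing loop above (added commentary with arbitrary
   sizes, not library code): with 2 processes each owning 1 row of a 2 x 2
   matrix C, the local contribution atbarray is stored column-major as

       atbarray = [ c00  c10  c01  c11 ]

   and is reordered so that all entries destined for process 0 come first,
   column-major within each destination block:

       sendbuf  = [ c00  c01 | c10  c11 ]

   MPI_Reduce_scatter with recvcounts = {2,2} then sums these blocks across
   processes and leaves process p with exactly the cN entries of its own row
   of C, already in the column-major layout of its local dense block.
*/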
#undef __FUNCT__
#define __FUNCT__ "MatTransposeMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reusing Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = Cdense->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  c                    = (Mat_MPIDense*)Cdense->data;
  c->atbdense          = atb;
  atb->destroy         = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTransposeMatMult_MPIDense_MPIDense"
PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatMatMult_MPIDense_MPIDense"
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a  = (Mat_MPIDense*)A->data;
  Mat_MatMultDense *ab = a->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);

  ierr = (ab->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c =(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MPIDense     *c;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* convert A and B to Elemental matrices Ae and Be */
  ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);CHKERRQ(ierr);
  ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX,&Be);CHKERRQ(ierr);

  /* Ce = Ae*Be */
  ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);

  /* convert Ce to C */
  ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);

  /* create data structure for reusing C */
  ierr       = PetscNew(&ab);CHKERRQ(ierr);
  c          = (Mat_MPIDense*)(*C)->data;
  c->abdense = ab;

  ab->Ae      = Ae;
  ab->Be      = Be;
  ab->Ce      = Ce;
  ab->destroy = (*C)->ops->destroy;

  (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
  (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
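
/*
   A minimal user-level sketch of the path below (added commentary; it assumes
   PETSc was configured with Elemental support and that A and B are assembled
   MATMPIDENSE matrices with compatible sizes):

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ...
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);

   The MAT_INITIAL_MATRIX call reaches MatMatMultSymbolic_MPIDense_MPIDense()
   above, which also performs the first numeric product; the MAT_REUSE_MATRIX
   call reuses the cached Elemental copies via
   MatMatMultNumeric_MPIDense_MPIDense().
*/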
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  } else {
    ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif