/*
   Basic functions for parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
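
/*
   A minimal usage sketch for MatDenseGetLocalMatrix(), assuming A is an
   already assembled matrix of type MATMPIDENSE (the Mat A is hypothetical).
   The returned inner matrix is borrowed, not referenced, so the caller must
   not destroy it; operating on it acts on this rank's local block:

       Mat Aloc;
       ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
       ierr = MatScale(Aloc,2.0);CHKERRQ(ierr);
*/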

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
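
/*
   A minimal sketch of the array access these wrappers delegate to the inner
   sequential matrix, assuming a matrix A of type MATMPIDENSE with the default
   preallocation (leading dimension equal to the local row count):

       PetscScalar *v;
       PetscInt    mlocal;
       ierr = MatGetLocalSize(A,&mlocal,NULL);CHKERRQ(ierr);
       ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
       v[i + j*mlocal] accesses local row i of global column j (column major)
       ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
*/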

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
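
/*
   A minimal sketch of the insert/assemble cycle the stash machinery in
   MatAssemblyBegin/End supports, assuming a matrix A of type MATMPIDENSE and
   hypothetical global indices row and col. Entries destined for rows owned by
   another rank are cached in mat->stash and shipped during assembly:

       ierr = MatSetValues(A,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/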

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ...
       This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
1163 0, 1164 0, 1165 /* 79*/ 0, 1166 0, 1167 0, 1168 0, 1169 /* 83*/ MatLoad_MPIDense, 1170 0, 1171 0, 1172 0, 1173 0, 1174 0, 1175 #if defined(PETSC_HAVE_ELEMENTAL) 1176 /* 89*/ MatMatMult_MPIDense_MPIDense, 1177 MatMatMultSymbolic_MPIDense_MPIDense, 1178 #else 1179 /* 89*/ 0, 1180 0, 1181 #endif 1182 MatMatMultNumeric_MPIDense, 1183 0, 1184 0, 1185 /* 94*/ 0, 1186 0, 1187 0, 1188 0, 1189 0, 1190 /* 99*/ 0, 1191 0, 1192 0, 1193 MatConjugate_MPIDense, 1194 0, 1195 /*104*/ 0, 1196 MatRealPart_MPIDense, 1197 MatImaginaryPart_MPIDense, 1198 0, 1199 0, 1200 /*109*/ 0, 1201 0, 1202 0, 1203 0, 1204 MatMissingDiagonal_MPIDense, 1205 /*114*/ 0, 1206 0, 1207 0, 1208 0, 1209 0, 1210 /*119*/ 0, 1211 0, 1212 0, 1213 0, 1214 0, 1215 /*124*/ 0, 1216 MatGetColumnNorms_MPIDense, 1217 0, 1218 0, 1219 0, 1220 /*129*/ 0, 1221 MatTransposeMatMult_MPIDense_MPIDense, 1222 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1223 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1224 0, 1225 /*134*/ 0, 1226 0, 1227 0, 1228 0, 1229 0, 1230 /*139*/ 0, 1231 0, 1232 0 1233 }; 1234 1235 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1236 { 1237 Mat_MPIDense *a; 1238 PetscErrorCode ierr; 1239 1240 PetscFunctionBegin; 1241 mat->preallocated = PETSC_TRUE; 1242 /* Note: For now, when data is specified above, this assumes the user correctly 1243 allocates the local dense storage space. We should add error checking. */ 1244 1245 a = (Mat_MPIDense*)mat->data; 1246 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1247 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1248 a->nvec = mat->cmap->n; 1249 1250 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1251 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1252 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1253 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1254 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #if defined(PETSC_HAVE_ELEMENTAL) 1259 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1260 { 1261 Mat mat_elemental; 1262 PetscErrorCode ierr; 1263 PetscScalar *v; 1264 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1265 1266 PetscFunctionBegin; 1267 if (reuse == MAT_REUSE_MATRIX) { 1268 mat_elemental = *newmat; 1269 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1270 } else { 1271 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1272 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1273 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1274 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1275 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1276 } 1277 1278 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1279 for (i=0; i<N; i++) cols[i] = i; 1280 for (i=0; i<m; i++) rows[i] = rstart + i; 1281 1282 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1283 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1284 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1285 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1286 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1287 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1288 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1289 1290 if 
  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off-processor entries */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/
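
/*
   A minimal usage sketch for the MATDENSE type described above; the global
   sizes M and N are hypothetical. On a single-process communicator the type
   resolves to MATSEQDENSE, otherwise to MATMPIDENSE:

       Mat A;
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
       ierr = MatSetUp(A);CHKERRQ(ierr);
*/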

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to the array it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
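
/*
   A minimal sketch of the place/reset pairing documented above, assuming the
   caller owns a column-major buffer mydata at least as large as the local
   block of A (both A and mydata are hypothetical):

       ierr = MatDensePlaceArray(A,mydata);CHKERRQ(ierr);
       ... use A, which now reads and writes mydata ...
       ierr = MatDenseResetArray(A);CHKERRQ(ierr);
*/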

/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
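
/*
   A minimal usage sketch for MatCreateDense(), assuming a 100 x 100 global
   matrix with PETSc choosing the row distribution and owning the storage:

       Mat A;
       ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,NULL,&A);CHKERRQ(ierr);
       ... set values and assemble, then use A ...
       ierr = MatDestroy(&A);CHKERRQ(ierr);
*/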

static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  const PetscInt *rowners;
  PetscInt       i,m,n,nz,j,mMax;
  PetscScalar    *array,*vals,*vals_ptr;
  Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of rows and columns */
  m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
  n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
  ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
  ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
(%D)",M,newmat->rmap->N); 1637 if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N); 1638 1639 /* 1640 Handle case where matrix is stored on disk as a dense matrix 1641 */ 1642 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1643 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1644 PetscFunctionReturn(0); 1645 } 1646 1647 /* determine ownership of all rows */ 1648 if (newmat->rmap->n < 0) { 1649 ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr); 1650 } else { 1651 ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr); 1652 } 1653 if (newmat->cmap->n < 0) { 1654 n = PETSC_DECIDE; 1655 } else { 1656 ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr); 1657 } 1658 1659 ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr); 1660 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1661 rowners[0] = 0; 1662 for (i=2; i<=size; i++) { 1663 rowners[i] += rowners[i-1]; 1664 } 1665 rstart = rowners[rank]; 1666 rend = rowners[rank+1]; 1667 1668 /* distribute row lengths to all processors */ 1669 ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr); 1670 if (!rank) { 1671 ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr); 1672 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1673 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 1674 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1675 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1676 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1677 } else { 1678 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1679 } 1680 1681 if (!rank) { 1682 /* calculate the number of nonzeros on each processor */ 1683 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 1684 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1685 for (i=0; i<size; i++) { 1686 for (j=rowners[i]; j< rowners[i+1]; j++) { 1687 procsnz[i] += rowlengths[j]; 1688 } 1689 } 1690 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1691 1692 /* determine max buffer needed and allocate it */ 1693 maxnz = 0; 1694 for (i=0; i<size; i++) { 1695 maxnz = PetscMax(maxnz,procsnz[i]); 1696 } 1697 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 1698 1699 /* read in my part of the matrix column indices */ 1700 nz = procsnz[0]; 1701 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 1702 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1703 1704 /* read in every one elses and ship off */ 1705 for (i=1; i<size; i++) { 1706 nz = procsnz[i]; 1707 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1708 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1709 } 1710 ierr = PetscFree(cols);CHKERRQ(ierr); 1711 } else { 1712 /* determine buffer space needed for message */ 1713 nz = 0; 1714 for (i=0; i<m; i++) { 1715 nz += ourlens[i]; 1716 } 1717 ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr); 1718 1719 /* receive message of column indices*/ 1720 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1721 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1722 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1723 } 1724 1725 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1726 a = (Mat_MPIDense*)newmat->data; 1727 if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) { 1728 ierr = 
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a   = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          am,an,bn,aN;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
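
/*
   Communication note (illustration of the routine above, not library code):
   every rank forms the full cM x cN product of its local row blocks, so the
   global C = A^T * B equals the entry-wise sum of these per-rank products.
   The packing loop orders sendbuf by destination rank (each destination's
   rows, column-major within the destination block), and MPI_Reduce_scatter
   then sums the buffers across ranks while delivering to rank p exactly its
   recvcounts[p] = (local rows of C) * cN entries, which land directly in
   C's local column-major storage.
*/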
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reusing Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = Cdense->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  c                    = (Mat_MPIDense*)Cdense->data;
  c->atbdense          = atb;
  atb->destroy         = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
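
/*
   Usage sketch (illustrative): form C = A^T * B for two MATMPIDENSE
   matrices A and B with identical row layouts, then recompute into the
   same C on a later call:

     Mat C;

     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/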
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a  = (Mat_MPIDense*)A->data;
  Mat_MatMultDense *ab = a->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);

  ierr = (ab->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c =(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MPIDense     *c;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* convert A and B to Elemental matrices Ae and Be */
  ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);CHKERRQ(ierr);
  ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX,&Be);CHKERRQ(ierr);

  /* Ce = Ae*Be */
  ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);

  /* convert Ce to C */
  ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);

  /* create data structure for reusing the product */
  ierr = PetscNew(&ab);CHKERRQ(ierr);
  c          = (Mat_MPIDense*)(*C)->data;
  c->abdense = ab;

  ab->Ae      = Ae;
  ab->Be      = Be;
  ab->Ce      = Ce;
  ab->destroy = (*C)->ops->destroy;
  (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
  (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes the numeric product */
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  } else {
    ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
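
/*
   Usage sketch (illustrative): parallel dense C = A * B goes through the
   Elemental path above, so it is available only when PETSc was configured
   with Elemental support (PETSC_HAVE_ELEMENTAL):

     Mat C;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/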