1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 #include <petscblaslapack.h> 10 11 /*@ 12 13 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the Seq or MPI dense matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 @*/ 25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 26 { 27 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 28 PetscErrorCode ierr; 29 PetscBool flg; 30 31 PetscFunctionBegin; 32 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 33 if (flg) *B = mat->A; 34 else *B = A; 35 PetscFunctionReturn(0); 36 } 37 38 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 39 { 40 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 41 PetscErrorCode ierr; 42 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 43 44 PetscFunctionBegin; 45 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 46 lrow = row - rstart; 47 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 48 PetscFunctionReturn(0); 49 } 50 51 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 52 { 53 PetscErrorCode ierr; 54 55 PetscFunctionBegin; 56 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 57 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 58 PetscFunctionReturn(0); 59 } 60 61 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 62 { 63 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 64 PetscErrorCode ierr; 65 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 66 PetscScalar *array; 67 MPI_Comm comm; 68 Mat B; 69 70 PetscFunctionBegin; 71 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 72 73 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 74 if (!B) { 75 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 76 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 77 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 78 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 79 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 80 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 81 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 82 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 83 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 84 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 85 *a = B; 86 ierr = MatDestroy(&B);CHKERRQ(ierr); 87 } else *a = B; 88 PetscFunctionReturn(0); 89 } 90 91 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 92 { 93 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 94 PetscErrorCode ierr; 95 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 96 PetscBool roworiented = A->roworiented; 97 98 PetscFunctionBegin; 99 for (i=0; i<m; i++) { 100 if (idxm[i] < 0) continue; 101 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 102 if (idxm[i] >= rstart && idxm[i] < rend) { 103 row = idxm[i] - rstart; 104 if (roworiented) { 105 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 106 } else { 107 for (j=0; j<n; j++) { 108 if (idxn[j] < 0) continue; 109 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 110 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 111 } 112 } 113 } else if (!A->donotstash) { 114 mat->assembled = PETSC_FALSE; 115 if (roworiented) { 116 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 117 } else { 118 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 119 } 120 } 121 } 122 PetscFunctionReturn(0); 123 } 124 125 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 126 { 127 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 128 PetscErrorCode ierr; 129 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 130 131 PetscFunctionBegin; 132 for (i=0; i<m; i++) { 133 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 134 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 135 if (idxm[i] >= rstart && idxm[i] < rend) { 136 row = idxm[i] - rstart; 137 for (j=0; j<n; j++) { 138 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 139 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 140 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 141 } 142 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 143 } 144 PetscFunctionReturn(0); 145 } 146 147 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 148 { 149 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 150 PetscErrorCode ierr; 151 152 PetscFunctionBegin; 153 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 154 PetscFunctionReturn(0); 155 } 156 157 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[]) 158 { 159 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 160 PetscErrorCode ierr; 161 162 PetscFunctionBegin; 163 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 164 PetscFunctionReturn(0); 165 } 166 167 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 168 { 169 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 170 PetscErrorCode ierr; 171 172 PetscFunctionBegin; 173 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 174 PetscFunctionReturn(0); 175 } 176 177 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 178 { 179 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 180 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 181 PetscErrorCode ierr; 182 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 183 const PetscInt *irow,*icol; 184 PetscScalar *av,*bv,*v = lmat->v; 185 Mat newmat; 186 IS iscol_local; 187 188 PetscFunctionBegin; 189 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 190 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 191 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 192 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 193 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 194 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 195 196 /* No parallel redistribution currently supported! Should really check each index set 197 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 198 original matrix! */ 199 200 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 201 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 202 203 /* Check submatrix call */ 204 if (scall == MAT_REUSE_MATRIX) { 205 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 206 /* Really need to test rows and column sizes! */ 207 newmat = *B; 208 } else { 209 /* Create and fill new matrix */ 210 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 211 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 212 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 213 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 214 } 215 216 /* Now extract the data pointers and do the copy, column at a time */ 217 newmatd = (Mat_MPIDense*)newmat->data; 218 bv = ((Mat_SeqDense*)newmatd->A->data)->v; 219 220 for (i=0; i<Ncols; i++) { 221 av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 222 for (j=0; j<nrows; j++) { 223 *bv++ = av[irow[j] - rstart]; 224 } 225 } 226 227 /* Assemble the matrices so that the correct flags are set */ 228 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 229 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 230 231 /* Free work space */ 232 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 233 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 234 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 235 *B = newmat; 236 PetscFunctionReturn(0); 237 } 238 239 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 240 { 241 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 242 PetscErrorCode ierr; 243 244 PetscFunctionBegin; 245 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 246 PetscFunctionReturn(0); 247 } 248 249 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 250 { 251 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 252 MPI_Comm comm; 253 PetscErrorCode ierr; 254 PetscInt nstash,reallocs; 255 InsertMode addv; 256 257 PetscFunctionBegin; 258 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 259 /* make sure all processors are either in INSERTMODE or ADDMODE */ 260 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 261 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 262 mat->insertmode = addv; /* in case this processor had no cache */ 263 264 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 265 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 266 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 271 { 272 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 273 PetscErrorCode ierr; 274 PetscInt i,*row,*col,flg,j,rstart,ncols; 275 PetscMPIInt n; 276 PetscScalar *val; 277 278 PetscFunctionBegin; 279 /* wait on receives */ 280 while (1) { 281 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 282 if (!flg) break; 283 284 for (i=0; i<n;) { 285 /* Now identify the consecutive vals belonging to the same row */ 286 for (j=i,rstart=row[j]; j<n; j++) { 287 if (row[j] != rstart) break; 288 } 289 if (j < n) ncols = j-i; 290 else ncols = n-i; 291 /* Now assemble all these values with a single function call */ 292 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 293 i = j; 294 } 295 } 296 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 297 298 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 299 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 300 301 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 302 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 303 } 304 PetscFunctionReturn(0); 305 } 306 307 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 308 { 309 PetscErrorCode ierr; 310 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 311 312 PetscFunctionBegin; 313 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 314 PetscFunctionReturn(0); 315 } 316 317 /* the code does not do the diagonal entries correctly unless the 318 matrix is square and the column and row owerships are identical. 319 This is a BUG. The only way to fix it seems to be to access 320 mdn->A and mdn->B directly and not through the MatZeroRows() 321 routine. 322 */ 323 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 324 { 325 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 326 PetscErrorCode ierr; 327 PetscInt i,*owners = A->rmap->range; 328 PetscInt *sizes,j,idx,nsends; 329 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 330 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 331 PetscInt *lens,*lrows,*values; 332 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 333 MPI_Comm comm; 334 MPI_Request *send_waits,*recv_waits; 335 MPI_Status recv_status,*send_status; 336 PetscBool found; 337 const PetscScalar *xx; 338 PetscScalar *bb; 339 340 PetscFunctionBegin; 341 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 342 if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 343 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 344 /* first count number of contributors to each processor */ 345 ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 346 ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/ 347 for (i=0; i<N; i++) { 348 idx = rows[i]; 349 found = PETSC_FALSE; 350 for (j=0; j<size; j++) { 351 if (idx >= owners[j] && idx < owners[j+1]) { 352 sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 353 } 354 } 355 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 356 } 357 nsends = 0; 358 for (i=0; i<size; i++) nsends += sizes[2*i+1]; 359 360 /* inform other processors of number of messages and max length*/ 361 ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr); 362 363 /* post receives: */ 364 ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr); 365 ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr); 366 for (i=0; i<nrecvs; i++) { 367 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 368 } 369 370 /* do sends: 371 1) starts[i] gives the starting index in svalues for stuff going to 372 the ith processor 373 */ 374 ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr); 375 ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr); 376 ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr); 377 378 starts[0] = 0; 379 for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 380 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 381 382 starts[0] = 0; 383 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 384 count = 0; 385 for (i=0; i<size; i++) { 386 if (sizes[2*i+1]) { 387 ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 388 } 389 } 390 ierr = PetscFree(starts);CHKERRQ(ierr); 391 392 base = owners[rank]; 393 394 /* wait on receives */ 395 ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 396 count = nrecvs; 397 slen = 0; 398 while (count) { 399 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 400 /* unpack receives into our local space */ 401 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 402 403 source[imdex] = recv_status.MPI_SOURCE; 404 lens[imdex] = n; 405 slen += n; 406 count--; 407 } 408 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 409 410 /* move the data into the send scatter */ 411 ierr = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr); 412 count = 0; 413 for (i=0; i<nrecvs; i++) { 414 values = rvalues + i*nmax; 415 for (j=0; j<lens[i]; j++) { 416 lrows[count++] = values[j] - base; 417 } 418 } 419 ierr = PetscFree(rvalues);CHKERRQ(ierr); 420 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 421 ierr = PetscFree(owner);CHKERRQ(ierr); 422 ierr = PetscFree(sizes);CHKERRQ(ierr); 423 424 /* fix right hand side if needed */ 425 if (x && b) { 426 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 427 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 428 for (i=0; i<slen; i++) { 429 bb[lrows[i]] = diag*xx[lrows[i]]; 430 } 431 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 432 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 433 } 434 435 /* actually zap the local rows */ 436 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 437 if (diag != 0.0) { 438 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 439 PetscInt m = ll->lda, i; 440 441 for (i=0; i<slen; i++) { 442 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 443 } 444 } 445 ierr = PetscFree(lrows);CHKERRQ(ierr); 446 447 /* wait on sends */ 448 if (nsends) { 449 ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); 450 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 451 ierr = PetscFree(send_status);CHKERRQ(ierr); 452 } 453 ierr = PetscFree(send_waits);CHKERRQ(ierr); 454 ierr = PetscFree(svalues);CHKERRQ(ierr); 455 PetscFunctionReturn(0); 456 } 457 458 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 459 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 460 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 461 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 462 463 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 464 { 465 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 466 PetscErrorCode ierr; 467 468 PetscFunctionBegin; 469 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 470 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 472 PetscFunctionReturn(0); 473 } 474 475 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 476 { 477 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 478 PetscErrorCode ierr; 479 480 PetscFunctionBegin; 481 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 482 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 483 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 484 PetscFunctionReturn(0); 485 } 486 487 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 488 { 489 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 490 PetscErrorCode ierr; 491 PetscScalar zero = 0.0; 492 493 PetscFunctionBegin; 494 ierr = VecSet(yy,zero);CHKERRQ(ierr); 495 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 496 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 497 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 502 { 503 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 508 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 509 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 510 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 511 PetscFunctionReturn(0); 512 } 513 514 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 515 { 516 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 517 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 518 PetscErrorCode ierr; 519 PetscInt len,i,n,m = A->rmap->n,radd; 520 PetscScalar *x,zero = 0.0; 521 522 PetscFunctionBegin; 523 ierr = VecSet(v,zero);CHKERRQ(ierr); 524 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 525 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 526 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 527 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 528 radd = A->rmap->rstart*m; 529 for (i=0; i<len; i++) { 530 x[i] = aloc->v[radd + i*m + i]; 531 } 532 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 533 PetscFunctionReturn(0); 534 } 535 536 PetscErrorCode MatDestroy_MPIDense(Mat mat) 537 { 538 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 539 PetscErrorCode ierr; 540 541 PetscFunctionBegin; 542 #if defined(PETSC_USE_LOG) 543 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 544 #endif 545 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 546 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 547 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 548 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 549 550 ierr = PetscFree(mat->data);CHKERRQ(ierr); 551 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 552 553 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 554 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 555 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 556 557 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 558 #if defined(PETSC_HAVE_ELEMENTAL) 559 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 560 #endif 561 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 562 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 563 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 564 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 565 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 566 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 567 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 568 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 569 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 570 PetscFunctionReturn(0); 571 } 572 573 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 574 { 575 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 576 PetscErrorCode ierr; 577 PetscViewerFormat format; 578 int fd; 579 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 580 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 581 PetscScalar *work,*v,*vv; 582 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 583 584 PetscFunctionBegin; 585 if (mdn->size == 1) { 586 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 587 } else { 588 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 589 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 590 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 591 592 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 593 if (format == PETSC_VIEWER_NATIVE) { 594 595 if (!rank) { 596 /* store the matrix as a dense matrix */ 597 header[0] = MAT_FILE_CLASSID; 598 header[1] = mat->rmap->N; 599 header[2] = N; 600 header[3] = MATRIX_BINARY_FORMAT_DENSE; 601 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 602 603 /* get largest work array needed for transposing array */ 604 mmax = mat->rmap->n; 605 for (i=1; i<size; i++) { 606 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 607 } 608 ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr); 609 610 /* write out local array, by rows */ 611 m = mat->rmap->n; 612 v = a->v; 613 for (j=0; j<N; j++) { 614 for (i=0; i<m; i++) { 615 work[j + i*N] = *v++; 616 } 617 } 618 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 619 /* get largest work array to receive messages from other processes, excludes process zero */ 620 mmax = 0; 621 for (i=1; i<size; i++) { 622 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 623 } 624 ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr); 625 for (k = 1; k < size; k++) { 626 v = vv; 627 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 628 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 629 630 for (j = 0; j < N; j++) { 631 for (i = 0; i < m; i++) { 632 work[j + i*N] = *v++; 633 } 634 } 635 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 636 } 637 ierr = PetscFree(work);CHKERRQ(ierr); 638 ierr = PetscFree(vv);CHKERRQ(ierr); 639 } else { 640 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 641 } 642 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)"); 643 } 644 PetscFunctionReturn(0); 645 } 646 647 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 648 #include <petscdraw.h> 649 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 650 { 651 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 652 PetscErrorCode ierr; 653 PetscMPIInt rank = mdn->rank; 654 PetscViewerType vtype; 655 PetscBool iascii,isdraw; 656 PetscViewer sviewer; 657 PetscViewerFormat format; 658 659 PetscFunctionBegin; 660 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 661 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 662 if (iascii) { 663 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 664 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 665 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 666 MatInfo info; 667 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 668 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 669 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 670 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 671 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 672 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 673 PetscFunctionReturn(0); 674 } else if (format == PETSC_VIEWER_ASCII_INFO) { 675 PetscFunctionReturn(0); 676 } 677 } else if (isdraw) { 678 PetscDraw draw; 679 PetscBool isnull; 680 681 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 682 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 683 if (isnull) PetscFunctionReturn(0); 684 } 685 686 { 687 /* assemble the entire matrix onto first processor. */ 688 Mat A; 689 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 690 PetscInt *cols; 691 PetscScalar *vals; 692 693 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 694 if (!rank) { 695 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 696 } else { 697 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 698 } 699 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 700 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 701 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 702 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 703 704 /* Copy the matrix ... This isn't the most efficient means, 705 but it's quick for now */ 706 A->insertmode = INSERT_VALUES; 707 708 row = mat->rmap->rstart; 709 m = mdn->A->rmap->n; 710 for (i=0; i<m; i++) { 711 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 712 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 713 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 714 row++; 715 } 716 717 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 718 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 719 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 720 if (!rank) { 721 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 722 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 723 } 724 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 725 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 726 ierr = MatDestroy(&A);CHKERRQ(ierr); 727 } 728 PetscFunctionReturn(0); 729 } 730 731 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 732 { 733 PetscErrorCode ierr; 734 PetscBool iascii,isbinary,isdraw,issocket; 735 736 PetscFunctionBegin; 737 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 738 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 739 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 740 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 741 742 if (iascii || issocket || isdraw) { 743 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 744 } else if (isbinary) { 745 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 746 } 747 PetscFunctionReturn(0); 748 } 749 750 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 751 { 752 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 753 Mat mdn = mat->A; 754 PetscErrorCode ierr; 755 PetscReal isend[5],irecv[5]; 756 757 PetscFunctionBegin; 758 info->block_size = 1.0; 759 760 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 761 762 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 763 isend[3] = info->memory; isend[4] = info->mallocs; 764 if (flag == MAT_LOCAL) { 765 info->nz_used = isend[0]; 766 info->nz_allocated = isend[1]; 767 info->nz_unneeded = isend[2]; 768 info->memory = isend[3]; 769 info->mallocs = isend[4]; 770 } else if (flag == MAT_GLOBAL_MAX) { 771 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 772 773 info->nz_used = irecv[0]; 774 info->nz_allocated = irecv[1]; 775 info->nz_unneeded = irecv[2]; 776 info->memory = irecv[3]; 777 info->mallocs = irecv[4]; 778 } else if (flag == MAT_GLOBAL_SUM) { 779 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 780 781 info->nz_used = irecv[0]; 782 info->nz_allocated = irecv[1]; 783 info->nz_unneeded = irecv[2]; 784 info->memory = irecv[3]; 785 info->mallocs = irecv[4]; 786 } 787 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 788 info->fill_ratio_needed = 0; 789 info->factor_mallocs = 0; 790 PetscFunctionReturn(0); 791 } 792 793 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 794 { 795 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 796 PetscErrorCode ierr; 797 798 PetscFunctionBegin; 799 switch (op) { 800 case MAT_NEW_NONZERO_LOCATIONS: 801 case MAT_NEW_NONZERO_LOCATION_ERR: 802 case MAT_NEW_NONZERO_ALLOCATION_ERR: 803 MatCheckPreallocated(A,1); 804 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 805 break; 806 case MAT_ROW_ORIENTED: 807 MatCheckPreallocated(A,1); 808 a->roworiented = flg; 809 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 810 break; 811 case MAT_NEW_DIAGONALS: 812 case MAT_KEEP_NONZERO_PATTERN: 813 case MAT_USE_HASH_TABLE: 814 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 815 break; 816 case MAT_IGNORE_OFF_PROC_ENTRIES: 817 a->donotstash = flg; 818 break; 819 case MAT_SYMMETRIC: 820 case MAT_STRUCTURALLY_SYMMETRIC: 821 case MAT_HERMITIAN: 822 case MAT_SYMMETRY_ETERNAL: 823 case MAT_IGNORE_LOWER_TRIANGULAR: 824 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 825 break; 826 default: 827 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 828 } 829 PetscFunctionReturn(0); 830 } 831 832 833 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 834 { 835 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 836 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 837 const PetscScalar *l,*r; 838 PetscScalar x,*v; 839 PetscErrorCode ierr; 840 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 841 842 PetscFunctionBegin; 843 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 844 if (ll) { 845 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 846 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 847 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 848 for (i=0; i<m; i++) { 849 x = l[i]; 850 v = mat->v + i; 851 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 852 } 853 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 854 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 855 } 856 if (rr) { 857 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 858 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 859 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 861 ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 862 for (i=0; i<n; i++) { 863 x = r[i]; 864 v = mat->v + i*m; 865 for (j=0; j<m; j++) (*v++) *= x; 866 } 867 ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 868 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 869 } 870 PetscFunctionReturn(0); 871 } 872 873 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 874 { 875 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 876 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 877 PetscErrorCode ierr; 878 PetscInt i,j; 879 PetscReal sum = 0.0; 880 PetscScalar *v = mat->v; 881 882 PetscFunctionBegin; 883 if (mdn->size == 1) { 884 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 885 } else { 886 if (type == NORM_FROBENIUS) { 887 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 888 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 889 } 890 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 891 *nrm = PetscSqrtReal(*nrm); 892 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 893 } else if (type == NORM_1) { 894 PetscReal *tmp,*tmp2; 895 ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 896 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 897 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 898 *nrm = 0.0; 899 v = mat->v; 900 for (j=0; j<mdn->A->cmap->n; j++) { 901 for (i=0; i<mdn->A->rmap->n; i++) { 902 tmp[j] += PetscAbsScalar(*v); v++; 903 } 904 } 905 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 906 for (j=0; j<A->cmap->N; j++) { 907 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 908 } 909 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 910 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 911 } else if (type == NORM_INFINITY) { /* max row norm */ 912 PetscReal ntemp; 913 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 914 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 915 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 916 } 917 PetscFunctionReturn(0); 918 } 919 920 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 921 { 922 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 923 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 924 Mat B; 925 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 926 PetscErrorCode ierr; 927 PetscInt j,i; 928 PetscScalar *v; 929 930 PetscFunctionBegin; 931 if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place"); 932 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 933 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 934 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 935 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 936 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 937 } else { 938 B = *matout; 939 } 940 941 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 942 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 943 for (i=0; i<m; i++) rwork[i] = rstart + i; 944 for (j=0; j<n; j++) { 945 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 946 v += m; 947 } 948 ierr = PetscFree(rwork);CHKERRQ(ierr); 949 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 950 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 951 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 952 *matout = B; 953 } else { 954 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 955 } 956 PetscFunctionReturn(0); 957 } 958 959 960 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 961 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 962 963 PetscErrorCode MatSetUp_MPIDense(Mat A) 964 { 965 PetscErrorCode ierr; 966 967 PetscFunctionBegin; 968 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 973 { 974 PetscErrorCode ierr; 975 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 976 977 PetscFunctionBegin; 978 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 979 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 980 PetscFunctionReturn(0); 981 } 982 983 PetscErrorCode MatConjugate_MPIDense(Mat mat) 984 { 985 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 986 PetscErrorCode ierr; 987 988 PetscFunctionBegin; 989 ierr = MatConjugate(a->A);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } 992 993 PetscErrorCode MatRealPart_MPIDense(Mat A) 994 { 995 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 996 PetscErrorCode ierr; 997 998 PetscFunctionBegin; 999 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1000 PetscFunctionReturn(0); 1001 } 1002 1003 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1004 { 1005 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1006 PetscErrorCode ierr; 1007 1008 PetscFunctionBegin; 1009 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 1013 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1014 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1015 { 1016 PetscErrorCode ierr; 1017 PetscInt i,n; 1018 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1019 PetscReal *work; 1020 1021 PetscFunctionBegin; 1022 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1023 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1024 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1025 if (type == NORM_2) { 1026 for (i=0; i<n; i++) work[i] *= work[i]; 1027 } 1028 if (type == NORM_INFINITY) { 1029 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1030 } else { 1031 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1032 } 1033 ierr = PetscFree(work);CHKERRQ(ierr); 1034 if (type == NORM_2) { 1035 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1036 } 1037 PetscFunctionReturn(0); 1038 } 1039 1040 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1041 { 1042 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1043 PetscErrorCode ierr; 1044 PetscScalar *a; 1045 PetscInt m,n,i; 1046 1047 PetscFunctionBegin; 1048 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1049 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1050 for (i=0; i<m*n; i++) { 1051 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1052 } 1053 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1054 PetscFunctionReturn(0); 1055 } 1056 1057 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1058 1059 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1060 { 1061 PetscFunctionBegin; 1062 *missing = PETSC_FALSE; 1063 PetscFunctionReturn(0); 1064 } 1065 1066 /* -------------------------------------------------------------------*/ 1067 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1068 MatGetRow_MPIDense, 1069 MatRestoreRow_MPIDense, 1070 MatMult_MPIDense, 1071 /* 4*/ MatMultAdd_MPIDense, 1072 MatMultTranspose_MPIDense, 1073 MatMultTransposeAdd_MPIDense, 1074 0, 1075 0, 1076 0, 1077 /* 10*/ 0, 1078 0, 1079 0, 1080 0, 1081 MatTranspose_MPIDense, 1082 /* 15*/ MatGetInfo_MPIDense, 1083 MatEqual_MPIDense, 1084 MatGetDiagonal_MPIDense, 1085 MatDiagonalScale_MPIDense, 1086 MatNorm_MPIDense, 1087 /* 20*/ MatAssemblyBegin_MPIDense, 1088 MatAssemblyEnd_MPIDense, 1089 MatSetOption_MPIDense, 1090 MatZeroEntries_MPIDense, 1091 /* 24*/ MatZeroRows_MPIDense, 1092 0, 1093 0, 1094 0, 1095 0, 1096 /* 29*/ MatSetUp_MPIDense, 1097 0, 1098 0, 1099 MatGetDiagonalBlock_MPIDense, 1100 0, 1101 /* 34*/ MatDuplicate_MPIDense, 1102 0, 1103 0, 1104 0, 1105 0, 1106 /* 39*/ MatAXPY_MPIDense, 1107 MatCreateSubMatrices_MPIDense, 1108 0, 1109 MatGetValues_MPIDense, 1110 0, 1111 /* 44*/ 0, 1112 MatScale_MPIDense, 1113 MatShift_Basic, 1114 0, 1115 0, 1116 /* 49*/ MatSetRandom_MPIDense, 1117 0, 1118 0, 1119 0, 1120 0, 1121 /* 54*/ 0, 1122 0, 1123 0, 1124 0, 1125 0, 1126 /* 59*/ MatCreateSubMatrix_MPIDense, 1127 MatDestroy_MPIDense, 1128 MatView_MPIDense, 1129 0, 1130 0, 1131 /* 64*/ 0, 1132 0, 1133 0, 1134 0, 1135 0, 1136 /* 69*/ 0, 1137 0, 1138 0, 1139 0, 1140 0, 1141 /* 74*/ 0, 1142 0, 1143 0, 1144 0, 1145 0, 1146 /* 79*/ 0, 1147 0, 1148 0, 1149 0, 1150 /* 83*/ MatLoad_MPIDense, 1151 0, 1152 0, 1153 0, 1154 0, 1155 0, 1156 #if defined(PETSC_HAVE_ELEMENTAL) 1157 /* 89*/ MatMatMult_MPIDense_MPIDense, 1158 MatMatMultSymbolic_MPIDense_MPIDense, 1159 #else 1160 /* 89*/ 0, 1161 0, 1162 #endif 1163 MatMatMultNumeric_MPIDense, 1164 0, 1165 0, 1166 /* 94*/ 0, 1167 0, 1168 0, 1169 0, 1170 0, 1171 /* 99*/ 0, 1172 0, 1173 0, 1174 MatConjugate_MPIDense, 1175 0, 1176 /*104*/ 0, 1177 MatRealPart_MPIDense, 1178 MatImaginaryPart_MPIDense, 1179 0, 1180 0, 1181 /*109*/ 0, 1182 0, 1183 0, 1184 0, 1185 MatMissingDiagonal_MPIDense, 1186 /*114*/ 0, 1187 0, 1188 0, 1189 0, 1190 0, 1191 /*119*/ 0, 1192 0, 1193 0, 1194 0, 1195 0, 1196 /*124*/ 0, 1197 MatGetColumnNorms_MPIDense, 1198 0, 1199 0, 1200 0, 1201 /*129*/ 0, 1202 MatTransposeMatMult_MPIDense_MPIDense, 1203 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1204 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1205 0, 1206 /*134*/ 0, 1207 0, 1208 0, 1209 0, 1210 0, 1211 /*139*/ 0, 1212 0, 1213 0 1214 }; 1215 1216 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1217 { 1218 Mat_MPIDense *a; 1219 PetscErrorCode ierr; 1220 1221 PetscFunctionBegin; 1222 mat->preallocated = PETSC_TRUE; 1223 /* Note: For now, when data is specified above, this assumes the user correctly 1224 allocates the local dense storage space. We should add error checking. */ 1225 1226 a = (Mat_MPIDense*)mat->data; 1227 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1228 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1229 a->nvec = mat->cmap->n; 1230 1231 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1232 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1233 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1234 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1235 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1236 PetscFunctionReturn(0); 1237 } 1238 1239 #if defined(PETSC_HAVE_ELEMENTAL) 1240 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1241 { 1242 Mat mat_elemental; 1243 PetscErrorCode ierr; 1244 PetscScalar *v; 1245 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1246 1247 PetscFunctionBegin; 1248 if (reuse == MAT_REUSE_MATRIX) { 1249 mat_elemental = *newmat; 1250 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1251 } else { 1252 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1253 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1254 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1255 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1256 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1257 } 1258 1259 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1260 for (i=0; i<N; i++) cols[i] = i; 1261 for (i=0; i<m; i++) rows[i] = rstart + i; 1262 1263 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1264 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1265 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1266 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1267 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1268 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1269 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1270 1271 if (reuse == MAT_INPLACE_MATRIX) { 1272 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1273 } else { 1274 *newmat = mat_elemental; 1275 } 1276 PetscFunctionReturn(0); 1277 } 1278 #endif 1279 1280 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 1281 { 1282 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1283 PetscErrorCode ierr; 1284 1285 PetscFunctionBegin; 1286 ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 1287 PetscFunctionReturn(0); 1288 } 1289 1290 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1291 { 1292 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1293 PetscErrorCode ierr; 1294 1295 PetscFunctionBegin; 1296 ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 1297 PetscFunctionReturn(0); 1298 } 1299 1300 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1301 { 1302 Mat_MPIDense *a; 1303 PetscErrorCode ierr; 1304 1305 PetscFunctionBegin; 1306 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1307 mat->data = (void*)a; 1308 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1309 1310 mat->insertmode = NOT_SET_VALUES; 1311 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1312 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1313 1314 /* build cache for off array entries formed */ 1315 a->donotstash = PETSC_FALSE; 1316 1317 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1318 1319 /* stuff used for matrix vector multiply */ 1320 a->lvec = 0; 1321 a->Mvctx = 0; 1322 a->roworiented = PETSC_TRUE; 1323 1324 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1325 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1326 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 1327 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1328 #if defined(PETSC_HAVE_ELEMENTAL) 1329 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 1330 #endif 1331 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1332 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1333 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1334 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1335 1336 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1337 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1338 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1339 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 1340 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr); 1341 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 /*MC 1346 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1347 1348 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1349 and MATMPIDENSE otherwise. 1350 1351 Options Database Keys: 1352 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1353 1354 Level: beginner 1355 1356 1357 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1358 M*/ 1359 1360 /*@C 1361 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1362 1363 Not collective 1364 1365 Input Parameters: 1366 . B - the matrix 1367 - data - optional location of matrix data. Set data=NULL for PETSc 1368 to control all matrix memory allocation. 1369 1370 Notes: 1371 The dense format is fully compatible with standard Fortran 77 1372 storage by columns. 1373 1374 The data input variable is intended primarily for Fortran programmers 1375 who wish to allocate their own matrix memory space. Most users should 1376 set data=NULL. 1377 1378 Level: intermediate 1379 1380 .keywords: matrix,dense, parallel 1381 1382 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1383 @*/ 1384 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1385 { 1386 PetscErrorCode ierr; 1387 1388 PetscFunctionBegin; 1389 ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1390 PetscFunctionReturn(0); 1391 } 1392 1393 /*@ 1394 MatDensePlaceArray - Allows one to replace the array in a dense array with an 1395 array provided by the user. This is useful to avoid copying an array 1396 into a matrix 1397 1398 Not Collective 1399 1400 Input Parameters: 1401 + mat - the matrix 1402 - array - the array in column major order 1403 1404 Notes: 1405 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 1406 freed when the matrix is destroyed. 1407 1408 Level: developer 1409 1410 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 1411 1412 @*/ 1413 PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[]) 1414 { 1415 PetscErrorCode ierr; 1416 PetscFunctionBegin; 1417 ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 1418 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 /*@ 1423 MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 1424 1425 Not Collective 1426 1427 Input Parameters: 1428 . mat - the matrix 1429 1430 Notes: 1431 You can only call this after a call to MatDensePlaceArray() 1432 1433 Level: developer 1434 1435 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 1436 1437 @*/ 1438 PetscErrorCode MatDenseResetArray(Mat mat) 1439 { 1440 PetscErrorCode ierr; 1441 PetscFunctionBegin; 1442 ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr); 1443 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1444 PetscFunctionReturn(0); 1445 } 1446 1447 /*@C 1448 MatCreateDense - Creates a parallel matrix in dense format. 1449 1450 Collective on MPI_Comm 1451 1452 Input Parameters: 1453 + comm - MPI communicator 1454 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1455 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1456 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1457 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1458 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1459 to control all matrix memory allocation. 1460 1461 Output Parameter: 1462 . A - the matrix 1463 1464 Notes: 1465 The dense format is fully compatible with standard Fortran 77 1466 storage by columns. 1467 1468 The data input variable is intended primarily for Fortran programmers 1469 who wish to allocate their own matrix memory space. Most users should 1470 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 1471 1472 The user MUST specify either the local or global matrix dimensions 1473 (possibly both). 1474 1475 Level: intermediate 1476 1477 .keywords: matrix,dense, parallel 1478 1479 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1480 @*/ 1481 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1482 { 1483 PetscErrorCode ierr; 1484 PetscMPIInt size; 1485 1486 PetscFunctionBegin; 1487 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1488 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1489 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1490 if (size > 1) { 1491 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1492 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1493 if (data) { /* user provided data array, so no need to assemble */ 1494 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 1495 (*A)->assembled = PETSC_TRUE; 1496 } 1497 } else { 1498 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1499 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1500 } 1501 PetscFunctionReturn(0); 1502 } 1503 1504 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1505 { 1506 Mat mat; 1507 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1508 PetscErrorCode ierr; 1509 1510 PetscFunctionBegin; 1511 *newmat = 0; 1512 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 1513 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1514 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1515 a = (Mat_MPIDense*)mat->data; 1516 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1517 1518 mat->factortype = A->factortype; 1519 mat->assembled = PETSC_TRUE; 1520 mat->preallocated = PETSC_TRUE; 1521 1522 a->size = oldmat->size; 1523 a->rank = oldmat->rank; 1524 mat->insertmode = NOT_SET_VALUES; 1525 a->nvec = oldmat->nvec; 1526 a->donotstash = oldmat->donotstash; 1527 1528 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1529 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1530 1531 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1532 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1533 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1534 1535 *newmat = mat; 1536 PetscFunctionReturn(0); 1537 } 1538 1539 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat) 1540 { 1541 PetscErrorCode ierr; 1542 PetscMPIInt rank,size; 1543 const PetscInt *rowners; 1544 PetscInt i,m,n,nz,j,mMax; 1545 PetscScalar *array,*vals,*vals_ptr; 1546 Mat_MPIDense *a = (Mat_MPIDense*)newmat->data; 1547 1548 PetscFunctionBegin; 1549 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1550 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1551 1552 /* determine ownership of rows and columns */ 1553 m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n; 1554 n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n; 1555 1556 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1557 if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) { 1558 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1559 } 1560 ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 1561 ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr); 1562 ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr); 1563 ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr); 1564 if (!rank) { 1565 ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr); 1566 1567 /* read in my part of the matrix numerical values */ 1568 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1569 1570 /* insert into matrix-by row (this is why cannot directly read into array */ 1571 vals_ptr = vals; 1572 for (i=0; i<m; i++) { 1573 for (j=0; j<N; j++) { 1574 array[i + j*m] = *vals_ptr++; 1575 } 1576 } 1577 1578 /* read in other processors and ship out */ 1579 for (i=1; i<size; i++) { 1580 nz = (rowners[i+1] - rowners[i])*N; 1581 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1582 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1583 } 1584 } else { 1585 /* receive numeric values */ 1586 ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr); 1587 1588 /* receive message of values*/ 1589 ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1590 1591 /* insert into matrix-by row (this is why cannot directly read into array */ 1592 vals_ptr = vals; 1593 for (i=0; i<m; i++) { 1594 for (j=0; j<N; j++) { 1595 array[i + j*m] = *vals_ptr++; 1596 } 1597 } 1598 } 1599 ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr); 1600 ierr = PetscFree(vals);CHKERRQ(ierr); 1601 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1602 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1603 PetscFunctionReturn(0); 1604 } 1605 1606 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1607 { 1608 Mat_MPIDense *a; 1609 PetscScalar *vals,*svals; 1610 MPI_Comm comm; 1611 MPI_Status status; 1612 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz; 1613 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1614 PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols; 1615 PetscInt i,nz,j,rstart,rend; 1616 int fd; 1617 PetscErrorCode ierr; 1618 1619 PetscFunctionBegin; 1620 /* force binary viewer to load .info file if it has not yet done so */ 1621 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1622 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 1623 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1624 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1625 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1626 if (!rank) { 1627 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 1628 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1629 } 1630 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1631 M = header[1]; N = header[2]; nz = header[3]; 1632 1633 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1634 if (newmat->rmap->N < 0) newmat->rmap->N = M; 1635 if (newmat->cmap->N < 0) newmat->cmap->N = N; 1636 1637 if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N); 1638 if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N); 1639 1640 /* 1641 Handle case where matrix is stored on disk as a dense matrix 1642 */ 1643 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1644 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1645 PetscFunctionReturn(0); 1646 } 1647 1648 /* determine ownership of all rows */ 1649 if (newmat->rmap->n < 0) { 1650 ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr); 1651 } else { 1652 ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr); 1653 } 1654 if (newmat->cmap->n < 0) { 1655 n = PETSC_DECIDE; 1656 } else { 1657 ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr); 1658 } 1659 1660 ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr); 1661 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1662 rowners[0] = 0; 1663 for (i=2; i<=size; i++) { 1664 rowners[i] += rowners[i-1]; 1665 } 1666 rstart = rowners[rank]; 1667 rend = rowners[rank+1]; 1668 1669 /* distribute row lengths to all processors */ 1670 ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr); 1671 if (!rank) { 1672 ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr); 1673 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1674 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 1675 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1676 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1677 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1678 } else { 1679 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1680 } 1681 1682 if (!rank) { 1683 /* calculate the number of nonzeros on each processor */ 1684 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 1685 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1686 for (i=0; i<size; i++) { 1687 for (j=rowners[i]; j< rowners[i+1]; j++) { 1688 procsnz[i] += rowlengths[j]; 1689 } 1690 } 1691 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1692 1693 /* determine max buffer needed and allocate it */ 1694 maxnz = 0; 1695 for (i=0; i<size; i++) { 1696 maxnz = PetscMax(maxnz,procsnz[i]); 1697 } 1698 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 1699 1700 /* read in my part of the matrix column indices */ 1701 nz = procsnz[0]; 1702 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 1703 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1704 1705 /* read in every one elses and ship off */ 1706 for (i=1; i<size; i++) { 1707 nz = procsnz[i]; 1708 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1709 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1710 } 1711 ierr = PetscFree(cols);CHKERRQ(ierr); 1712 } else { 1713 /* determine buffer space needed for message */ 1714 nz = 0; 1715 for (i=0; i<m; i++) { 1716 nz += ourlens[i]; 1717 } 1718 ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr); 1719 1720 /* receive message of column indices*/ 1721 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1722 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1723 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1724 } 1725 1726 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1727 a = (Mat_MPIDense*)newmat->data; 1728 if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) { 1729 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1730 } 1731 1732 if (!rank) { 1733 ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr); 1734 1735 /* read in my part of the matrix numerical values */ 1736 nz = procsnz[0]; 1737 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1738 1739 /* insert into matrix */ 1740 jj = rstart; 1741 smycols = mycols; 1742 svals = vals; 1743 for (i=0; i<m; i++) { 1744 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1745 smycols += ourlens[i]; 1746 svals += ourlens[i]; 1747 jj++; 1748 } 1749 1750 /* read in other processors and ship out */ 1751 for (i=1; i<size; i++) { 1752 nz = procsnz[i]; 1753 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1754 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 1755 } 1756 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1757 } else { 1758 /* receive numeric values */ 1759 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 1760 1761 /* receive message of values*/ 1762 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 1763 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1764 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1765 1766 /* insert into matrix */ 1767 jj = rstart; 1768 smycols = mycols; 1769 svals = vals; 1770 for (i=0; i<m; i++) { 1771 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1772 smycols += ourlens[i]; 1773 svals += ourlens[i]; 1774 jj++; 1775 } 1776 } 1777 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1778 ierr = PetscFree(vals);CHKERRQ(ierr); 1779 ierr = PetscFree(mycols);CHKERRQ(ierr); 1780 ierr = PetscFree(rowners);CHKERRQ(ierr); 1781 1782 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1783 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1784 PetscFunctionReturn(0); 1785 } 1786 1787 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 1788 { 1789 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1790 Mat a,b; 1791 PetscBool flg; 1792 PetscErrorCode ierr; 1793 1794 PetscFunctionBegin; 1795 a = matA->A; 1796 b = matB->A; 1797 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1798 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1799 PetscFunctionReturn(0); 1800 } 1801 1802 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A) 1803 { 1804 PetscErrorCode ierr; 1805 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1806 Mat_TransMatMultDense *atb = a->atbdense; 1807 1808 PetscFunctionBegin; 1809 ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr); 1810 ierr = (atb->destroy)(A);CHKERRQ(ierr); 1811 ierr = PetscFree(atb);CHKERRQ(ierr); 1812 PetscFunctionReturn(0); 1813 } 1814 1815 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1816 { 1817 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 1818 Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 1819 Mat_TransMatMultDense *atb = c->atbdense; 1820 PetscErrorCode ierr; 1821 MPI_Comm comm; 1822 PetscMPIInt rank,size,*recvcounts=atb->recvcounts; 1823 PetscScalar *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf; 1824 PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 1825 PetscScalar _DOne=1.0,_DZero=0.0; 1826 PetscBLASInt am,an,bn,aN; 1827 const PetscInt *ranges; 1828 1829 PetscFunctionBegin; 1830 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1831 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1832 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1833 1834 /* compute atbarray = aseq^T * bseq */ 1835 ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr); 1836 ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr); 1837 ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr); 1838 ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr); 1839 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN)); 1840 1841 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 1842 for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 1843 1844 /* arrange atbarray into sendbuf */ 1845 k = 0; 1846 for (proc=0; proc<size; proc++) { 1847 for (j=0; j<cN; j++) { 1848 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 1849 } 1850 } 1851 /* sum all atbarray to local values of C */ 1852 ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr); 1853 ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 1854 ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr); 1855 PetscFunctionReturn(0); 1856 } 1857 1858 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1859 { 1860 PetscErrorCode ierr; 1861 Mat Cdense; 1862 MPI_Comm comm; 1863 PetscMPIInt size; 1864 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 1865 Mat_MPIDense *c; 1866 Mat_TransMatMultDense *atb; 1867 1868 PetscFunctionBegin; 1869 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1870 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 1871 SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 1872 } 1873 1874 /* create matrix product Cdense */ 1875 ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr); 1876 ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1877 ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr); 1878 ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr); 1879 ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1880 ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1881 *C = Cdense; 1882 1883 /* create data structure for reuse Cdense */ 1884 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1885 ierr = PetscNew(&atb);CHKERRQ(ierr); 1886 cM = Cdense->rmap->N; 1887 ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr); 1888 1889 c = (Mat_MPIDense*)Cdense->data; 1890 c->atbdense = atb; 1891 atb->destroy = Cdense->ops->destroy; 1892 Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 1893 PetscFunctionReturn(0); 1894 } 1895 1896 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1897 { 1898 PetscErrorCode ierr; 1899 1900 PetscFunctionBegin; 1901 if (scall == MAT_INITIAL_MATRIX) { 1902 ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1903 } 1904 ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1905 PetscFunctionReturn(0); 1906 } 1907 1908 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A) 1909 { 1910 PetscErrorCode ierr; 1911 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1912 Mat_MatMultDense *ab = a->abdense; 1913 1914 PetscFunctionBegin; 1915 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 1916 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 1917 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 1918 1919 ierr = (ab->destroy)(A);CHKERRQ(ierr); 1920 ierr = PetscFree(ab);CHKERRQ(ierr); 1921 PetscFunctionReturn(0); 1922 } 1923 1924 #if defined(PETSC_HAVE_ELEMENTAL) 1925 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1926 { 1927 PetscErrorCode ierr; 1928 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 1929 Mat_MatMultDense *ab=c->abdense; 1930 1931 PetscFunctionBegin; 1932 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 1933 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 1934 ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 1935 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1940 { 1941 PetscErrorCode ierr; 1942 Mat Ae,Be,Ce; 1943 Mat_MPIDense *c; 1944 Mat_MatMultDense *ab; 1945 1946 PetscFunctionBegin; 1947 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 1948 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 1949 } 1950 1951 /* convert A and B to Elemental matrices Ae and Be */ 1952 ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr); 1953 ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr); 1954 1955 /* Ce = Ae*Be */ 1956 ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr); 1957 ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr); 1958 1959 /* convert Ce to C */ 1960 ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 1961 1962 /* create data structure for reuse Cdense */ 1963 ierr = PetscNew(&ab);CHKERRQ(ierr); 1964 c = (Mat_MPIDense*)(*C)->data; 1965 c->abdense = ab; 1966 1967 ab->Ae = Ae; 1968 ab->Be = Be; 1969 ab->Ce = Ce; 1970 ab->destroy = (*C)->ops->destroy; 1971 (*C)->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 1972 (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 1973 PetscFunctionReturn(0); 1974 } 1975 1976 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1977 { 1978 PetscErrorCode ierr; 1979 1980 PetscFunctionBegin; 1981 if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */ 1982 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 1983 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1984 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 1985 } else { 1986 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 1987 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1988 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 1989 } 1990 PetscFunctionReturn(0); 1991 } 1992 #endif 1993 1994