/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
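/*
   Usage sketch for MatDenseGetLocalMatrix() (illustrative only, not library code;
   assumes an already assembled MATMPIDENSE matrix A):

     Mat Aloc;

     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     // Aloc is the per-process MATSEQDENSE block of locally owned rows; it is
     // returned as a borrowed reference, so the caller must not destroy it.
     ierr = MatScale(Aloc,2.0);CHKERRQ(ierr);   // operate on the local rows directly
*/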
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
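/*
   Sketch of the stashing semantics implemented above (illustrative only, not
   library code; grow/gcol/val are hypothetical names): entries destined for rows
   owned by another process are cached in mat->stash and communicated during
   assembly:

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   With MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) such entries are
   silently dropped instead, as the donotstash branch above shows.
*/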
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same row
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
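/*
   Usage sketch for the routine above (hypothetical, not library code): since no
   parallel redistribution is supported, isrow must select the caller's locally
   owned rows; ncols below is a hypothetical global column count:

     IS       isrow,iscol;
     Mat      sub;
     PetscInt rstart,rend;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,ncols,0,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/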
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
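/*
   Usage sketch for the routine above (hypothetical, not library code): zero a set
   of global rows, place diag on the diagonal, and fix up b for a known solution x:

     PetscInt rows[] = {0,5};   // global row indices; any rank may list any row

     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);

   Note the restriction checked at entry: the matrix must be square with identical
   row and column ownership; the diagonal fix-up is only correct in that case.
*/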
PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}
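/*
   Usage sketch for the binary viewer path above (hypothetical, not library code):
   as the error message says, a parallel dense matrix can only be stored in the
   native format, so the format must be pushed before viewing:

     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/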
extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
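/*
   Usage sketch for the options handled above (hypothetical, not library code):
   switching MatSetValues() input to column-major order and dropping off-process
   entries maps onto the roworiented and donotstash flags, respectively:

     ierr = MatSetOption(A,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/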
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
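/*
   The reduction pattern above generalizes: local column norms are combined with
   MPIU_MAX for the infinity norm and MPIU_SUM otherwise, with 2-norms squared
   before the sum and square-rooted after. A minimal caller sketch (hypothetical,
   not library code):

     PetscReal *norms;
     PetscInt  N;

     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/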
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0
};

PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
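/*
   Usage sketch for the place/reset pair (hypothetical, not library code): lend a
   user-owned, column-major buffer covering the local rows to the matrix, use it,
   then restore the original storage:

     PetscScalar *buf;   // user-allocated, at least (local rows) x (global cols)
                         // entries; the user frees it, the matrix never will

     ierr = MatDensePlaceArray(mat,buf);CHKERRQ(ierr);
     // ... operations on mat now read and write buf ...
     ierr = MatDenseResetArray(mat);CHKERRQ(ierr);
*/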
/*@
   MatDenseResetArray - Resets the matrix array to the array it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
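/*
   Usage sketch for MatCreateDense() (hypothetical, not library code): create a
   PETSc-managed M x N dense matrix, letting PETSc decide the row distribution:

     Mat A;

     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
     // ... fill via MatSetValues() plus assembly, or via MatDenseGetArray() ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/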
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  const PetscInt *rowners;
  PetscInt       i,m,n,nz,j,mMax;
  PetscScalar    *array,*vals,*vals_ptr;
  Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of rows and columns */
  m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
  n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
  ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
  ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
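/*
   Usage sketch for the load path below (hypothetical, not library code): read a
   matrix written in the native dense binary format back into a MATMPIDENSE:

     Mat         B;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/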

PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* if global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  if (newmat->cmap->n < 0) {
    n = PETSC_DECIDE;
  } else {
    ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }
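
  /*
     Worked example of the default row split above (an illustration, not code):
     with M = 10 global rows on size = 3 processes, M/size = 3 and M%size = 1,
     so rank 0 gets 3 + (1 > 0) = 4 rows and ranks 1 and 2 get 3 rows each;
     after the prefix sum, rowners = {0,4,7,10}.
  */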

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
  }

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)newmat->data;
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processes' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
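
/*
   A hedged usage sketch for the equality test above (illustrative only):
   MatEqual() is collective, so every process in the communicator must call
   it; as the allreduce with MPI_LAND shows, the result is true only if the
   local blocks match on every rank.

     PetscBool eq;

     ierr = MatEqual(A,B,&eq);CHKERRQ(ierr);
     if (eq) { ierr = PetscPrintf(PETSC_COMM_WORLD,"A and B are equal\n");CHKERRQ(ierr); }
*/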

PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a   = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data,*b=(Mat_MPIDense*)B->data,*c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data,*bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          am,an,bn,aN;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum the partial products from all processes into the local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
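
/*
   The reasoning behind the numeric phase above (a sketch of the math, not
   code): with A and B both distributed by rows, C = A^T B decomposes as

       C = sum_p (A_p)^T B_p

   where A_p and B_p are the row blocks owned by process p. Each process
   computes its full-size partial product (A_p)^T B_p with a single GEMM,
   and MPI_Reduce_scatter both sums these partial products and leaves each
   process holding only its own row block of C.
*/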

PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure so Cdense can be reused */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = Cdense->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  c            = (Mat_MPIDense*)Cdense->data;
  c->atbdense  = atb;
  atb->destroy = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a  = (Mat_MPIDense*)A->data;
  Mat_MatMultDense *ab = a->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);

  ierr = (ab->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c =(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MPIDense     *c;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* convert A and B to Elemental matrices Ae and Be */
  ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);CHKERRQ(ierr);
  ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX,&Be);CHKERRQ(ierr);

  /* Ce = Ae*Be */
  ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);

  /* convert Ce to C */
  ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);

  /* create data structure so C can be reused */
  ierr = PetscNew(&ab);CHKERRQ(ierr);
  c          = (Mat_MPIDense*)(*C)->data;
  c->abdense = ab;

  ab->Ae      = Ae;
  ab->Be      = Be;
  ab->Ce      = Ce;
  ab->destroy = (*C)->ops->destroy;
  (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
  (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
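
/*
   A hedged usage sketch (illustrative only, not part of the library): forming
   C = A*B for two MATMPIDENSE matrices through the public interface, which
   dispatches to the routines above. This path is available only when PETSc is
   configured with Elemental (PETSC_HAVE_ELEMENTAL).

     Mat C;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/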

PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) { /* the symbolic product includes the numeric product */
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  } else {
    ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
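
/*
   Illustrative reuse pattern for the products above (a sketch, not library
   code): a first call with MAT_INITIAL_MATRIX performs the symbolic setup
   together with the numeric product; subsequent calls with MAT_REUSE_MATRIX
   redo only the numeric phase with the data structures created earlier.

     Mat C;

     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ... update the entries of A or B ...
     ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/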