/*
   Basic functions for basic parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I  "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  /* Only MPIDENSE stores an inner sequential matrix; any other type is its own local matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

/* Returns a row of the global matrix; only rows owned by this process are supported.
   The global row index is translated to a local index and forwarded to the inner
   sequential matrix. */
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Frees the work arrays handed out by MatGetRow_MPIDense() */
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/* Returns the on-diagonal block of the local rows as a sequential dense matrix.
   The block is built once, cached on the matrix as the composed object
   "DiagonalBlock", and shares storage with the local array (no copy is made). */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    /* point B at the diagonal-block columns of the local (column-major) array;
       NOTE(review): this assumes the local array's leading dimension equals m — confirm */
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    /* the compose above keeps a reference; drop ours so the cache owns B */
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

/* Inserts/adds values: locally owned rows go straight into the inner sequential
   matrix; off-process rows are stashed for communication at assembly time
   (unless MAT_IGNORE_OFF_PROC_ENTRIES was set). Handles both row- and
   column-oriented input layouts of v[]. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;  /* negative row indices are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-oriented v: element (i,j) lives at v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;  /* stashed entries force a new assembly */
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Retrieves values; only locally owned rows are currently supported.
   Negative row/column indices are skipped (no error). */
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Leading dimension of the local array: delegated to the inner sequential matrix */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Direct (writable) access to the local dense array: delegated to the inner matrix */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Read-only access to the local dense array: delegated to the inner matrix */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Temporarily substitute a user-provided array for the local storage */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Undo a previous MatDensePlaceArray(), restoring the original local storage */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extracts the submatrix given by (isrow, iscol). The column index set is
   gathered onto all processes (every process owns full rows of the result),
   and the data is copied column by column from the local array.
   Limitation (see comment below): the submatrix must have the same row
   partitioning as the original matrix. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    /* column icol[i] of the (column-major) local array; lda is the column stride */
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

/* Releases writable array access taken by MatDenseGetArray_MPIDense() */
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Releases read-only array access taken by MatDenseGetArrayRead_MPIDense() */
PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Starts assembly: verifies all ranks agree on INSERT vs ADD mode, then begins
   scattering the stashed off-process entries to their owners */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  /* a BOR of both flags set means the ranks disagreed */
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Finishes assembly: drains the stash messages, inserting received values
   (grouped by row for fewer MatSetValues calls), then assembles the inner
   sequential matrix and, on first final assembly, builds the scatter used by
   MatMult */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Zeros all entries of the local block */
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row owerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
/* Zeros the (globally indexed) rows in rows[], putting diag on the diagonal and
   fixing b = diag*x for those rows when x,b are supplied. Row indices may refer
   to rows owned by other ranks; a hand-rolled Irecv/Isend exchange routes each
   index to its owner before the local rows are zapped. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  /* sizes[2*j] = #indices destined for rank j, sizes[2*j+1] = 1 iff we send to rank j at all */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  /* starts[] was consumed as a cursor above; rebuild it for the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;  /* convert to local row indices */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    /* write diag directly into the local column-major array on the diagonal */
    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

/* y = A*x: gather the full x into lvec, then apply the local dense block */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = y + A*x: same gather as MatMult, then local mult-add */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* y = A^T*x: local transpose-multiply into lvec, then add-scatter (reverse)
   so each rank accumulates contributions from all local blocks */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* z = y + A^T*x: copy y into z, then add-scatter the local transpose product */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extracts the locally owned part of the global diagonal into v */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  /* column offset of the diagonal block in the column-major local array;
     NOTE(review): indexing below assumes the local leading dimension equals m — confirm */
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroys the matrix: inner seq matrix, scatter context and work vector,
   the stash, and all composed method implementations */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* remove every composed function so stale pointers cannot be invoked after a type change */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Writes the matrix to a binary viewer in PETSC_VIEWER_NATIVE format.
   Rank 0 writes the header and its own rows (transposed from the column-major
   local array into row-major order), then receives and writes each other
   rank's rows in turn. */
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;  /* transpose: column-major storage -> row-major file */
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        /* non-root ranks just ship their local (column-major) array to rank 0 */
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
/* ASCII/draw/socket viewing. Info formats print per-rank summaries; otherwise
   the entire matrix is gathered onto rank 0 (as a temporary MPIDENSE with all
   rows on rank 0) and viewed there sequentially. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Dispatches viewing to the binary or ASCII/draw/socket implementation */
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Gathers matrix statistics from the inner matrix; for global flags the
   per-rank values are reduced with MAX or SUM across the communicator */
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Handles matrix options; most are either forwarded to the inner sequential
   matrix, stored locally (row orientation, stashing), or ignored */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


/* A = diag(ll)*A*diag(rr). Left scaling uses the local part of ll directly;
   right scaling first gathers rr into lvec via the multiply scatter.
   NOTE(review): the inner loops assume the local array's column stride is m — confirm */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;  /* walk across row i of the column-major array */
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;  /* column i is contiguous */
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Matrix norms. Frobenius: sum of squares reduced across ranks. One-norm:
   per-column absolute sums reduced, then max. Infinity-norm: local row norms
   reduced with MAX. Two-norm is not supported in parallel. */
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

/* B = A^T: each local column of A becomes a (globally indexed) row of B,
   inserted through the parallel MatSetValues machinery; MAT_INPLACE_MATRIX
   replaces A by merging B's header into it */
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;  /* global column indices of B for local rows of A */
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
991 PetscFunctionReturn(0); 992 } 993 994 995 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 996 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 997 998 PetscErrorCode MatSetUp_MPIDense(Mat A) 999 { 1000 PetscErrorCode ierr; 1001 1002 PetscFunctionBegin; 1003 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1004 PetscFunctionReturn(0); 1005 } 1006 1007 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1008 { 1009 PetscErrorCode ierr; 1010 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1011 1012 PetscFunctionBegin; 1013 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1014 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1015 PetscFunctionReturn(0); 1016 } 1017 1018 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1019 { 1020 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1021 PetscErrorCode ierr; 1022 1023 PetscFunctionBegin; 1024 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1025 PetscFunctionReturn(0); 1026 } 1027 1028 PetscErrorCode MatRealPart_MPIDense(Mat A) 1029 { 1030 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1031 PetscErrorCode ierr; 1032 1033 PetscFunctionBegin; 1034 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1035 PetscFunctionReturn(0); 1036 } 1037 1038 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1039 { 1040 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1041 PetscErrorCode ierr; 1042 1043 PetscFunctionBegin; 1044 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1045 PetscFunctionReturn(0); 1046 } 1047 1048 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 1049 { 1050 PetscErrorCode ierr; 1051 PetscScalar *x; 1052 const PetscScalar *a; 1053 PetscInt lda; 1054 1055 PetscFunctionBegin; 1056 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1057 ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr); 1058 ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr); 1059 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1060 
ierr = PetscArraycpy(x,a+col*lda,A->rmap->n);CHKERRQ(ierr); 1061 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1062 ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1067 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1068 { 1069 PetscErrorCode ierr; 1070 PetscInt i,n; 1071 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1072 PetscReal *work; 1073 1074 PetscFunctionBegin; 1075 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1077 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1078 if (type == NORM_2) { 1079 for (i=0; i<n; i++) work[i] *= work[i]; 1080 } 1081 if (type == NORM_INFINITY) { 1082 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1083 } else { 1084 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1085 } 1086 ierr = PetscFree(work);CHKERRQ(ierr); 1087 if (type == NORM_2) { 1088 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1089 } 1090 PetscFunctionReturn(0); 1091 } 1092 1093 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1094 { 1095 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1096 PetscErrorCode ierr; 1097 PetscScalar *a; 1098 PetscInt m,n,i; 1099 1100 PetscFunctionBegin; 1101 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1102 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1103 for (i=0; i<m*n; i++) { 1104 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1105 } 1106 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1111 1112 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1113 { 1114 PetscFunctionBegin; 1115 *missing = PETSC_FALSE; 1116 PetscFunctionReturn(0); 1117 } 1118 1119 static PetscErrorCode 
MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

/* -------------------------------------------------------------------*/
/* Operation table for MATMPIDENSE; slot positions follow struct _MatOps,
   a 0 means the operation is unsupported for this type. */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        MatMatTransposeMult_MPIDense_MPIDense,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
};

/*
   Type-specific implementation behind MatMPIDenseSetPreallocation(): sets up
   the layouts and creates the local MATSEQDENSE block of size
   (local rows) x (global columns), optionally wrapping a user-supplied array.
   (Function continues below.)
*/
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking.
  */

  a = (Mat_MPIDense*)mat->data;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  /* every rank stores its full rows: local block is (local rows) x (global cols) */
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/*
   Converts a MATMPIDENSE matrix to MATELEMENTAL by inserting the locally
   owned rows with MatSetValues.  For MAT_REUSE_MATRIX the target is zeroed
   first; for MAT_INPLACE_MATRIX the new matrix replaces A's header.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* Forwards MatDenseGetColumn() to the local sequential block. */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Forwards MatDenseRestoreColumn() to the local sequential block. */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Builds (or refills) a parallel dense matrix whose local block on each rank
   is the given sequential matrix inmat.  In the symbolic phase the column
   ownership n is resolved (and validated to sum to N); the numeric phase
   copies inmat into the local block and assembles.
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Constructor for MATMPIDENSE: allocates the type-specific data, installs the
   operation table, creates the stash for off-process entries, and composes
   the type-specific method implementations.  (Function continues below.)
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr =
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense array with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   Duplicates A (same layout, same type); the local sequential block is
   duplicated with MatDuplicate honoring cpvalues, after the multiply
   machinery (lvec/Mvctx) is set up for the new matrix.
*/
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/*
   Loads a matrix stored on disk in dense (column-count-free) binary format:
   rank 0 reads every rank's row block, reorders it into column-major local
   storage, and ships the other ranks their blocks.
   (Function continues below.)
*/
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  const PetscInt *rowners;
  PetscInt       i,m,n,nz,j,mMax;
  PetscScalar    *array,*vals,*vals_ptr;
  Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of rows and columns */
  m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
  n = (newmat->cmap->n < 0) ?
PETSC_DECIDE : newmat->cmap->n;

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  if (!a->A) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
  ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
  /* rank 0's read buffer must hold the largest row block of any rank */
  ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,NULL,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix-by row (this is why cannot directly read into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into matrix-by row (this is why cannot directly read into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Loads a MATMPIDENSE matrix from a PETSc binary viewer.  Rank 0 reads the
   header, row lengths, column indices and values, scattering/sending each
   rank its portion; entries are inserted with MatSetValues.  A file stored
   in the dense layout (nz == MATRIX_BINARY_FORMAT_DENSE) is delegated to
   MatLoad_MPIDense_DenseInFile().
*/
static PetscErrorCode MatLoad_MPIDense_Binary(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  if (newmat->cmap->n < 0) {
    n = PETSC_DECIDE;
  } else {
    ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  /* prefix sum turns per-rank row counts into ownership ranges */
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  a = (Mat_MPIDense*)newmat->data;
  if (!a->A) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Dispatches MatLoad() to the binary or (if available) HDF5 reader. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_MPIDense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
/* MatEqual_MPIDense - tests two MPI dense matrices for equality: each rank
   compares its local sequential block, then the per-rank results are
   AND-reduced over the communicator so every rank gets the same answer. */
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destructor for products created by MatTransposeMatMult_MPIDense_MPIDense:
   frees the reuse buffers, then chains to the original destroy routine saved
   at symbolic time. */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a   = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destructor for products created by MatMatTransposeMult_MPIDense_MPIDense:
   frees the communication buffers (recvcounts/recvdispls may be NULL for the
   cyclic algorithm; PetscFree2 accepts that), then chains to the original
   destroy routine. */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a   = (Mat_MPIDense*)A->data;
  Mat_MatTransMultDense *abt = a->abtdense;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = (abt->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A^T * B for MPI dense matrices: each rank computes a
   full-size local contribution with BLAS gemm, then the contributions are
   summed and distributed to the owning ranks with MPI_Reduce_scatter. */
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt   am,an,bn,aN;
  const PetscInt *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq, this rank's cM x cN contribution to C */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  /* each rank receives the rows of C it owns: (local rows)*cN entries */
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf, grouped by destination rank, column-major
     within each rank's row block so it lands directly in the dense storage */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum all contributions straight into the local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A^T * B for MPI dense matrices: creates the product
   matrix and allocates the buffers the numeric phase reuses.  The reuse data
   is attached to C and released by the chained destroy routine. */
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A^T*B needs A and B to have the same row layout */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reuse of Cdense.
     NOTE(review): sendbuf and atbarray are the full global size of C on
     every rank -- memory heavy for large C; this matches the
     Reduce_scatter scheme of the numeric phase but is worth confirming */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = Cdense->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  c           = (Mat_MPIDense*)Cdense->data;
  c->atbdense = atb;
  /* chain the destroy routine so the reuse buffers are freed with C */
  atb->destroy         = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Driver for C = A^T * B: symbolic phase on MAT_INITIAL_MATRIX, then the
   numeric phase (always executed). */
PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A * B^T for MPI dense matrices: creates the product
   matrix, selects the communication algorithm (allgatherv or cyclic, settable
   via -matmattransmult_mpidense_mpidense_via) and allocates the buffers the
   chosen numeric routine needs. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  const char            *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt              alg, nalg = 2;
  Mat_MatTransMultDense *abt;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  alg  = 0; /* default is allgatherv */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  /* A*B^T needs A and B to have the same (global) column dimension */
  if (A->cmap->N != B->cmap->N) {
    SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reuse of Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* cyclic: two buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* allgatherv: local B^T in buf[0], the gathered global B^T in buf[1] */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  c           = (Mat_MPIDense*)Cdense->data;
  c->abtdense = abt;
  /* chain the destroy routine so the reuse buffers are freed with C */
  abt->destroy         = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A * B^T using a cyclic (ring) exchange of B's row
   blocks: each step multiplies the block currently on hand while the next one
   is in flight, overlapping communication with computation. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense          *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *carray;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm, cn, ck;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  /* use B's storage directly when it is contiguous, otherwise compact the
     local rows into buf[0] */
  if (bseq->lda == bn) {
    sendbuf = bseq->v;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bseq->v[i * bseq->lda + j];
      }
    }
  }
  /* ring neighbours: send to the previous rank, receive from the next */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* the previous iteration's send/receive have completed, so the buffers
       can be read and modified again */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = 
ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T, written into the columns of C that correspond
       to the rank this block of B originated on */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A * B^T using MPI_Allgatherv: every rank gathers all
   of B^T into buf[1], then performs a single local gemm. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense          *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size;
  PetscScalar           *sendbuf, *recvbuf;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm, cn, ck;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* copy the transpose of the local rows of B into buf[0] */
  bn      = B->rmap->n;
  sendbuf = abt->buf[0];
  recvbuf = abt->buf[1];
  for (k = 0, j = 0; j < bn; j++) {
    for (i = 0; i < cK; i++, k++) {
      sendbuf[k] = bseq->v[i * bseq->lda + j];
    }
  }
  ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
  /* C_local = A_local * (gathered B^T) */
  PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
  PetscFunctionReturn(0);
}

/* Dispatch the numeric C = A * B^T to the algorithm chosen at symbolic time. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *c   = (Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  switch (abt->alg) {
  case 1:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
    break;
  default:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
    break;
  }
  PetscFunctionReturn(0);
}

/* Driver for C = A * B^T: symbolic phase on MAT_INITIAL_MATRIX, then the
   numeric phase (always executed). */
static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destructor for products created by MatMatMult_MPIDense_MPIDense: destroys
   the cached Elemental matrices, then chains to the original destroy routine. */
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a  = (Mat_MPIDense*)A->data;
  Mat_MatMultDense *ab = a->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = 
MatDestroy(&ab->Ae);CHKERRQ(ierr); 2241 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2242 2243 ierr = (ab->destroy)(A);CHKERRQ(ierr); 2244 ierr = PetscFree(ab);CHKERRQ(ierr); 2245 PetscFunctionReturn(0); 2246 } 2247 2248 #if defined(PETSC_HAVE_ELEMENTAL) 2249 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2250 { 2251 PetscErrorCode ierr; 2252 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2253 Mat_MatMultDense *ab=c->abdense; 2254 2255 PetscFunctionBegin; 2256 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2257 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2258 ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2259 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2260 PetscFunctionReturn(0); 2261 } 2262 2263 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 2264 { 2265 PetscErrorCode ierr; 2266 Mat Ae,Be,Ce; 2267 Mat_MPIDense *c; 2268 Mat_MatMultDense *ab; 2269 2270 PetscFunctionBegin; 2271 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2272 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2273 } 2274 2275 /* convert A and B to Elemental matrices Ae and Be */ 2276 ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr); 2277 ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr); 2278 2279 /* Ce = Ae*Be */ 2280 ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr); 2281 ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr); 2282 2283 /* convert Ce to C */ 2284 ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 2285 2286 /* create data structure for reuse Cdense */ 2287 ierr = PetscNew(&ab);CHKERRQ(ierr); 2288 c = (Mat_MPIDense*)(*C)->data; 2289 c->abdense = ab; 2290 2291 
ab->Ae = Ae; 2292 ab->Be = Be; 2293 ab->Ce = Ce; 2294 ab->destroy = (*C)->ops->destroy; 2295 (*C)->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2296 (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2297 PetscFunctionReturn(0); 2298 } 2299 2300 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 2301 { 2302 PetscErrorCode ierr; 2303 2304 PetscFunctionBegin; 2305 if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */ 2306 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 2307 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 2308 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 2309 } else { 2310 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 2311 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 2312 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 2313 } 2314 PetscFunctionReturn(0); 2315 } 2316 #endif 2317 2318