/*
   Basic functions for basic parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  /* Only an MPI dense matrix wraps an inner sequential matrix; otherwise A is already sequential */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

/* Returns a row of the matrix; only rows owned by this process may be requested.
   The global row index is translated to a local index and forwarded to the inner
   sequential matrix. */
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Releases the (allocated) index and value arrays handed out by MatGetRow_MPIDense */
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

/* Returns the local diagonal block of a square MPI dense matrix as a sequential dense
   matrix that shares storage with the local part of A. The block is cached on A under
   the key "DiagonalBlock" so repeated calls return the same object. */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B aliases A's local storage: the diagonal block starts at column rstart of the
       (column-major) local array, i.e. offset m*rstart */
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    /* the composition above holds a reference; drop the local one */
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

/* Inserts/adds values; locally owned rows go straight into the inner sequential matrix,
   off-process rows are stashed for communication during assembly (unless donotstash). */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;    /* negative row indices are silently ignored */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-oriented input: values for entry (i,j) are at v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash for MatAssemblyBegin/End to communicate */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Retrieves values; only locally owned rows are supported */
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Leading dimension of the local array; forwarded to the inner sequential matrix */
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Writable access to the local array; forwarded to the inner sequential matrix */
static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Read-only access to the local array; forwarded to the inner sequential matrix */
static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Temporarily replaces the local storage with a user-supplied array */
static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Restores the original local storage after MatDensePlaceArray */
static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extracts the submatrix A(isrow,iscol). The column index set is gathered onto all
   processes; each process copies its own rows. NOTE(review): the code requires the
   submatrix row distribution to match A's (see comment below) and does not validate
   the index sets in the MAT_REUSE_MATRIX path. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    /* column icol[i] of the (column-major) local array, respecting the leading dimension */
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

/* Returns the array obtained with MatDenseGetArray_MPIDense */
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Returns the array obtained with MatDenseGetArrayRead_MPIDense */
PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Begins assembly: verifies a globally consistent insert mode and starts scattering
   the stashed off-process values to their owners. */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Ends assembly: drains the stash messages, inserting the received values (batched by
   row), then assembles the inner sequential matrix and, on first final assembly, sets
   up the scatter used by the multiply routines. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Zeros all entries of the local block */
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  /* sizes[2*j] counts rows destined for process j; sizes[2*j+1] flags that a message is sent */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  /* rebuild starts (the fill loop above advanced them) */
  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;   /* convert received global rows to local indices */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    /* place diag on the (global) diagonal of each zeroed local row, indexing the
       column-major local array with leading dimension m */
    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

/* yy = A*xx: gathers the full xx into lvec, then multiplies by the local block */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = yy + A*xx */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* yy = A'*xx: local transpose multiply, then reverse-scatter with addition */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = yy + A'*xx */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extracts the diagonal into v; each process reads its diagonal entries directly from
   the column-major local array (offset radd skips the columns left of the diagonal block). */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroys the matrix: frees the stash, the inner sequential matrix, the scatter
   machinery, and clears all composed methods installed at creation time. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Binary viewer: process 0 writes the header and its own rows (transposed into row-major
   order for the file), then receives and writes each other process's rows in rank order.
   Requires PETSC_VIEWER_NATIVE format for the parallel case. */
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;   /* transpose column-major storage to row-major file layout */
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
/* ASCII/draw/socket viewer: for INFO formats prints per-process statistics; otherwise
   assembles the entire matrix onto process 0 and views it there sequentially. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Dispatches MatView to the binary path or the ASCII/draw/socket path */
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Gathers matrix statistics: local values come from the inner sequential matrix;
   MAT_GLOBAL_MAX / MAT_GLOBAL_SUM reduce them across the communicator. */
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Sets matrix options; most are forwarded to the inner sequential matrix or ignored */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


/* Scales A <- diag(ll)*A*diag(rr). Left scaling uses local vector entries directly;
   right scaling first gathers rr into lvec since columns span all processes. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;                      /* row i of the column-major local array */
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;                    /* column i of the column-major local array */
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0;
i<mdn->A->rmap->n; i++) { 940 tmp[j] += PetscAbsScalar(*v); v++; 941 } 942 } 943 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 944 for (j=0; j<A->cmap->N; j++) { 945 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 946 } 947 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 948 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 949 } else if (type == NORM_INFINITY) { /* max row norm */ 950 PetscReal ntemp; 951 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 952 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 953 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 954 } 955 PetscFunctionReturn(0); 956 } 957 958 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 959 { 960 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 961 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 962 Mat B; 963 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 964 PetscErrorCode ierr; 965 PetscInt j,i; 966 PetscScalar *v; 967 968 PetscFunctionBegin; 969 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 970 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 971 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 972 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 973 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 974 } else { 975 B = *matout; 976 } 977 978 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 979 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 980 for (i=0; i<m; i++) rwork[i] = rstart + i; 981 for (j=0; j<n; j++) { 982 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 983 v += m; 984 } 985 ierr = PetscFree(rwork);CHKERRQ(ierr); 986 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 987 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 988 if (reuse == MAT_INITIAL_MATRIX || reuse == 
MAT_REUSE_MATRIX) { 989 *matout = B; 990 } else { 991 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 992 } 993 PetscFunctionReturn(0); 994 } 995 996 997 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 998 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 999 1000 PetscErrorCode MatSetUp_MPIDense(Mat A) 1001 { 1002 PetscErrorCode ierr; 1003 1004 PetscFunctionBegin; 1005 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1010 { 1011 PetscErrorCode ierr; 1012 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1013 1014 PetscFunctionBegin; 1015 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1016 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1017 PetscFunctionReturn(0); 1018 } 1019 1020 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1021 { 1022 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1023 PetscErrorCode ierr; 1024 1025 PetscFunctionBegin; 1026 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 PetscErrorCode MatRealPart_MPIDense(Mat A) 1031 { 1032 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1033 PetscErrorCode ierr; 1034 1035 PetscFunctionBegin; 1036 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1037 PetscFunctionReturn(0); 1038 } 1039 1040 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1041 { 1042 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1043 PetscErrorCode ierr; 1044 1045 PetscFunctionBegin; 1046 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1047 PetscFunctionReturn(0); 1048 } 1049 1050 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 1051 { 1052 PetscErrorCode ierr; 1053 PetscScalar *x; 1054 const PetscScalar *a; 1055 PetscInt lda; 1056 1057 PetscFunctionBegin; 1058 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1059 ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr); 
1060 ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr); 1061 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1062 ierr = PetscMemcpy(x,a+col*lda,A->rmap->n*sizeof(PetscScalar));CHKERRQ(ierr); 1063 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1064 ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr); 1065 PetscFunctionReturn(0); 1066 } 1067 1068 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1069 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1070 { 1071 PetscErrorCode ierr; 1072 PetscInt i,n; 1073 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1074 PetscReal *work; 1075 1076 PetscFunctionBegin; 1077 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1078 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1079 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1080 if (type == NORM_2) { 1081 for (i=0; i<n; i++) work[i] *= work[i]; 1082 } 1083 if (type == NORM_INFINITY) { 1084 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1085 } else { 1086 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1087 } 1088 ierr = PetscFree(work);CHKERRQ(ierr); 1089 if (type == NORM_2) { 1090 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1091 } 1092 PetscFunctionReturn(0); 1093 } 1094 1095 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1096 { 1097 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1098 PetscErrorCode ierr; 1099 PetscScalar *a; 1100 PetscInt m,n,i; 1101 1102 PetscFunctionBegin; 1103 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1104 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1105 for (i=0; i<m*n; i++) { 1106 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1107 } 1108 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1109 PetscFunctionReturn(0); 1110 } 1111 1112 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1113 1114 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1115 { 1116 
PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

/* -------------------------------------------------------------------*/
/* Function table for MATMPIDENSE; slot numbers follow struct _MatOps.
   Entries left 0 are operations not supported by this type. */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        MatMatTransposeMult_MPIDense_MPIDense,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
};

/*
   MatMPIDenseSetPreallocation_MPIDense - Allocates the local storage: one
   sequential dense matrix of size (local rows) x (global columns) per process.
   If data is non-NULL it is used as the storage; otherwise PETSc allocates.
*/
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  /* the local block holds ALL global columns of the locally owned rows */
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/*
   MatConvert_MPIDense_Elemental - Converts a MATMPIDENSE matrix to MATELEMENTAL
   by inserting all locally owned rows with MatSetValues.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* MatDenseGetColumn_MPIDense - Direct pointer access to a column of the local block. */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatDenseRestoreColumn_MPIDense - Releases access obtained with MatDenseGetColumn. */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatCreateMPIMatConcatenateSeqMat_MPIDense - Builds a parallel dense matrix
   whose local block is a copy of inmat on each process.  n is the local
   column count (or PETSC_DECIDE); the symbolic phase verifies sum(n) == N.
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatCreate_MPIDense - Type constructor registered for MATMPIDENSE: installs
   the function table, creates the stash for off-process entries, and composes
   the type-specific methods queried by the generic Mat interface.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
.  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user.  This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray().  The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatDuplicate_MPIDense - Creates a new matrix of the same type and layout as A,
   duplicating the local block per cpvalues (copy values or not).
*/
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  /* share the layout objects with the original matrix */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/*
   MatLoad_MPIDense_DenseInFile - Loads a matrix stored on disk in dense
   (column-count-free) binary format.  Rank 0 reads the whole file row block
   by row block and ships each process its rows with MPIULong_Send; values
   arrive row-major and are transposed into the column-major local array.
*/
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  const PetscInt *rowners;
  PetscInt       i,m,n,nz,j,mMax;
  PetscScalar    *array,*vals,*vals_ptr;
  Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of rows and columns */
  m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
  n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  if (!a->A) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
  ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
  /* rank 0's staging buffer must hold the largest per-process row block */
  ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,NULL,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert by row: the file is row-major, the local array is column-major,
       so we cannot read directly into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert by row: transpose received row-major data into the column-major array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatLoad_MPIDense_Binary - Loads a matrix from a PETSc binary viewer.
   Handles both the sparse (AIJ-style) on-disk layout and, via
   MatLoad_MPIDense_DenseInFile, the dense layout.
*/
static PetscErrorCode MatLoad_MPIDense_Binary(Mat newmat,PetscViewer viewer)
{ 1699 Mat_MPIDense *a; 1700 PetscScalar *vals,*svals; 1701 MPI_Comm comm; 1702 MPI_Status status; 1703 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz; 1704 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1705 PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols; 1706 PetscInt i,nz,j,rstart,rend; 1707 int fd; 1708 PetscErrorCode ierr; 1709 1710 PetscFunctionBegin; 1711 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 1712 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1713 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1714 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1715 if (!rank) { 1716 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 1717 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1718 } 1719 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1720 M = header[1]; N = header[2]; nz = header[3]; 1721 1722 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1723 if (newmat->rmap->N < 0) newmat->rmap->N = M; 1724 if (newmat->cmap->N < 0) newmat->cmap->N = N; 1725 1726 if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N); 1727 if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N); 1728 1729 /* 1730 Handle case where matrix is stored on disk as a dense matrix 1731 */ 1732 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1733 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1734 PetscFunctionReturn(0); 1735 } 1736 1737 /* determine ownership of all rows */ 1738 if (newmat->rmap->n < 0) { 1739 ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr); 1740 } else { 1741 ierr = 
PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr); 1742 } 1743 if (newmat->cmap->n < 0) { 1744 n = PETSC_DECIDE; 1745 } else { 1746 ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr); 1747 } 1748 1749 ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr); 1750 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1751 rowners[0] = 0; 1752 for (i=2; i<=size; i++) { 1753 rowners[i] += rowners[i-1]; 1754 } 1755 rstart = rowners[rank]; 1756 rend = rowners[rank+1]; 1757 1758 /* distribute row lengths to all processors */ 1759 ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr); 1760 if (!rank) { 1761 ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr); 1762 ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr); 1763 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 1764 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1765 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1766 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1767 } else { 1768 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1769 } 1770 1771 if (!rank) { 1772 /* calculate the number of nonzeros on each processor */ 1773 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 1774 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1775 for (i=0; i<size; i++) { 1776 for (j=rowners[i]; j< rowners[i+1]; j++) { 1777 procsnz[i] += rowlengths[j]; 1778 } 1779 } 1780 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1781 1782 /* determine max buffer needed and allocate it */ 1783 maxnz = 0; 1784 for (i=0; i<size; i++) { 1785 maxnz = PetscMax(maxnz,procsnz[i]); 1786 } 1787 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 1788 1789 /* read in my part of the matrix column indices */ 1790 nz = procsnz[0]; 1791 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 1792 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 1793 1794 /* read in every one elses and 
ship off */ 1795 for (i=1; i<size; i++) { 1796 nz = procsnz[i]; 1797 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 1798 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1799 } 1800 ierr = PetscFree(cols);CHKERRQ(ierr); 1801 } else { 1802 /* determine buffer space needed for message */ 1803 nz = 0; 1804 for (i=0; i<m; i++) { 1805 nz += ourlens[i]; 1806 } 1807 ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr); 1808 1809 /* receive message of column indices*/ 1810 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1811 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1812 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1813 } 1814 1815 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1816 a = (Mat_MPIDense*)newmat->data; 1817 if (!a->A) { 1818 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1819 } 1820 1821 if (!rank) { 1822 ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr); 1823 1824 /* read in my part of the matrix numerical values */ 1825 nz = procsnz[0]; 1826 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 1827 1828 /* insert into matrix */ 1829 jj = rstart; 1830 smycols = mycols; 1831 svals = vals; 1832 for (i=0; i<m; i++) { 1833 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1834 smycols += ourlens[i]; 1835 svals += ourlens[i]; 1836 jj++; 1837 } 1838 1839 /* read in other processors and ship out */ 1840 for (i=1; i<size; i++) { 1841 nz = procsnz[i]; 1842 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 1843 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 1844 } 1845 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1846 } else { 1847 /* receive numeric values */ 1848 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 1849 1850 /* receive message of values*/ 1851 ierr = 
MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); /* tail of MatLoad_MPIDense_Binary: non-root ranks receive their values from rank 0 */
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    /* sanity check: the number of values received must match what the row lengths promised */
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatLoad_MPIDense - loads an MPIDENSE matrix from a viewer, dispatching on the
  viewer type: PETSc binary format or (when built with HDF5 support) HDF5.
  Any other viewer type is an error.
*/
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool      isbinary, ishdf5;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
  if (isbinary) {
    ierr = MatLoad_MPIDense_Binary(newMat,viewer);CHKERRQ(ierr);
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/*
  MatEqual_MPIDense - two MPIDENSE matrices are equal iff every rank's local
  (sequential dense) block is equal; the per-rank results are combined with a
  logical AND across the communicator.
*/
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  /* collective: all ranks must agree before the matrices are declared equal */
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatDestroy_MatTransMatMult_MPIDense_MPIDense - destructor installed on the
  product C of MatTransposeMatMult(); frees the reuse buffers then chains to
  the original destroy routine saved in atb->destroy.
*/
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatDestroy_MatMatTransMult_MPIDense_MPIDense - destructor installed on the
  product C of MatMatTransposeMult(); frees the communication buffers then
  chains to the original destroy routine saved in abt->destroy.
*/
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_MatTransMultDense *abt = a->abtdense;

  PetscFunctionBegin;
  /* recvcounts/recvdispls are only allocated by the allgatherv algorithm;
     PetscFree2 of NULL pointers is a no-op, so this is safe for both */
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = (abt->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatTransposeMatMultNumeric_MPIDense_MPIDense - computes C = A^T * B.
  Each rank forms the full N x cN partial product of its local row blocks with
  one BLAS gemm, permutes it into send order, and an MPI_Reduce_scatter sums
  the partial products while distributing C's row blocks to their owners.
*/
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          am,an,bn,aN;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf: column-major data regrouped so each
     destination rank's row block is contiguous */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatTransposeMatMultSymbolic_MPIDense_MPIDense - allocates the product
  C = A^T * B and the reuse buffers for the numeric phase.  Requires A and B
  to share the same row layout (checked below); installs a custom destroy so
  the buffers are freed with C.
*/
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reuse Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = Cdense->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  c            = (Mat_MPIDense*)Cdense->data;
  c->atbdense  = atb;
  atb->destroy = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/*
  MatTransposeMatMult_MPIDense_MPIDense - driver: symbolic phase on
  MAT_INITIAL_MATRIX only, numeric phase always.
*/
PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatMatTransposeMultSymbolic_MPIDense_MPIDense - allocates C = A * B^T and
  the communication buffers for the numeric phase; the algorithm (allgatherv
  or cyclic) is selectable with -matmattransmult_mpidense_mpidense_via.
*/
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  const char            *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt              alg, nalg = 2;
  Mat_MatTransMultDense *abt;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  alg  = 0; /* default is allgatherv */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  /* A * B^T requires conforming global column dimensions */
  if (A->cmap->N != B->cmap->N) {
    SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cdense;

  /* create data structure for reuse Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* cyclic: two buffers each large enough for the biggest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* allgatherv: buf[0] holds local B^T, buf[1] the gathered global B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  c            = (Mat_MPIDense*)Cdense->data;
  c->abtdense  = abt;
  abt->destroy = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/*
  MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic - computes C = A * B^T
  by passing row blocks of B around the ring of processes: each iteration
  multiplies the currently held block into the matching column block of C
  while the next block is in flight (overlapping communication and compute).
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense          *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *carray;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm, cn, ck;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  /* if the local B block is contiguous (lda == bn) it can be sent in place;
     otherwise pack it into buf[0] first */
  if (bseq->lda == bn) {
    sendbuf = bseq->v;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bseq->v[i * bseq->lda + j];
      }
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    ierr   = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscFunctionReturn(0);
}

/*
  MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv - computes
  C = A * B^T by gathering the transpose of all of B onto every process
  (MPI_Allgatherv) and then performing a single local gemm.
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense          *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size;
  PetscScalar           *sendbuf, *recvbuf;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm, cn, ck;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* copy transpose of B into buf[0] */
  bn      = B->rmap->n;
  sendbuf = abt->buf[0];
  recvbuf = abt->buf[1];
  for (k = 0, j = 0; j < bn; j++) {
    for (i = 0; i < cK; i++, k++) {
      sendbuf[k] = bseq->v[i * bseq->lda + j];
    }
  }
  ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
  /* recvbuf now holds global B^T in column-major order, so a plain "N","N" gemm yields A * B^T */
  PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
  PetscFunctionReturn(0);
}

/*
  MatMatTransposeMultNumeric_MPIDense_MPIDense - dispatches to the algorithm
  chosen during the symbolic phase (abt->alg: 1 = cyclic, else allgatherv).
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  switch (abt->alg) {
  case 1:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
    break;
  default:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
    break;
  }
  PetscFunctionReturn(0);
}

/*
  MatMatTransposeMult_MPIDense_MPIDense - driver: symbolic phase on
  MAT_INITIAL_MATRIX only, numeric phase always.
*/
static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  MatDestroy_MatMatMult_MPIDense_MPIDense - destructor installed on the
  product C of MatMatMult(); destroys the cached Elemental factors then
  chains to the original destroy routine saved in ab->destroy.
*/
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
  Mat_MatMultDense
*ab = a->abdense; 2239 2240 PetscFunctionBegin; 2241 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2242 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2243 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2244 2245 ierr = (ab->destroy)(A);CHKERRQ(ierr); 2246 ierr = PetscFree(ab);CHKERRQ(ierr); 2247 PetscFunctionReturn(0); 2248 } 2249 2250 #if defined(PETSC_HAVE_ELEMENTAL) 2251 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2252 { 2253 PetscErrorCode ierr; 2254 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2255 Mat_MatMultDense *ab=c->abdense; 2256 2257 PetscFunctionBegin; 2258 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2259 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2260 ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2261 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2262 PetscFunctionReturn(0); 2263 } 2264 2265 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 2266 { 2267 PetscErrorCode ierr; 2268 Mat Ae,Be,Ce; 2269 Mat_MPIDense *c; 2270 Mat_MatMultDense *ab; 2271 2272 PetscFunctionBegin; 2273 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2274 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2275 } 2276 2277 /* convert A and B to Elemental matrices Ae and Be */ 2278 ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr); 2279 ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr); 2280 2281 /* Ce = Ae*Be */ 2282 ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr); 2283 ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr); 2284 2285 /* convert Ce to C */ 2286 ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 2287 2288 /* create data structure for reuse Cdense */ 2289 
ierr = PetscNew(&ab);CHKERRQ(ierr); 2290 c = (Mat_MPIDense*)(*C)->data; 2291 c->abdense = ab; 2292 2293 ab->Ae = Ae; 2294 ab->Be = Be; 2295 ab->Ce = Ce; 2296 ab->destroy = (*C)->ops->destroy; 2297 (*C)->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2298 (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2299 PetscFunctionReturn(0); 2300 } 2301 2302 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 2303 { 2304 PetscErrorCode ierr; 2305 2306 PetscFunctionBegin; 2307 if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */ 2308 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 2309 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 2310 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 2311 } else { 2312 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 2313 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 2314 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 2315 } 2316 PetscFunctionReturn(0); 2317 } 2318 #endif 2319 2320