/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
   matrix that represents the operator. For sequential matrices it returns itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
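
/*
  A minimal usage sketch for MatDenseGetLocalMatrix() (illustrative only, kept
  in a comment so it is not compiled into this file; the helper name and sizes
  are hypothetical):

    PetscErrorCode ExampleGetLocalMatrix(MPI_Comm comm)
    {
      Mat            A,Aloc;
      PetscInt       m,n;
      PetscErrorCode ierr;

      ierr = MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,100,100,NULL,&A);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr); \* a MATSEQDENSE view, not a copy *\
      ierr = MatGetSize(Aloc,&m,&n);CHKERRQ(ierr);          \* local rows by global columns *\
      ierr = MatDestroy(&A);CHKERRQ(ierr);                  \* Aloc is owned by A; do not destroy it *\
      return 0;
    }
*/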

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
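
/*
  A minimal sketch of calling MatCreateSubMatrix() on a MATMPIDENSE matrix
  (illustrative only, kept in a comment; the helper name is hypothetical).
  As noted in MatCreateSubMatrix_MPIDense() above, the rows requested on each
  process must follow the same partitioning as the original matrix; here each
  process simply asks for the rows and columns it already owns.

    PetscErrorCode ExampleSubMatrix(Mat A)
    {
      Mat            B;
      IS             isrow,iscol;
      PetscInt       rstart,rend,cstart,cend;
      PetscErrorCode ierr;

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
      ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
      ierr = ISDestroy(&isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol);CHKERRQ(ierr);
      ierr = MatDestroy(&B);CHKERRQ(ierr);
      return 0;
    }
*/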

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
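
/*
  A minimal sketch of the stash path implemented by MatSetValues_MPIDense() and
  MatAssemblyBegin/End_MPIDense() above (illustrative only, kept in a comment;
  the helper name and indices are hypothetical). A value destined for a row
  owned by another process is cached in mat->stash and communicated during
  assembly.

    PetscErrorCode ExampleOffProcessInsert(Mat A)
    {
      PetscInt       row = 0,col = 0; \* rank 0 owns row 0; other ranks stash it *\
      PetscScalar    one = 1.0;
      PetscErrorCode ierr;

      ierr = MatSetValues(A,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      return 0;
    }
*/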

/* The code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A directly and not through the MatZeroRows() routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
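
/*
  A minimal sketch of applying a MATMPIDENSE matrix (illustrative only, kept in
  a comment; the helper name is hypothetical). MatMult() above scatters x into
  the local work vector and multiplies locally; MatMultTranspose() multiplies
  locally and accumulates the partial results with a reverse scatter.

    PetscErrorCode ExampleApply(Mat A,Vec x,Vec y,Vec yt)
    {
      PetscErrorCode ierr;

      ierr = MatMult(A,x,y);CHKERRQ(ierr);           \* y  = A x   *\
      ierr = MatMultTranspose(A,y,yt);CHKERRQ(ierr); \* yt = A^T y *\
      return 0;
    }
*/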

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
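
/*
  A minimal sketch of saving a parallel dense matrix in binary form
  (illustrative only, kept in a comment; the helper name and file name are
  placeholders). The binary path in MatView_MPIDense_Binary() below requires
  the NATIVE viewer format.

    PetscErrorCode ExampleSaveBinary(Mat A)
    {
      PetscViewer    viewer;
      PetscErrorCode ierr;

      ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
      return 0;
    }
*/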

static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k=1; k<size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j=0; j<N; j++) {
            for (i=0; i<m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>

static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
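
/*
  A minimal sketch of querying matrix statistics (illustrative only, kept in a
  comment; the helper name is hypothetical). MatGetInfo_MPIDense() below
  reduces the per-process counts with either a max or a sum over the
  communicator.

    PetscErrorCode ExampleInfo(Mat A)
    {
      MatInfo        info;
      PetscErrorCode ierr;

      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"nz used %g, memory %g\n",(double)info.nz_used,(double)info.memory);CHKERRQ(ierr);
      return 0;
    }
*/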

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
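
/*
  A minimal sketch of the options handled by MatSetOption_MPIDense() above
  (illustrative only, kept in a comment; the helper name is hypothetical):
  MAT_ROW_ORIENTED switches MatSetValues() input to column-major order, and
  MAT_IGNORE_OFF_PROC_ENTRIES disables the stash entirely.

    PetscErrorCode ExampleOptions(Mat A)
    {
      PetscErrorCode ierr;

      ierr = MatSetOption(A,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);           \* column-major value arrays *\
      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); \* drop non-local insertions *\
      return 0;
    }
*/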

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m = mdn->A->rmap->n,n = mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.",s2a,s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.",s3a,s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
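
/*
  A minimal sketch of the two transpose modes supported by
  MatTranspose_MPIDense() above (illustrative only, kept in a comment; the
  helper name is hypothetical).

    PetscErrorCode ExampleTranspose(Mat A)
    {
      Mat            At;
      PetscErrorCode ierr;

      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); \* new matrix At = A^T *\
      ierr = MatDestroy(&At);CHKERRQ(ierr);
      ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);  \* replace A with A^T  *\
      return 0;
    }
*/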

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data,*B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode    ierr;
  PetscScalar       *x;
  const PetscScalar *a;
  PetscInt          lda;

  PetscFunctionBegin;
  if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(x,a+col*lda,A->rmap->n);CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        MatMatTransposeMult_MPIDense_MPIDense,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
};

PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m = A->rmap->n,N = A->cmap->N,rstart = A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
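
/*
  A minimal sketch of column access through the functions above (illustrative
  only, kept in a comment; the helper name is hypothetical).
  MatDenseGetColumn() exposes the locally owned part of one global column and
  must be matched by MatDenseRestoreColumn().

    PetscErrorCode ExampleColumn(Mat A,PetscInt col)
    {
      PetscScalar    *vals;
      PetscInt       i,m;
      PetscErrorCode ierr;

      ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
      ierr = MatDenseGetColumn(A,col,&vals);CHKERRQ(ierr);
      for (i=0; i<m; i++) vals[i] *= 2.0; \* scale the local part of the column *\
      ierr = MatDenseRestoreColumn(A,&vals);CHKERRQ(ierr);
      return 0;
    }
*/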

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1433 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1434 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 1435 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr); 1436 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 /*MC 1441 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1442 1443 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1444 and MATMPIDENSE otherwise. 1445 1446 Options Database Keys: 1447 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1448 1449 Level: beginner 1450 1451 1452 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1453 M*/ 1454 1455 /*@C 1456 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1457 1458 Not collective 1459 1460 Input Parameters: 1461 . B - the matrix 1462 - data - optional location of matrix data. Set data=NULL for PETSc 1463 to control all matrix memory allocation. 1464 1465 Notes: 1466 The dense format is fully compatible with standard Fortran 77 1467 storage by columns. 1468 1469 The data input variable is intended primarily for Fortran programmers 1470 who wish to allocate their own matrix memory space. Most users should 1471 set data=NULL. 1472 1473 Level: intermediate 1474 1475 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1476 @*/ 1477 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1478 { 1479 PetscErrorCode ierr; 1480 1481 PetscFunctionBegin; 1482 ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1483 PetscFunctionReturn(0); 1484 } 1485 1486 /*@ 1487 MatDensePlaceArray - Allows one to replace the array in a dense array with an 1488 array provided by the user. This is useful to avoid copying an array 1489 into a matrix 1490 1491 Not Collective 1492 1493 Input Parameters: 1494 + mat - the matrix 1495 - array - the array in column major order 1496 1497 Notes: 1498 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 1499 freed when the matrix is destroyed. 1500 1501 Level: developer 1502 1503 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 1504 1505 @*/ 1506 PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[]) 1507 { 1508 PetscErrorCode ierr; 1509 PetscFunctionBegin; 1510 ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 1511 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1512 PetscFunctionReturn(0); 1513 } 1514 1515 /*@ 1516 MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 1517 1518 Not Collective 1519 1520 Input Parameters: 1521 . 

/*@
   MatDenseResetArray - Resets the matrix array to the array that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
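
/*
  A minimal sketch of MatCreateDense() usage (illustrative only, kept in a
  comment; the helper name and global sizes are example values).

    PetscErrorCode ExampleCreate(MPI_Comm comm,Mat *A)
    {
      PetscErrorCode ierr;

      \* let PETSc choose the row and column distribution of a 100 x 50 matrix *\
      ierr = MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,100,50,NULL,A);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      return 0;
    }
*/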
PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1620 1621 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1622 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1623 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1624 1625 *newmat = mat; 1626 PetscFunctionReturn(0); 1627 } 1628 1629 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat) 1630 { 1631 PetscErrorCode ierr; 1632 PetscMPIInt rank,size; 1633 const PetscInt *rowners; 1634 PetscInt i,m,n,nz,j,mMax; 1635 PetscScalar *array,*vals,*vals_ptr; 1636 Mat_MPIDense *a = (Mat_MPIDense*)newmat->data; 1637 1638 PetscFunctionBegin; 1639 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1640 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1641 1642 /* determine ownership of rows and columns */ 1643 m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n; 1644 n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n; 1645 1646 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1647 if (!a->A) { 1648 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1649 } 1650 ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 1651 ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr); 1652 ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr); 1653 ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr); 1654 if (!rank) { 1655 ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr); 1656 1657 /* read in my part of the matrix numerical values */ 1658 ierr = PetscBinaryRead(fd,vals,m*N,NULL,PETSC_SCALAR);CHKERRQ(ierr); 1659 1660 /* insert into matrix-by row (this is why cannot directly read into array */ 1661 vals_ptr = vals; 1662 for (i=0; i<m; i++) { 1663 for (j=0; j<N; j++) { 1664 array[i + j*m] = *vals_ptr++; 1665 } 1666 } 1667 1668 /* read in other processors and ship out */ 1669 for (i=1; i<size; i++) { 1670 nz = (rowners[i+1] - rowners[i])*N; 1671 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 1672 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1673 } 1674 } else { 1675 /* receive numeric values */ 1676 ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr); 1677 1678 /* receive message of values*/ 1679 ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1680 1681 /* insert into matrix-by row (this is why cannot directly read into array */ 1682 vals_ptr = vals; 1683 for (i=0; i<m; i++) { 1684 for (j=0; j<N; j++) { 1685 array[i + j*m] = *vals_ptr++; 1686 } 1687 } 1688 } 1689 ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr); 1690 ierr = PetscFree(vals);CHKERRQ(ierr); 1691 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1692 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1693 PetscFunctionReturn(0); 1694 } 1695 1696 static PetscErrorCode MatLoad_MPIDense_Binary(Mat newmat,PetscViewer viewer) 1697 { 1698 Mat_MPIDense *a; 1699 PetscScalar *vals,*svals; 1700 MPI_Comm comm; 1701 MPI_Status status; 1702 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz; 1703 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1704 PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols; 1705 PetscInt i,nz,j,rstart,rend; 1706 int fd; 1707 PetscErrorCode ierr; 1708 1709 PetscFunctionBegin; 1710 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 1711 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1712 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1713 ierr = 
PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1714 if (!rank) { 1715 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 1716 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1717 } 1718 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1719 M = header[1]; N = header[2]; nz = header[3]; 1720 1721 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1722 if (newmat->rmap->N < 0) newmat->rmap->N = M; 1723 if (newmat->cmap->N < 0) newmat->cmap->N = N; 1724 1725 if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N); 1726 if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N); 1727 1728 /* 1729 Handle case where matrix is stored on disk as a dense matrix 1730 */ 1731 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1732 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1733 PetscFunctionReturn(0); 1734 } 1735 1736 /* determine ownership of all rows */ 1737 if (newmat->rmap->n < 0) { 1738 ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr); 1739 } else { 1740 ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr); 1741 } 1742 if (newmat->cmap->n < 0) { 1743 n = PETSC_DECIDE; 1744 } else { 1745 ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr); 1746 } 1747 1748 ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr); 1749 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1750 rowners[0] = 0; 1751 for (i=2; i<=size; i++) { 1752 rowners[i] += rowners[i-1]; 1753 } 1754 rstart = rowners[rank]; 1755 rend = rowners[rank+1]; 1756 1757 /* distribute row lengths to all processors */ 1758 ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr); 1759 if (!rank) { 1760 ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr); 1761 ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr); 1762 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 1763 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1764 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1765 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1766 } else { 1767 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1768 } 1769 1770 if (!rank) { 1771 /* calculate the number of nonzeros on each processor */ 1772 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 1773 for (i=0; i<size; i++) { 1774 for (j=rowners[i]; j< rowners[i+1]; j++) { 1775 procsnz[i] += rowlengths[j]; 1776 } 1777 } 1778 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1779 1780 /* determine max buffer needed and allocate it */ 1781 maxnz = 0; 1782 for (i=0; i<size; i++) { 1783 maxnz = PetscMax(maxnz,procsnz[i]); 1784 } 1785 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 1786 1787 /* read in my part of the matrix column indices */ 1788 nz = procsnz[0]; 1789 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 1790 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 1791 1792 /* read in every one elses and ship off */ 1793 for (i=1; i<size; i++) { 1794 nz = procsnz[i]; 1795 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 1796 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1797 } 1798 ierr = PetscFree(cols);CHKERRQ(ierr); 
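/* at this point rank 0 keeps its own column indices in mycols; every other rank receives its indices in the branch below */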
1799 } else { 1800 /* determine buffer space needed for message */ 1801 nz = 0; 1802 for (i=0; i<m; i++) { 1803 nz += ourlens[i]; 1804 } 1805 ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr); 1806 1807 /* receive message of column indices*/ 1808 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1809 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1810 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1811 } 1812 1813 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1814 a = (Mat_MPIDense*)newmat->data; 1815 if (!a->A) { 1816 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1817 } 1818 1819 if (!rank) { 1820 ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr); 1821 1822 /* read in my part of the matrix numerical values */ 1823 nz = procsnz[0]; 1824 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 1825 1826 /* insert into matrix */ 1827 jj = rstart; 1828 smycols = mycols; 1829 svals = vals; 1830 for (i=0; i<m; i++) { 1831 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1832 smycols += ourlens[i]; 1833 svals += ourlens[i]; 1834 jj++; 1835 } 1836 1837 /* read in other processors and ship out */ 1838 for (i=1; i<size; i++) { 1839 nz = procsnz[i]; 1840 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 1841 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 1842 } 1843 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1844 } else { 1845 /* receive numeric values */ 1846 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 1847 1848 /* receive message of values*/ 1849 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 1850 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1851 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1852 1853 /* insert into matrix */ 1854 jj = rstart; 1855 smycols = mycols; 1856 svals = vals; 1857 for (i=0; i<m; i++) { 1858 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1859 smycols += ourlens[i]; 1860 svals += ourlens[i]; 1861 jj++; 1862 } 1863 } 1864 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1865 ierr = PetscFree(vals);CHKERRQ(ierr); 1866 ierr = PetscFree(mycols);CHKERRQ(ierr); 1867 ierr = PetscFree(rowners);CHKERRQ(ierr); 1868 1869 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1870 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1871 PetscFunctionReturn(0); 1872 } 1873 1874 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 1875 { 1876 PetscErrorCode ierr; 1877 PetscBool isbinary; 1878 #if defined(PETSC_HAVE_HDF5) 1879 PetscBool ishdf5; 1880 #endif 1881 1882 PetscFunctionBegin; 1883 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 1884 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 1885 /* force binary viewer to load .info file if it has not yet done so */ 1886 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1887 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1888 #if defined(PETSC_HAVE_HDF5) 1889 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 1890 #endif 1891 if (isbinary) { 1892 ierr = MatLoad_MPIDense_Binary(newMat,viewer);CHKERRQ(ierr); 1893 #if defined(PETSC_HAVE_HDF5) 1894 } else if (ishdf5) { 1895 ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr); 1896 #endif 1897 } else 
SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 1898 PetscFunctionReturn(0); 1899 } 1900 1901 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 1902 { 1903 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1904 Mat a,b; 1905 PetscBool flg; 1906 PetscErrorCode ierr; 1907 1908 PetscFunctionBegin; 1909 a = matA->A; 1910 b = matB->A; 1911 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1912 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1913 PetscFunctionReturn(0); 1914 } 1915 1916 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A) 1917 { 1918 PetscErrorCode ierr; 1919 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1920 Mat_TransMatMultDense *atb = a->atbdense; 1921 1922 PetscFunctionBegin; 1923 ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr); 1924 ierr = (atb->destroy)(A);CHKERRQ(ierr); 1925 ierr = PetscFree(atb);CHKERRQ(ierr); 1926 PetscFunctionReturn(0); 1927 } 1928 1929 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A) 1930 { 1931 PetscErrorCode ierr; 1932 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1933 Mat_MatTransMultDense *abt = a->abtdense; 1934 1935 PetscFunctionBegin; 1936 ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr); 1937 ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr); 1938 ierr = (abt->destroy)(A);CHKERRQ(ierr); 1939 ierr = PetscFree(abt);CHKERRQ(ierr); 1940 PetscFunctionReturn(0); 1941 } 1942 1943 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1944 { 1945 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 1946 Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 1947 Mat_TransMatMultDense *atb = c->atbdense; 1948 PetscErrorCode ierr; 1949 MPI_Comm comm; 1950 PetscMPIInt rank,size,*recvcounts=atb->recvcounts; 1951 PetscScalar *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf; 1952 PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 1953 PetscScalar _DOne=1.0,_DZero=0.0; 1954 PetscBLASInt am,an,bn,aN; 1955 const PetscInt *ranges; 1956 1957 PetscFunctionBegin; 1958 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1959 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1960 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1961 1962 /* compute atbarray = aseq^T * bseq */ 1963 ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr); 1964 ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr); 1965 ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr); 1966 ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr); 1967 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN)); 1968 1969 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 1970 for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 1971 1972 /* arrange atbarray into sendbuf */ 1973 k = 0; 1974 for (proc=0; proc<size; proc++) { 1975 for (j=0; j<cN; j++) { 1976 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 1977 } 1978 } 1979 /* sum all atbarray to local values of C */ 1980 ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr); 1981 ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 1982 ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr); 1983 
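/* after the reduce-scatter each process holds its locally owned rows of C = A^T * B */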
PetscFunctionReturn(0); 1984 } 1985 1986 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1987 { 1988 PetscErrorCode ierr; 1989 Mat Cdense; 1990 MPI_Comm comm; 1991 PetscMPIInt size; 1992 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 1993 Mat_MPIDense *c; 1994 Mat_TransMatMultDense *atb; 1995 1996 PetscFunctionBegin; 1997 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1998 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 1999 SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2000 } 2001 2002 /* create matrix product Cdense */ 2003 ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr); 2004 ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2005 ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr); 2006 ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr); 2007 ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2008 ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2009 *C = Cdense; 2010 2011 /* create data structure for reuse Cdense */ 2012 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2013 ierr = PetscNew(&atb);CHKERRQ(ierr); 2014 cM = Cdense->rmap->N; 2015 ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr); 2016 2017 c = (Mat_MPIDense*)Cdense->data; 2018 c->atbdense = atb; 2019 atb->destroy = Cdense->ops->destroy; 2020 Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2021 PetscFunctionReturn(0); 2022 } 2023 2024 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 2025 { 2026 PetscErrorCode ierr; 2027 2028 PetscFunctionBegin; 2029 if (scall == MAT_INITIAL_MATRIX) { 2030 ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 2031 } 2032 ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 2033 PetscFunctionReturn(0); 2034 } 2035 2036 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C) 2037 { 2038 PetscErrorCode ierr; 2039 Mat Cdense; 2040 MPI_Comm comm; 2041 PetscMPIInt i, size; 2042 PetscInt maxRows, bufsiz; 2043 Mat_MPIDense *c; 2044 PetscMPIInt tag; 2045 const char *algTypes[2] = {"allgatherv","cyclic"}; 2046 PetscInt alg, nalg = 2; 2047 Mat_MatTransMultDense *abt; 2048 2049 PetscFunctionBegin; 2050 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2051 alg = 0; /* default is allgatherv */ 2052 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 2053 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr); 2054 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2055 if (A->cmap->N != B->cmap->N) { 2056 SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N); 2057 } 2058 2059 /* create matrix product Cdense */ 2060 ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr); 2061 ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 2062 ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr); 2063 ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr); 2064 ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr); 2065 ierr = 
MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2066 ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2067 *C = Cdense; 2068 2069 /* create data structure for reuse Cdense */ 2070 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2071 ierr = PetscNew(&abt);CHKERRQ(ierr); 2072 abt->tag = tag; 2073 abt->alg = alg; 2074 switch (alg) { 2075 case 1: 2076 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2077 bufsiz = A->cmap->N * maxRows; 2078 ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 2079 break; 2080 default: 2081 ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 2082 ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 2083 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2084 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2085 break; 2086 } 2087 2088 c = (Mat_MPIDense*)Cdense->data; 2089 c->abtdense = abt; 2090 abt->destroy = Cdense->ops->destroy; 2091 Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2092 PetscFunctionReturn(0); 2093 } 2094 2095 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2096 { 2097 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2098 Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 2099 Mat_SeqDense *cseq=(Mat_SeqDense*)(c->A)->data; 2100 Mat_MatTransMultDense *abt = c->abtdense; 2101 PetscErrorCode ierr; 2102 MPI_Comm comm; 2103 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2104 PetscScalar *sendbuf, *recvbuf=0, *carray; 2105 PetscInt i,cK=A->cmap->N,k,j,bn; 2106 PetscScalar _DOne=1.0,_DZero=0.0; 2107 PetscBLASInt cm, cn, ck; 2108 MPI_Request reqs[2]; 2109 const PetscInt *ranges; 2110 2111 PetscFunctionBegin; 2112 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2113 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2114 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2115 2116 ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2117 bn = B->rmap->n; 2118 if (bseq->lda == bn) { 2119 sendbuf = bseq->v; 2120 } else { 2121 sendbuf = abt->buf[0]; 2122 for (k = 0, i = 0; i < cK; i++) { 2123 for (j = 0; j < bn; j++, k++) { 2124 sendbuf[k] = bseq->v[i * bseq->lda + j]; 2125 } 2126 } 2127 } 2128 if (size > 1) { 2129 sendto = (rank + size - 1) % size; 2130 recvfrom = (rank + size + 1) % size; 2131 } else { 2132 sendto = recvfrom = 0; 2133 } 2134 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2135 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2136 recvisfrom = rank; 2137 for (i = 0; i < size; i++) { 2138 /* we have finished receiving in sending, bufs can be read/modified */ 2139 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2140 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2141 2142 if (nextrecvisfrom != rank) { 2143 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2144 sendsiz = cK * bn; 2145 recvsiz = cK * nextbn; 2146 recvbuf = (i & 1) ? 
abt->buf[0] : abt->buf[1]; 2147 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 2148 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 2149 } 2150 2151 /* local aseq * sendbuf^T */ 2152 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2153 carray = &cseq->v[cseq->lda * ranges[recvisfrom]]; 2154 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda)); 2155 2156 if (nextrecvisfrom != rank) { 2157 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2158 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2159 } 2160 bn = nextbn; 2161 recvisfrom = nextrecvisfrom; 2162 sendbuf = recvbuf; 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2168 { 2169 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2170 Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 2171 Mat_SeqDense *cseq=(Mat_SeqDense*)(c->A)->data; 2172 Mat_MatTransMultDense *abt = c->abtdense; 2173 PetscErrorCode ierr; 2174 MPI_Comm comm; 2175 PetscMPIInt rank,size; 2176 PetscScalar *sendbuf, *recvbuf; 2177 PetscInt i,cK=A->cmap->N,k,j,bn; 2178 PetscScalar _DOne=1.0,_DZero=0.0; 2179 PetscBLASInt cm, cn, ck; 2180 2181 PetscFunctionBegin; 2182 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2183 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2184 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2185 2186 /* copy transpose of B into buf[0] */ 2187 bn = B->rmap->n; 2188 sendbuf = abt->buf[0]; 2189 recvbuf = abt->buf[1]; 2190 for (k = 0, j = 0; j < bn; j++) { 2191 for (i = 0; i < cK; i++, k++) { 2192 sendbuf[k] = bseq->v[i * bseq->lda + j]; 2193 } 2194 } 2195 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2196 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2197 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2198 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2199 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda)); 2200 PetscFunctionReturn(0); 2201 } 2202 2203 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2204 { 2205 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2206 Mat_MatTransMultDense *abt = c->abtdense; 2207 PetscErrorCode ierr; 2208 2209 PetscFunctionBegin; 2210 switch (abt->alg) { 2211 case 1: 2212 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2213 break; 2214 default: 2215 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2216 break; 2217 } 2218 PetscFunctionReturn(0); 2219 } 2220 2221 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C) 2222 { 2223 PetscErrorCode ierr; 2224 2225 PetscFunctionBegin; 2226 if (scall == MAT_INITIAL_MATRIX) { 2227 ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 2228 } 2229 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 2230 PetscFunctionReturn(0); 2231 } 2232 2233 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A) 2234 { 2235 PetscErrorCode ierr; 2236 Mat_MPIDense 
*a = (Mat_MPIDense*)A->data; 2237 Mat_MatMultDense *ab = a->abdense; 2238 2239 PetscFunctionBegin; 2240 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2241 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2242 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2243 2244 ierr = (ab->destroy)(A);CHKERRQ(ierr); 2245 ierr = PetscFree(ab);CHKERRQ(ierr); 2246 PetscFunctionReturn(0); 2247 } 2248 2249 #if defined(PETSC_HAVE_ELEMENTAL) 2250 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2251 { 2252 PetscErrorCode ierr; 2253 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2254 Mat_MatMultDense *ab=c->abdense; 2255 2256 PetscFunctionBegin; 2257 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2258 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2259 ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2260 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2261 PetscFunctionReturn(0); 2262 } 2263 2264 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 2265 { 2266 PetscErrorCode ierr; 2267 Mat Ae,Be,Ce; 2268 Mat_MPIDense *c; 2269 Mat_MatMultDense *ab; 2270 2271 PetscFunctionBegin; 2272 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2273 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2274 } 2275 2276 /* convert A and B to Elemental matrices Ae and Be */ 2277 ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr); 2278 ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr); 2279 2280 /* Ce = Ae*Be */ 2281 ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr); 2282 ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr); 2283 2284 /* convert Ce to C */ 2285 ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 2286 2287 /* create data structure for reuse Cdense */ 2288 ierr = PetscNew(&ab);CHKERRQ(ierr); 2289 c = (Mat_MPIDense*)(*C)->data; 2290 c->abdense = ab; 2291 2292 ab->Ae = Ae; 2293 ab->Be = Be; 2294 ab->Ce = Ce; 2295 ab->destroy = (*C)->ops->destroy; 2296 (*C)->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2297 (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2298 PetscFunctionReturn(0); 2299 } 2300 2301 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 2302 { 2303 PetscErrorCode ierr; 2304 2305 PetscFunctionBegin; 2306 if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */ 2307 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 2308 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 2309 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 2310 } else { 2311 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 2312 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 2313 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 2314 } 2315 PetscFunctionReturn(0); 2316 } 2317 #endif 2318 2319
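/*
   Illustrative usage sketch (added for exposition; not part of the original library code): creating,
   filling, and assembling an MPI dense matrix with MatCreateDense(). The global size N and the value
   formula below are assumptions made only for this example.

     Mat            A;
     PetscInt       i,j,rstart,rend,N = 100;
     PetscScalar    v;
     PetscErrorCode ierr;

     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       for (j=0; j<N; j++) {
         v    = 1.0/(i+j+1);
         ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
       }
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/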