/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
   matrix that represents the operator. For sequential matrices it returns itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}
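/*
   Usage sketch (illustrative only, not part of this file's API): MatDenseGetLocalMatrix() is
   convenient when an algorithm needs the on-process block of a parallel dense matrix, for
   example to apply a sequential-only operation to it. The names A, Alocal and nrm below are
   hypothetical; A is assumed to be an assembled MATMPIDENSE or MATSEQDENSE matrix.

     Mat            A,Alocal;
     PetscReal      nrm;
     PetscErrorCode ierr;

     ierr = MatDenseGetLocalMatrix(A,&Alocal);CHKERRQ(ierr);    // Alocal == A for MATSEQDENSE
     ierr = MatNorm(Alocal,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);  // norm of the local block only
*/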
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
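/*
   Usage sketch (illustrative only): values are inserted with global indices; rows owned by
   another process are cached in the stash by MatSetValues_MPIDense() above and communicated
   during assembly. The matrix A below is a hypothetical, preallocated MATMPIDENSE matrix.

     PetscInt       row = 0,cols[2] = {0,1};        // row 0 may live on another process
     PetscScalar    vals[2] = {1.0,2.0};
     PetscErrorCode ierr;

     ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   // sends stashed entries
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);     // receives and inserts them
*/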
static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
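/*
   Usage sketch (illustrative only): the wrappers above expose the local block's storage,
   which is column major with leading dimension lda; each process stores all N global columns
   of the rows it owns (see MatMPIDenseSetPreallocation_MPIDense() below). A hypothetical
   read-only traversal of an assembled MATMPIDENSE matrix A might look like:

     const PetscScalar *array;
     PetscInt          lda,m,n,i,j;
     PetscErrorCode    ierr;

     ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
     ierr = MatDenseGetArrayRead(A,&array);CHKERRQ(ierr);
     for (j=0; j<n; j++) {
       for (i=0; i<m; i++) {
         (void)array[i + j*lda];   // entry (rstart+i, j) of the global matrix
       }
     }
     ierr = MatDenseRestoreArrayRead(A,&array);CHKERRQ(ierr);
*/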
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
357 */ 358 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 359 { 360 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 361 PetscErrorCode ierr; 362 PetscInt i,*owners = A->rmap->range; 363 PetscInt *sizes,j,idx,nsends; 364 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 365 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 366 PetscInt *lens,*lrows,*values; 367 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 368 MPI_Comm comm; 369 MPI_Request *send_waits,*recv_waits; 370 MPI_Status recv_status,*send_status; 371 PetscBool found; 372 const PetscScalar *xx; 373 PetscScalar *bb; 374 375 PetscFunctionBegin; 376 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 377 if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 378 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 379 /* first count number of contributors to each processor */ 380 ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 381 ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/ 382 for (i=0; i<N; i++) { 383 idx = rows[i]; 384 found = PETSC_FALSE; 385 for (j=0; j<size; j++) { 386 if (idx >= owners[j] && idx < owners[j+1]) { 387 sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 388 } 389 } 390 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 391 } 392 nsends = 0; 393 for (i=0; i<size; i++) nsends += sizes[2*i+1]; 394 395 /* inform other processors of number of messages and max length*/ 396 ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr); 397 398 /* post receives: */ 399 ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr); 400 ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr); 401 for (i=0; i<nrecvs; i++) { 402 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 403 } 404 405 /* do sends: 406 1) starts[i] gives the starting index in svalues for stuff going to 407 the ith processor 408 */ 409 ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr); 410 ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr); 411 ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr); 412 413 starts[0] = 0; 414 for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 415 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 416 417 starts[0] = 0; 418 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 419 count = 0; 420 for (i=0; i<size; i++) { 421 if (sizes[2*i+1]) { 422 ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 423 } 424 } 425 ierr = PetscFree(starts);CHKERRQ(ierr); 426 427 base = owners[rank]; 428 429 /* wait on receives */ 430 ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 431 count = nrecvs; 432 slen = 0; 433 while (count) { 434 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 435 /* unpack receives into our local space */ 436 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 437 438 source[imdex] = recv_status.MPI_SOURCE; 439 lens[imdex] = n; 440 slen += n; 441 count--; 442 } 443 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 444 445 /* move the data into the send scatter */ 446 ierr = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr); 447 count = 0; 448 for (i=0; i<nrecvs; i++) { 449 values = rvalues + i*nmax; 450 for (j=0; j<lens[i]; j++) { 451 lrows[count++] = values[j] - base; 452 } 453 } 454 
ierr = PetscFree(rvalues);CHKERRQ(ierr); 455 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 456 ierr = PetscFree(owner);CHKERRQ(ierr); 457 ierr = PetscFree(sizes);CHKERRQ(ierr); 458 459 /* fix right hand side if needed */ 460 if (x && b) { 461 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 462 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 463 for (i=0; i<slen; i++) { 464 bb[lrows[i]] = diag*xx[lrows[i]]; 465 } 466 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 467 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 468 } 469 470 /* actually zap the local rows */ 471 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 472 if (diag != 0.0) { 473 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 474 PetscInt m = ll->lda, i; 475 476 for (i=0; i<slen; i++) { 477 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 478 } 479 } 480 ierr = PetscFree(lrows);CHKERRQ(ierr); 481 482 /* wait on sends */ 483 if (nsends) { 484 ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); 485 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 486 ierr = PetscFree(send_status);CHKERRQ(ierr); 487 } 488 ierr = PetscFree(send_waits);CHKERRQ(ierr); 489 ierr = PetscFree(svalues);CHKERRQ(ierr); 490 PetscFunctionReturn(0); 491 } 492 493 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 494 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 495 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 496 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 497 498 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 499 { 500 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 501 PetscErrorCode ierr; 502 503 PetscFunctionBegin; 504 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 505 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 506 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 511 { 512 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 513 PetscErrorCode ierr; 514 515 PetscFunctionBegin; 516 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 517 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 518 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 523 { 524 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 525 PetscErrorCode ierr; 526 PetscScalar zero = 0.0; 527 528 PetscFunctionBegin; 529 ierr = VecSet(yy,zero);CHKERRQ(ierr); 530 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 531 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 532 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 533 PetscFunctionReturn(0); 534 } 535 536 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 537 { 538 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 539 PetscErrorCode ierr; 540 541 PetscFunctionBegin; 542 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 543 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 544 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 545 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 546 PetscFunctionReturn(0); 547 } 548 549 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 550 { 
551 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 552 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 553 PetscErrorCode ierr; 554 PetscInt len,i,n,m = A->rmap->n,radd; 555 PetscScalar *x,zero = 0.0; 556 557 PetscFunctionBegin; 558 ierr = VecSet(v,zero);CHKERRQ(ierr); 559 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 560 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 561 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 562 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 563 radd = A->rmap->rstart*m; 564 for (i=0; i<len; i++) { 565 x[i] = aloc->v[radd + i*m + i]; 566 } 567 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 568 PetscFunctionReturn(0); 569 } 570 571 PetscErrorCode MatDestroy_MPIDense(Mat mat) 572 { 573 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 574 PetscErrorCode ierr; 575 576 PetscFunctionBegin; 577 #if defined(PETSC_USE_LOG) 578 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 579 #endif 580 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 581 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 582 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 583 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 584 585 ierr = PetscFree(mat->data);CHKERRQ(ierr); 586 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 587 588 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 590 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 591 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 592 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 593 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 594 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 595 #if defined(PETSC_HAVE_ELEMENTAL) 596 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 597 #endif 598 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 599 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 600 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 601 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 602 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_nest_mpidense_C",NULL);CHKERRQ(ierr); 603 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr); 604 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr); 605 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 606 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 607 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 608 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 609 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 610 PetscFunctionReturn(0); 611 } 612 613 static 
PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 614 { 615 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 616 PetscErrorCode ierr; 617 PetscViewerFormat format; 618 int fd; 619 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 620 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 621 PetscScalar *work,*v,*vv; 622 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 623 624 PetscFunctionBegin; 625 if (mdn->size == 1) { 626 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 627 } else { 628 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 629 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 630 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 631 632 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 633 if (format == PETSC_VIEWER_NATIVE) { 634 635 if (!rank) { 636 /* store the matrix as a dense matrix */ 637 header[0] = MAT_FILE_CLASSID; 638 header[1] = mat->rmap->N; 639 header[2] = N; 640 header[3] = MATRIX_BINARY_FORMAT_DENSE; 641 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 642 643 /* get largest work array needed for transposing array */ 644 mmax = mat->rmap->n; 645 for (i=1; i<size; i++) { 646 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 647 } 648 ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr); 649 650 /* write out local array, by rows */ 651 m = mat->rmap->n; 652 v = a->v; 653 for (j=0; j<N; j++) { 654 for (i=0; i<m; i++) { 655 work[j + i*N] = *v++; 656 } 657 } 658 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 659 /* get largest work array to receive messages from other processes, excludes process zero */ 660 mmax = 0; 661 for (i=1; i<size; i++) { 662 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 663 } 664 ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr); 665 for (k = 1; k < size; k++) { 666 v = vv; 667 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 668 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 669 670 for (j = 0; j < N; j++) { 671 for (i = 0; i < m; i++) { 672 work[j + i*N] = *v++; 673 } 674 } 675 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 676 } 677 ierr = PetscFree(work);CHKERRQ(ierr); 678 ierr = PetscFree(vv);CHKERRQ(ierr); 679 } else { 680 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 681 } 682 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)"); 683 } 684 PetscFunctionReturn(0); 685 } 686 687 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 688 689 #include <petscdraw.h> 690 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 691 { 692 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 693 PetscErrorCode ierr; 694 PetscMPIInt rank = mdn->rank; 695 PetscViewerType vtype; 696 PetscBool iascii,isdraw; 697 PetscViewer sviewer; 698 PetscViewerFormat format; 699 700 PetscFunctionBegin; 701 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 702 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 703 if (iascii) { 704 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 705 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 706 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 707 MatInfo info; 708 ierr = 
MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 709 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 710 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 711 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 712 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 713 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 714 PetscFunctionReturn(0); 715 } else if (format == PETSC_VIEWER_ASCII_INFO) { 716 PetscFunctionReturn(0); 717 } 718 } else if (isdraw) { 719 PetscDraw draw; 720 PetscBool isnull; 721 722 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 723 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 724 if (isnull) PetscFunctionReturn(0); 725 } 726 727 { 728 /* assemble the entire matrix onto first processor. */ 729 Mat A; 730 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 731 PetscInt *cols; 732 PetscScalar *vals; 733 734 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 735 if (!rank) { 736 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 737 } else { 738 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 739 } 740 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 741 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 742 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 743 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 744 745 /* Copy the matrix ... This isn't the most efficient means, 746 but it's quick for now */ 747 A->insertmode = INSERT_VALUES; 748 749 row = mat->rmap->rstart; 750 m = mdn->A->rmap->n; 751 for (i=0; i<m; i++) { 752 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 753 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 754 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 755 row++; 756 } 757 758 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 759 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 760 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 761 if (!rank) { 762 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 763 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 764 } 765 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 766 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 767 ierr = MatDestroy(&A);CHKERRQ(ierr); 768 } 769 PetscFunctionReturn(0); 770 } 771 772 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 773 { 774 PetscErrorCode ierr; 775 PetscBool iascii,isbinary,isdraw,issocket; 776 777 PetscFunctionBegin; 778 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 779 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 780 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 781 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 782 783 if (iascii || issocket || isdraw) { 784 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 785 } else if (isbinary) { 786 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 787 } 788 PetscFunctionReturn(0); 789 } 790 791 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType 
flag,MatInfo *info) 792 { 793 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 794 Mat mdn = mat->A; 795 PetscErrorCode ierr; 796 PetscLogDouble isend[5],irecv[5]; 797 798 PetscFunctionBegin; 799 info->block_size = 1.0; 800 801 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 802 803 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 804 isend[3] = info->memory; isend[4] = info->mallocs; 805 if (flag == MAT_LOCAL) { 806 info->nz_used = isend[0]; 807 info->nz_allocated = isend[1]; 808 info->nz_unneeded = isend[2]; 809 info->memory = isend[3]; 810 info->mallocs = isend[4]; 811 } else if (flag == MAT_GLOBAL_MAX) { 812 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 813 814 info->nz_used = irecv[0]; 815 info->nz_allocated = irecv[1]; 816 info->nz_unneeded = irecv[2]; 817 info->memory = irecv[3]; 818 info->mallocs = irecv[4]; 819 } else if (flag == MAT_GLOBAL_SUM) { 820 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 821 822 info->nz_used = irecv[0]; 823 info->nz_allocated = irecv[1]; 824 info->nz_unneeded = irecv[2]; 825 info->memory = irecv[3]; 826 info->mallocs = irecv[4]; 827 } 828 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 829 info->fill_ratio_needed = 0; 830 info->factor_mallocs = 0; 831 PetscFunctionReturn(0); 832 } 833 834 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 835 { 836 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 837 PetscErrorCode ierr; 838 839 PetscFunctionBegin; 840 switch (op) { 841 case MAT_NEW_NONZERO_LOCATIONS: 842 case MAT_NEW_NONZERO_LOCATION_ERR: 843 case MAT_NEW_NONZERO_ALLOCATION_ERR: 844 MatCheckPreallocated(A,1); 845 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 846 break; 847 case MAT_ROW_ORIENTED: 848 MatCheckPreallocated(A,1); 849 a->roworiented = flg; 850 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 851 break; 852 case MAT_NEW_DIAGONALS: 853 case MAT_KEEP_NONZERO_PATTERN: 854 case MAT_USE_HASH_TABLE: 855 case MAT_SORTED_FULL: 856 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 857 break; 858 case MAT_IGNORE_OFF_PROC_ENTRIES: 859 a->donotstash = flg; 860 break; 861 case MAT_SYMMETRIC: 862 case MAT_STRUCTURALLY_SYMMETRIC: 863 case MAT_HERMITIAN: 864 case MAT_SYMMETRY_ETERNAL: 865 case MAT_IGNORE_LOWER_TRIANGULAR: 866 case MAT_IGNORE_ZERO_ENTRIES: 867 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 868 break; 869 default: 870 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 871 } 872 PetscFunctionReturn(0); 873 } 874 875 876 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 877 { 878 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 879 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 880 const PetscScalar *l,*r; 881 PetscScalar x,*v; 882 PetscErrorCode ierr; 883 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 884 885 PetscFunctionBegin; 886 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 887 if (ll) { 888 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 889 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 890 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 891 for (i=0; i<m; i++) { 892 x = l[i]; 893 v = mat->v + i; 894 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 895 } 896 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 897 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 898 } 899 if (rr) { 900 
ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 901 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 902 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904 ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 905 for (i=0; i<n; i++) { 906 x = r[i]; 907 v = mat->v + i*m; 908 for (j=0; j<m; j++) (*v++) *= x; 909 } 910 ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 911 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 912 } 913 PetscFunctionReturn(0); 914 } 915 916 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 917 { 918 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 919 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 920 PetscErrorCode ierr; 921 PetscInt i,j; 922 PetscReal sum = 0.0; 923 PetscScalar *v = mat->v; 924 925 PetscFunctionBegin; 926 if (mdn->size == 1) { 927 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 928 } else { 929 if (type == NORM_FROBENIUS) { 930 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 931 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 932 } 933 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 934 *nrm = PetscSqrtReal(*nrm); 935 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 936 } else if (type == NORM_1) { 937 PetscReal *tmp,*tmp2; 938 ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 939 *nrm = 0.0; 940 v = mat->v; 941 for (j=0; j<mdn->A->cmap->n; j++) { 942 for (i=0; i<mdn->A->rmap->n; i++) { 943 tmp[j] += PetscAbsScalar(*v); v++; 944 } 945 } 946 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 947 for (j=0; j<A->cmap->N; j++) { 948 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 949 } 950 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 951 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 952 } else if (type == NORM_INFINITY) { /* max row norm */ 953 PetscReal ntemp; 954 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 955 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 956 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 957 } 958 PetscFunctionReturn(0); 959 } 960 961 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 962 { 963 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 964 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 965 Mat B; 966 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 967 PetscErrorCode ierr; 968 PetscInt j,i; 969 PetscScalar *v; 970 971 PetscFunctionBegin; 972 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 973 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 974 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 975 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 976 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 977 } else { 978 B = *matout; 979 } 980 981 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 982 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 983 for (i=0; i<m; i++) rwork[i] = rstart + i; 984 for (j=0; j<n; j++) { 985 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 986 v += m; 987 } 988 ierr = PetscFree(rwork);CHKERRQ(ierr); 989 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 990 ierr = 
MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode    ierr;
  PetscScalar       *x;
  const PetscScalar *a;
  PetscInt          lda;

  PetscFunctionBegin;
  if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(x,a+col*lda,A->rmap->n);CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);

PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
PetscScalar *a; 1103 PetscInt m,n,i; 1104 1105 PetscFunctionBegin; 1106 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1107 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1108 for (i=0; i<m*n; i++) { 1109 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1110 } 1111 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1116 1117 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1118 { 1119 PetscFunctionBegin; 1120 *missing = PETSC_FALSE; 1121 PetscFunctionReturn(0); 1122 } 1123 1124 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*); 1125 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*); 1126 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1127 1128 /* -------------------------------------------------------------------*/ 1129 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1130 MatGetRow_MPIDense, 1131 MatRestoreRow_MPIDense, 1132 MatMult_MPIDense, 1133 /* 4*/ MatMultAdd_MPIDense, 1134 MatMultTranspose_MPIDense, 1135 MatMultTransposeAdd_MPIDense, 1136 0, 1137 0, 1138 0, 1139 /* 10*/ 0, 1140 0, 1141 0, 1142 0, 1143 MatTranspose_MPIDense, 1144 /* 15*/ MatGetInfo_MPIDense, 1145 MatEqual_MPIDense, 1146 MatGetDiagonal_MPIDense, 1147 MatDiagonalScale_MPIDense, 1148 MatNorm_MPIDense, 1149 /* 20*/ MatAssemblyBegin_MPIDense, 1150 MatAssemblyEnd_MPIDense, 1151 MatSetOption_MPIDense, 1152 MatZeroEntries_MPIDense, 1153 /* 24*/ MatZeroRows_MPIDense, 1154 0, 1155 0, 1156 0, 1157 0, 1158 /* 29*/ MatSetUp_MPIDense, 1159 0, 1160 0, 1161 MatGetDiagonalBlock_MPIDense, 1162 0, 1163 /* 34*/ MatDuplicate_MPIDense, 1164 0, 1165 0, 1166 0, 1167 0, 1168 /* 39*/ MatAXPY_MPIDense, 1169 MatCreateSubMatrices_MPIDense, 1170 0, 1171 MatGetValues_MPIDense, 1172 0, 1173 /* 44*/ 0, 1174 MatScale_MPIDense, 1175 MatShift_Basic, 1176 0, 1177 0, 1178 /* 49*/ MatSetRandom_MPIDense, 1179 0, 1180 0, 1181 0, 1182 0, 1183 /* 54*/ 0, 1184 0, 1185 0, 1186 0, 1187 0, 1188 /* 59*/ MatCreateSubMatrix_MPIDense, 1189 MatDestroy_MPIDense, 1190 MatView_MPIDense, 1191 0, 1192 0, 1193 /* 64*/ 0, 1194 0, 1195 0, 1196 0, 1197 0, 1198 /* 69*/ 0, 1199 0, 1200 0, 1201 0, 1202 0, 1203 /* 74*/ 0, 1204 0, 1205 0, 1206 0, 1207 0, 1208 /* 79*/ 0, 1209 0, 1210 0, 1211 0, 1212 /* 83*/ MatLoad_MPIDense, 1213 0, 1214 0, 1215 0, 1216 0, 1217 0, 1218 #if defined(PETSC_HAVE_ELEMENTAL) 1219 /* 89*/ MatMatMult_MPIDense_MPIDense, 1220 MatMatMultSymbolic_MPIDense_MPIDense, 1221 #else 1222 /* 89*/ 0, 1223 0, 1224 #endif 1225 MatMatMultNumeric_MPIDense, 1226 0, 1227 0, 1228 /* 94*/ 0, 1229 MatMatTransposeMult_MPIDense_MPIDense, 1230 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1231 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1232 0, 1233 /* 99*/ 0, 1234 0, 1235 0, 1236 MatConjugate_MPIDense, 1237 0, 1238 /*104*/ 0, 1239 MatRealPart_MPIDense, 1240 MatImaginaryPart_MPIDense, 1241 0, 1242 0, 1243 /*109*/ 0, 1244 0, 1245 0, 1246 MatGetColumnVector_MPIDense, 1247 MatMissingDiagonal_MPIDense, 1248 /*114*/ 0, 1249 0, 1250 0, 1251 0, 1252 0, 1253 /*119*/ 0, 1254 0, 1255 0, 1256 0, 1257 0, 1258 /*124*/ 0, 1259 MatGetColumnNorms_MPIDense, 1260 0, 1261 0, 1262 0, 1263 /*129*/ 0, 1264 MatTransposeMatMult_MPIDense_MPIDense, 1265 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1266 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1267 0, 1268 /*134*/ 0, 1269 0, 1270 0, 1271 0, 1272 0, 1273 /*139*/ 
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
};

PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
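/*
   Usage sketch (illustrative only): MatDenseGetColumn() exposes the locally stored part
   (owned rows) of one global column for in-place modification. The names A and col are
   hypothetical; A is assumed to be an assembled MATMPIDENSE or MATSEQDENSE matrix.

     PetscScalar    *colv;
     PetscInt       i,mlocal;
     PetscErrorCode ierr;

     ierr = MatGetLocalSize(A,&mlocal,NULL);CHKERRQ(ierr);
     ierr = MatDenseGetColumn(A,col,&colv);CHKERRQ(ierr);
     for (i=0; i<mlocal; i++) colv[i] *= 2.0;             // scale the local part of column col
     ierr = MatDenseRestoreColumn(A,&colv);CHKERRQ(ierr);
*/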
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1367 { 1368 PetscErrorCode ierr; 1369 Mat_MPIDense *mat; 1370 PetscInt m,nloc,N; 1371 1372 PetscFunctionBegin; 1373 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 1374 ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 1375 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1376 PetscInt sum; 1377 1378 if (n == PETSC_DECIDE) { 1379 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1380 } 1381 /* Check sum(n) = N */ 1382 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1383 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 1384 1385 ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr); 1386 } 1387 1388 /* numeric phase */ 1389 mat = (Mat_MPIDense*)(*outmat)->data; 1390 ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1391 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1392 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1393 PetscFunctionReturn(0); 1394 } 1395 1396 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1397 { 1398 Mat_MPIDense *a; 1399 PetscErrorCode ierr; 1400 1401 PetscFunctionBegin; 1402 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1403 mat->data = (void*)a; 1404 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1405 1406 mat->insertmode = NOT_SET_VALUES; 1407 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1408 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1409 1410 /* build cache for off array entries formed */ 1411 a->donotstash = PETSC_FALSE; 1412 1413 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1414 1415 /* stuff used for matrix vector multiply */ 1416 a->lvec = 0; 1417 a->Mvctx = 0; 1418 a->roworiented = PETSC_TRUE; 1419 1420 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr); 1421 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1422 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1423 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr); 1424 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr); 1425 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1426 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 1427 #if defined(PETSC_HAVE_ELEMENTAL) 1428 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 1429 #endif 1430 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1431 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1432 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1433 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1434 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_nest_mpidense_C",MatMatMult_Nest_Dense);CHKERRQ(ierr); 1435 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr); 1436 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr); 1437 1438 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1439 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1440 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1441 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 1442 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr); 1443 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1444 PetscFunctionReturn(0); 1445 } 1446 1447 /*MC 1448 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1449 1450 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1451 and MATMPIDENSE otherwise. 1452 1453 Options Database Keys: 1454 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1455 1456 Level: beginner 1457 1458 1459 .seealso: MATSEQDENSE,MATMPIDENSE 1460 M*/ 1461 1462 /*@C 1463 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1464 1465 Not collective 1466 1467 Input Parameters: 1468 . B - the matrix 1469 - data - optional location of matrix data. Set data=NULL for PETSc 1470 to control all matrix memory allocation. 1471 1472 Notes: 1473 The dense format is fully compatible with standard Fortran 77 1474 storage by columns. 1475 1476 The data input variable is intended primarily for Fortran programmers 1477 who wish to allocate their own matrix memory space. Most users should 1478 set data=NULL. 1479 1480 Level: intermediate 1481 1482 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1483 @*/ 1484 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1485 { 1486 PetscErrorCode ierr; 1487 1488 PetscFunctionBegin; 1489 ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1490 PetscFunctionReturn(0); 1491 } 1492 1493 /*@ 1494 MatDensePlaceArray - Allows one to replace the array in a dense array with an 1495 array provided by the user. This is useful to avoid copying an array 1496 into a matrix 1497 1498 Not Collective 1499 1500 Input Parameters: 1501 + mat - the matrix 1502 - array - the array in column major order 1503 1504 Notes: 1505 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 1506 freed when the matrix is destroyed. 
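/*
   Usage sketch (illustrative only): typical creation of a dense matrix where PETSc manages the
   storage (data = NULL). The communicator and sizes below are hypothetical; MatCreateDense(),
   further down in this file, wraps the same sequence.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);  // no-op for MATSEQDENSE (PetscTryMethod)
     // ... MatSetValues() and assembly ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/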
/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that which it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
          to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
1574 1575 Level: intermediate 1576 1577 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1578 @*/ 1579 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1580 { 1581 PetscErrorCode ierr; 1582 PetscMPIInt size; 1583 1584 PetscFunctionBegin; 1585 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1586 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1587 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1588 if (size > 1) { 1589 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1590 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1591 if (data) { /* user provided data array, so no need to assemble */ 1592 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 1593 (*A)->assembled = PETSC_TRUE; 1594 } 1595 } else { 1596 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1597 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1603 { 1604 Mat mat; 1605 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1606 PetscErrorCode ierr; 1607 1608 PetscFunctionBegin; 1609 *newmat = 0; 1610 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 1611 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1612 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1613 a = (Mat_MPIDense*)mat->data; 1614 1615 mat->factortype = A->factortype; 1616 mat->assembled = PETSC_TRUE; 1617 mat->preallocated = PETSC_TRUE; 1618 1619 a->size = oldmat->size; 1620 a->rank = oldmat->rank; 1621 mat->insertmode = NOT_SET_VALUES; 1622 a->nvec = oldmat->nvec; 1623 a->donotstash = oldmat->donotstash; 1624 1625 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1626 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1627 1628 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1629 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1630 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1631 1632 *newmat = mat; 1633 PetscFunctionReturn(0); 1634 } 1635 1636 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat) 1637 { 1638 PetscErrorCode ierr; 1639 PetscMPIInt rank,size; 1640 const PetscInt *rowners; 1641 PetscInt i,m,n,nz,j,mMax; 1642 PetscScalar *array,*vals,*vals_ptr; 1643 Mat_MPIDense *a = (Mat_MPIDense*)newmat->data; 1644 1645 PetscFunctionBegin; 1646 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1647 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1648 1649 /* determine ownership of rows and columns */ 1650 m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n; 1651 n = (newmat->cmap->n < 0) ? 
PETSC_DECIDE : newmat->cmap->n; 1652 1653 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1654 if (!a->A) { 1655 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1656 } 1657 ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 1658 ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr); 1659 ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr); 1660 ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr); 1661 if (!rank) { 1662 ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr); 1663 1664 /* read in my part of the matrix numerical values */ 1665 ierr = PetscBinaryRead(fd,vals,m*N,NULL,PETSC_SCALAR);CHKERRQ(ierr); 1666 1667 /* insert into the matrix by row (this is why we cannot read directly into array) */ 1668 vals_ptr = vals; 1669 for (i=0; i<m; i++) { 1670 for (j=0; j<N; j++) { 1671 array[i + j*m] = *vals_ptr++; 1672 } 1673 } 1674 1675 /* read in the parts owned by the other processes and ship them out */ 1676 for (i=1; i<size; i++) { 1677 nz = (rowners[i+1] - rowners[i])*N; 1678 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 1679 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1680 } 1681 } else { 1682 /* receive numeric values */ 1683 ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr); 1684 1685 /* receive message of values */ 1686 ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1687 1688 /* insert into the matrix by row (this is why we cannot read directly into array) */ 1689 vals_ptr = vals; 1690 for (i=0; i<m; i++) { 1691 for (j=0; j<N; j++) { 1692 array[i + j*m] = *vals_ptr++; 1693 } 1694 } 1695 } 1696 ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr); 1697 ierr = PetscFree(vals);CHKERRQ(ierr); 1698 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1699 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 PetscFunctionReturn(0); 1701 } 1702 1703 static PetscErrorCode MatLoad_MPIDense_Binary(Mat newmat,PetscViewer viewer) 1704 { 1705 Mat_MPIDense *a; 1706 PetscScalar *vals,*svals; 1707 MPI_Comm comm; 1708 MPI_Status status; 1709 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz; 1710 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1711 PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols; 1712 PetscInt i,nz,j,rstart,rend; 1713 int fd; 1714 PetscErrorCode ierr; 1715 1716 PetscFunctionBegin; 1717 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 1718 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1719 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1720 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1721 if (!rank) { 1722 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 1723 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1724 } 1725 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1726 M = header[1]; N = header[2]; nz = header[3]; 1727 1728 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1729 if (newmat->rmap->N < 0) newmat->rmap->N = M; 1730 if (newmat->cmap->N < 0) newmat->cmap->N = N; 1731 1732 if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N); 1733 if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N); 1734 1735 /*
1736 Handle case where matrix is stored on disk as a dense matrix 1737 */ 1738 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1739 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1740 PetscFunctionReturn(0); 1741 } 1742 1743 /* determine ownership of all rows */ 1744 if (newmat->rmap->n < 0) { 1745 ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr); 1746 } else { 1747 ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr); 1748 } 1749 if (newmat->cmap->n < 0) { 1750 n = PETSC_DECIDE; 1751 } else { 1752 ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr); 1753 } 1754 1755 ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr); 1756 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1757 rowners[0] = 0; 1758 for (i=2; i<=size; i++) { 1759 rowners[i] += rowners[i-1]; 1760 } 1761 rstart = rowners[rank]; 1762 rend = rowners[rank+1]; 1763 1764 /* distribute row lengths to all processors */ 1765 ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr); 1766 if (!rank) { 1767 ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr); 1768 ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr); 1769 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 1770 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1771 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1772 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1773 } else { 1774 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1775 } 1776 1777 if (!rank) { 1778 /* calculate the number of nonzeros on each processor */ 1779 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 1780 for (i=0; i<size; i++) { 1781 for (j=rowners[i]; j< rowners[i+1]; j++) { 1782 procsnz[i] += rowlengths[j]; 1783 } 1784 } 1785 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1786 1787 /* determine max buffer needed and allocate it */ 1788 maxnz = 0; 1789 for (i=0; i<size; i++) { 1790 maxnz = PetscMax(maxnz,procsnz[i]); 1791 } 1792 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 1793 1794 /* read in my part of the matrix column indices */ 1795 nz = procsnz[0]; 1796 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 1797 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 1798 1799 /* read in every one elses and ship off */ 1800 for (i=1; i<size; i++) { 1801 nz = procsnz[i]; 1802 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 1803 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1804 } 1805 ierr = PetscFree(cols);CHKERRQ(ierr); 1806 } else { 1807 /* determine buffer space needed for message */ 1808 nz = 0; 1809 for (i=0; i<m; i++) { 1810 nz += ourlens[i]; 1811 } 1812 ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr); 1813 1814 /* receive message of column indices*/ 1815 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1816 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1817 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1818 } 1819 1820 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1821 a = (Mat_MPIDense*)newmat->data; 1822 if (!a->A) { 1823 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1824 } 1825 1826 if (!rank) { 1827 ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr); 1828 1829 /* read in my part of the matrix numerical values */ 1830 nz = procsnz[0]; 1831 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 1832 1833 /* insert into matrix */ 1834 jj = 
rstart; 1835 smycols = mycols; 1836 svals = vals; 1837 for (i=0; i<m; i++) { 1838 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1839 smycols += ourlens[i]; 1840 svals += ourlens[i]; 1841 jj++; 1842 } 1843 1844 /* read in other processors and ship out */ 1845 for (i=1; i<size; i++) { 1846 nz = procsnz[i]; 1847 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 1848 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 1849 } 1850 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1851 } else { 1852 /* receive numeric values */ 1853 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 1854 1855 /* receive message of values*/ 1856 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 1857 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1858 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1859 1860 /* insert into matrix */ 1861 jj = rstart; 1862 smycols = mycols; 1863 svals = vals; 1864 for (i=0; i<m; i++) { 1865 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1866 smycols += ourlens[i]; 1867 svals += ourlens[i]; 1868 jj++; 1869 } 1870 } 1871 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1872 ierr = PetscFree(vals);CHKERRQ(ierr); 1873 ierr = PetscFree(mycols);CHKERRQ(ierr); 1874 ierr = PetscFree(rowners);CHKERRQ(ierr); 1875 1876 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1877 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1878 PetscFunctionReturn(0); 1879 } 1880 1881 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 1882 { 1883 PetscErrorCode ierr; 1884 PetscBool isbinary; 1885 #if defined(PETSC_HAVE_HDF5) 1886 PetscBool ishdf5; 1887 #endif 1888 1889 PetscFunctionBegin; 1890 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 1891 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 1892 /* force binary viewer to load .info file if it has not yet done so */ 1893 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1894 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1895 #if defined(PETSC_HAVE_HDF5) 1896 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 1897 #endif 1898 if (isbinary) { 1899 ierr = MatLoad_MPIDense_Binary(newMat,viewer);CHKERRQ(ierr); 1900 #if defined(PETSC_HAVE_HDF5) 1901 } else if (ishdf5) { 1902 ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr); 1903 #endif 1904 } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 1905 PetscFunctionReturn(0); 1906 } 1907 1908 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 1909 { 1910 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1911 Mat a,b; 1912 PetscBool flg; 1913 PetscErrorCode ierr; 1914 1915 PetscFunctionBegin; 1916 a = matA->A; 1917 b = matB->A; 1918 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1919 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1920 PetscFunctionReturn(0); 1921 } 1922 1923 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A) 1924 { 1925 PetscErrorCode ierr; 1926 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1927 Mat_TransMatMultDense *atb = a->atbdense; 1928 1929 PetscFunctionBegin; 1930 ierr = 
PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr); 1931 ierr = (atb->destroy)(A);CHKERRQ(ierr); 1932 ierr = PetscFree(atb);CHKERRQ(ierr); 1933 PetscFunctionReturn(0); 1934 } 1935 1936 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A) 1937 { 1938 PetscErrorCode ierr; 1939 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1940 Mat_MatTransMultDense *abt = a->abtdense; 1941 1942 PetscFunctionBegin; 1943 ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr); 1944 ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr); 1945 ierr = (abt->destroy)(A);CHKERRQ(ierr); 1946 ierr = PetscFree(abt);CHKERRQ(ierr); 1947 PetscFunctionReturn(0); 1948 } 1949 1950 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1951 { 1952 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 1953 Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 1954 Mat_TransMatMultDense *atb = c->atbdense; 1955 PetscErrorCode ierr; 1956 MPI_Comm comm; 1957 PetscMPIInt rank,size,*recvcounts=atb->recvcounts; 1958 PetscScalar *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf; 1959 PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 1960 PetscScalar _DOne=1.0,_DZero=0.0; 1961 PetscBLASInt am,an,bn,aN; 1962 const PetscInt *ranges; 1963 1964 PetscFunctionBegin; 1965 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1966 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1967 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1968 1969 /* compute atbarray = aseq^T * bseq */ 1970 ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr); 1971 ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr); 1972 ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr); 1973 ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr); 1974 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN)); 1975 1976 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 1977 for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 1978 1979 /* arrange atbarray into sendbuf */ 1980 k = 0; 1981 for (proc=0; proc<size; proc++) { 1982 for (j=0; j<cN; j++) { 1983 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 1984 } 1985 } 1986 /* sum all atbarray to local values of C */ 1987 ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr); 1988 ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 1989 ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr); 1990 PetscFunctionReturn(0); 1991 } 1992 1993 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1994 { 1995 PetscErrorCode ierr; 1996 Mat Cdense; 1997 MPI_Comm comm; 1998 PetscMPIInt size; 1999 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2000 Mat_MPIDense *c; 2001 Mat_TransMatMultDense *atb; 2002 2003 PetscFunctionBegin; 2004 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2005 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2006 SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2007 } 2008 2009 /* create matrix product Cdense */ 2010 ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr); 2011 ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2012 ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr); 2013 ierr = 
MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr); 2014 ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2015 ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2016 *C = Cdense; 2017 2018 /* create data structure for reuse Cdense */ 2019 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2020 ierr = PetscNew(&atb);CHKERRQ(ierr); 2021 cM = Cdense->rmap->N; 2022 ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr); 2023 2024 c = (Mat_MPIDense*)Cdense->data; 2025 c->atbdense = atb; 2026 atb->destroy = Cdense->ops->destroy; 2027 Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2028 PetscFunctionReturn(0); 2029 } 2030 2031 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 2032 { 2033 PetscErrorCode ierr; 2034 2035 PetscFunctionBegin; 2036 if (scall == MAT_INITIAL_MATRIX) { 2037 ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 2038 } 2039 ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 2040 PetscFunctionReturn(0); 2041 } 2042 2043 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C) 2044 { 2045 PetscErrorCode ierr; 2046 Mat Cdense; 2047 MPI_Comm comm; 2048 PetscMPIInt i, size; 2049 PetscInt maxRows, bufsiz; 2050 Mat_MPIDense *c; 2051 PetscMPIInt tag; 2052 const char *algTypes[2] = {"allgatherv","cyclic"}; 2053 PetscInt alg, nalg = 2; 2054 Mat_MatTransMultDense *abt; 2055 2056 PetscFunctionBegin; 2057 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2058 alg = 0; /* default is allgatherv */ 2059 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 2060 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr); 2061 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2062 if (A->cmap->N != B->cmap->N) { 2063 SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N); 2064 } 2065 2066 /* create matrix product Cdense */ 2067 ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr); 2068 ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 2069 ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr); 2070 ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr); 2071 ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr); 2072 ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2073 ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2074 *C = Cdense; 2075 2076 /* create data structure for reuse Cdense */ 2077 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2078 ierr = PetscNew(&abt);CHKERRQ(ierr); 2079 abt->tag = tag; 2080 abt->alg = alg; 2081 switch (alg) { 2082 case 1: 2083 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2084 bufsiz = A->cmap->N * maxRows; 2085 ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 2086 break; 2087 default: 2088 ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 2089 ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 2090 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2091 for (i = 0; 
i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2092 break; 2093 } 2094 2095 c = (Mat_MPIDense*)Cdense->data; 2096 c->abtdense = abt; 2097 abt->destroy = Cdense->ops->destroy; 2098 Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2099 PetscFunctionReturn(0); 2100 } 2101 2102 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2103 { 2104 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2105 Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 2106 Mat_SeqDense *cseq=(Mat_SeqDense*)(c->A)->data; 2107 Mat_MatTransMultDense *abt = c->abtdense; 2108 PetscErrorCode ierr; 2109 MPI_Comm comm; 2110 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2111 PetscScalar *sendbuf, *recvbuf=0, *carray; 2112 PetscInt i,cK=A->cmap->N,k,j,bn; 2113 PetscScalar _DOne=1.0,_DZero=0.0; 2114 PetscBLASInt cm, cn, ck; 2115 MPI_Request reqs[2]; 2116 const PetscInt *ranges; 2117 2118 PetscFunctionBegin; 2119 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2120 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2121 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2122 2123 ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2124 bn = B->rmap->n; 2125 if (bseq->lda == bn) { 2126 sendbuf = bseq->v; 2127 } else { 2128 sendbuf = abt->buf[0]; 2129 for (k = 0, i = 0; i < cK; i++) { 2130 for (j = 0; j < bn; j++, k++) { 2131 sendbuf[k] = bseq->v[i * bseq->lda + j]; 2132 } 2133 } 2134 } 2135 if (size > 1) { 2136 sendto = (rank + size - 1) % size; 2137 recvfrom = (rank + size + 1) % size; 2138 } else { 2139 sendto = recvfrom = 0; 2140 } 2141 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2142 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2143 recvisfrom = rank; 2144 for (i = 0; i < size; i++) { 2145 /* we have finished receiving in sending, bufs can be read/modified */ 2146 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2147 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2148 2149 if (nextrecvisfrom != rank) { 2150 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2151 sendsiz = cK * bn; 2152 recvsiz = cK * nextbn; 2153 recvbuf = (i & 1) ? 
abt->buf[0] : abt->buf[1]; 2154 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 2155 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 2156 } 2157 2158 /* local aseq * sendbuf^T */ 2159 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2160 carray = &cseq->v[cseq->lda * ranges[recvisfrom]]; 2161 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda)); 2162 2163 if (nextrecvisfrom != rank) { 2164 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2165 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2166 } 2167 bn = nextbn; 2168 recvisfrom = nextrecvisfrom; 2169 sendbuf = recvbuf; 2170 } 2171 PetscFunctionReturn(0); 2172 } 2173 2174 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2175 { 2176 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2177 Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 2178 Mat_SeqDense *cseq=(Mat_SeqDense*)(c->A)->data; 2179 Mat_MatTransMultDense *abt = c->abtdense; 2180 PetscErrorCode ierr; 2181 MPI_Comm comm; 2182 PetscMPIInt rank,size; 2183 PetscScalar *sendbuf, *recvbuf; 2184 PetscInt i,cK=A->cmap->N,k,j,bn; 2185 PetscScalar _DOne=1.0,_DZero=0.0; 2186 PetscBLASInt cm, cn, ck; 2187 2188 PetscFunctionBegin; 2189 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2190 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2191 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2192 2193 /* copy transpose of B into buf[0] */ 2194 bn = B->rmap->n; 2195 sendbuf = abt->buf[0]; 2196 recvbuf = abt->buf[1]; 2197 for (k = 0, j = 0; j < bn; j++) { 2198 for (i = 0; i < cK; i++, k++) { 2199 sendbuf[k] = bseq->v[i * bseq->lda + j]; 2200 } 2201 } 2202 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2203 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2204 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2205 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2206 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda)); 2207 PetscFunctionReturn(0); 2208 } 2209 2210 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2211 { 2212 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2213 Mat_MatTransMultDense *abt = c->abtdense; 2214 PetscErrorCode ierr; 2215 2216 PetscFunctionBegin; 2217 switch (abt->alg) { 2218 case 1: 2219 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2220 break; 2221 default: 2222 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2223 break; 2224 } 2225 PetscFunctionReturn(0); 2226 } 2227 2228 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C) 2229 { 2230 PetscErrorCode ierr; 2231 2232 PetscFunctionBegin; 2233 if (scall == MAT_INITIAL_MATRIX) { 2234 ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 2235 } 2236 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 2237 PetscFunctionReturn(0); 2238 } 2239 2240 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A) 2241 { 2242 PetscErrorCode ierr; 2243 Mat_MPIDense 
*a = (Mat_MPIDense*)A->data; 2244 Mat_MatMultDense *ab = a->abdense; 2245 2246 PetscFunctionBegin; 2247 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2248 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2249 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2250 2251 ierr = (ab->destroy)(A);CHKERRQ(ierr); 2252 ierr = PetscFree(ab);CHKERRQ(ierr); 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #if defined(PETSC_HAVE_ELEMENTAL) 2257 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2258 { 2259 PetscErrorCode ierr; 2260 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2261 Mat_MatMultDense *ab=c->abdense; 2262 2263 PetscFunctionBegin; 2264 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2265 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2266 ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2267 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2268 PetscFunctionReturn(0); 2269 } 2270 2271 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 2272 { 2273 PetscErrorCode ierr; 2274 Mat Ae,Be,Ce; 2275 Mat_MPIDense *c; 2276 Mat_MatMultDense *ab; 2277 2278 PetscFunctionBegin; 2279 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2280 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2281 } 2282 2283 /* convert A and B to Elemental matrices Ae and Be */ 2284 ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr); 2285 ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr); 2286 2287 /* Ce = Ae*Be */ 2288 ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr); 2289 ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr); 2290 2291 /* convert Ce to C */ 2292 ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 2293 2294 /* create data structure for reuse Cdense */ 2295 ierr = PetscNew(&ab);CHKERRQ(ierr); 2296 c = (Mat_MPIDense*)(*C)->data; 2297 c->abdense = ab; 2298 2299 ab->Ae = Ae; 2300 ab->Be = Be; 2301 ab->Ce = Ce; 2302 ab->destroy = (*C)->ops->destroy; 2303 (*C)->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2304 (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2305 PetscFunctionReturn(0); 2306 } 2307 2308 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 2309 { 2310 PetscErrorCode ierr; 2311 2312 PetscFunctionBegin; 2313 if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */ 2314 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 2315 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 2316 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 2317 } else { 2318 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 2319 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 2320 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 2321 } 2322 PetscFunctionReturn(0); 2323 } 2324 #endif 2325 2326
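/*
   Illustrative usage sketch (not part of the library code above; the sizes and variable names are examples):
   forming C = A*B^T for two MPIDENSE matrices and selecting, at run time, one of the two communication
   algorithms implemented in this file.

     Mat A,B,C;
     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,40,NULL,&A);CHKERRQ(ierr);
     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,60,40,NULL,&B);CHKERRQ(ierr);
     ... fill and assemble A and B ...
     ierr = MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

   Run with -matmattransmult_mpidense_mpidense_via cyclic to use the pairwise (cyclic) exchange kernel,
   or with allgatherv (the default) to gather the transpose of B onto every process before the local GEMM.
*/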