1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscErrorCode ierr; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 32 if (flg) *B = mat->A; 33 else *B = A; 34 PetscFunctionReturn(0); 35 } 36 37 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 38 { 39 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 40 PetscErrorCode ierr; 41 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 42 43 PetscFunctionBegin; 44 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 45 lrow = row - rstart; 46 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 47 PetscFunctionReturn(0); 48 } 49 50 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 51 { 52 PetscErrorCode ierr; 53 54 PetscFunctionBegin; 55 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 56 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 57 PetscFunctionReturn(0); 58 } 59 60 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 61 { 62 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 63 PetscErrorCode ierr; 64 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 65 PetscScalar *array; 66 MPI_Comm comm; 67 Mat B; 68 69 PetscFunctionBegin; 70 if (A->rmap->N != A->cmap->N) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 71 72 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 73 if (!B) { 74 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 75 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 76 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 77 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 78 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 79 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 80 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 81 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 82 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 83 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 84 *a = B; 85 ierr = MatDestroy(&B);CHKERRQ(ierr); 86 } else *a = B; 87 PetscFunctionReturn(0); 88 } 89 90 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 91 { 92 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 93 PetscErrorCode ierr; 94 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 95 PetscBool roworiented = A->roworiented; 96 97 PetscFunctionBegin; 98 for (i=0; i<m; i++) { 99 if (idxm[i] < 0) continue; 100 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 101 if (idxm[i] >= rstart && idxm[i] < rend) { 102 row = idxm[i] - rstart; 103 if (roworiented) { 104 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 105 } else { 106 for (j=0; j<n; j++) { 107 if (idxn[j] < 0) continue; 108 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 109 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 110 } 111 } 112 } else if (!A->donotstash) { 113 mat->assembled = PETSC_FALSE; 114 if (roworiented) { 115 ierr = 
MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 116 } else { 117 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 118 } 119 } 120 } 121 PetscFunctionReturn(0); 122 } 123 124 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 125 { 126 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 127 PetscErrorCode ierr; 128 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 129 130 PetscFunctionBegin; 131 for (i=0; i<m; i++) { 132 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 133 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 134 if (idxm[i] >= rstart && idxm[i] < rend) { 135 row = idxm[i] - rstart; 136 for (j=0; j<n; j++) { 137 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 138 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 139 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 140 } 141 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 142 } 143 PetscFunctionReturn(0); 144 } 145 146 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 147 { 148 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 149 PetscErrorCode ierr; 150 151 PetscFunctionBegin; 152 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 153 PetscFunctionReturn(0); 154 } 155 156 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 157 { 158 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 159 PetscErrorCode ierr; 160 161 PetscFunctionBegin; 162 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 163 PetscFunctionReturn(0); 164 } 165 166 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[]) 167 { 168 Mat_MPIDense *a = 
(Mat_MPIDense*)A->data; 169 PetscErrorCode ierr; 170 171 PetscFunctionBegin; 172 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 173 PetscFunctionReturn(0); 174 } 175 176 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[]) 177 { 178 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 179 PetscErrorCode ierr; 180 181 PetscFunctionBegin; 182 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 186 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 187 { 188 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 189 PetscErrorCode ierr; 190 191 PetscFunctionBegin; 192 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 193 PetscFunctionReturn(0); 194 } 195 196 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 197 { 198 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 199 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 200 PetscErrorCode ierr; 201 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 202 const PetscInt *irow,*icol; 203 PetscScalar *av,*bv,*v = lmat->v; 204 Mat newmat; 205 IS iscol_local; 206 MPI_Comm comm_is,comm_mat; 207 208 PetscFunctionBegin; 209 ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 210 ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 211 if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 212 213 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 214 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 215 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 216 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 217 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 218 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 219 220 /* No parallel redistribution currently supported! Should really check each index set 221 to comfirm that it is OK. ... 
Currently supports only submatrix same partitioning as 222 original matrix! */ 223 224 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 225 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 226 227 /* Check submatrix call */ 228 if (scall == MAT_REUSE_MATRIX) { 229 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 230 /* Really need to test rows and column sizes! */ 231 newmat = *B; 232 } else { 233 /* Create and fill new matrix */ 234 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 235 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 236 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 237 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 238 } 239 240 /* Now extract the data pointers and do the copy, column at a time */ 241 newmatd = (Mat_MPIDense*)newmat->data; 242 bv = ((Mat_SeqDense*)newmatd->A->data)->v; 243 244 for (i=0; i<Ncols; i++) { 245 av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 246 for (j=0; j<nrows; j++) { 247 *bv++ = av[irow[j] - rstart]; 248 } 249 } 250 251 /* Assemble the matrices so that the correct flags are set */ 252 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 253 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 254 255 /* Free work space */ 256 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 257 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 258 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 259 *B = newmat; 260 PetscFunctionReturn(0); 261 } 262 263 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 264 { 265 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 266 PetscErrorCode ierr; 267 268 PetscFunctionBegin; 269 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 270 PetscFunctionReturn(0); 271 } 272 273 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[]) 274 { 275 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 
276 PetscErrorCode ierr; 277 278 PetscFunctionBegin; 279 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 280 PetscFunctionReturn(0); 281 } 282 283 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 284 { 285 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 286 PetscErrorCode ierr; 287 PetscInt nstash,reallocs; 288 289 PetscFunctionBegin; 290 if (mdn->donotstash || mat->nooffprocentries) return(0); 291 292 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 293 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 294 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 295 PetscFunctionReturn(0); 296 } 297 298 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 299 { 300 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 301 PetscErrorCode ierr; 302 PetscInt i,*row,*col,flg,j,rstart,ncols; 303 PetscMPIInt n; 304 PetscScalar *val; 305 306 PetscFunctionBegin; 307 if (!mdn->donotstash && !mat->nooffprocentries) { 308 /* wait on receives */ 309 while (1) { 310 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 311 if (!flg) break; 312 313 for (i=0; i<n;) { 314 /* Now identify the consecutive vals belonging to the same row */ 315 for (j=i,rstart=row[j]; j<n; j++) { 316 if (row[j] != rstart) break; 317 } 318 if (j < n) ncols = j-i; 319 else ncols = n-i; 320 /* Now assemble all these values with a single function call */ 321 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 322 i = j; 323 } 324 } 325 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 326 } 327 328 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 329 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 330 331 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 332 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 333 } 334 PetscFunctionReturn(0); 335 } 336 337 
PetscErrorCode MatZeroEntries_MPIDense(Mat A) 338 { 339 PetscErrorCode ierr; 340 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 341 342 PetscFunctionBegin; 343 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 344 PetscFunctionReturn(0); 345 } 346 347 /* the code does not do the diagonal entries correctly unless the 348 matrix is square and the column and row owerships are identical. 349 This is a BUG. The only way to fix it seems to be to access 350 mdn->A and mdn->B directly and not through the MatZeroRows() 351 routine. 352 */ 353 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 354 { 355 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 356 PetscErrorCode ierr; 357 PetscInt i,*owners = A->rmap->range; 358 PetscInt *sizes,j,idx,nsends; 359 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 360 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 361 PetscInt *lens,*lrows,*values; 362 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 363 MPI_Comm comm; 364 MPI_Request *send_waits,*recv_waits; 365 MPI_Status recv_status,*send_status; 366 PetscBool found; 367 const PetscScalar *xx; 368 PetscScalar *bb; 369 370 PetscFunctionBegin; 371 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 372 if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 373 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 374 /* first count number of contributors to each processor */ 375 ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 376 ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/ 377 for (i=0; i<N; i++) { 378 idx = rows[i]; 379 found = PETSC_FALSE; 380 for (j=0; j<size; j++) { 381 if (idx >= owners[j] && idx < owners[j+1]) { 382 sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 383 } 384 } 385 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of 
range"); 386 } 387 nsends = 0; 388 for (i=0; i<size; i++) nsends += sizes[2*i+1]; 389 390 /* inform other processors of number of messages and max length*/ 391 ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr); 392 393 /* post receives: */ 394 ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr); 395 ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr); 396 for (i=0; i<nrecvs; i++) { 397 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 398 } 399 400 /* do sends: 401 1) starts[i] gives the starting index in svalues for stuff going to 402 the ith processor 403 */ 404 ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr); 405 ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr); 406 ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr); 407 408 starts[0] = 0; 409 for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 410 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 411 412 starts[0] = 0; 413 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 414 count = 0; 415 for (i=0; i<size; i++) { 416 if (sizes[2*i+1]) { 417 ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 418 } 419 } 420 ierr = PetscFree(starts);CHKERRQ(ierr); 421 422 base = owners[rank]; 423 424 /* wait on receives */ 425 ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 426 count = nrecvs; 427 slen = 0; 428 while (count) { 429 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 430 /* unpack receives into our local space */ 431 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 432 433 source[imdex] = recv_status.MPI_SOURCE; 434 lens[imdex] = n; 435 slen += n; 436 count--; 437 } 438 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 439 440 /* move the data into the send scatter */ 441 ierr = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr); 442 count = 0; 443 for (i=0; i<nrecvs; i++) { 444 values = rvalues + i*nmax; 445 for 
(j=0; j<lens[i]; j++) { 446 lrows[count++] = values[j] - base; 447 } 448 } 449 ierr = PetscFree(rvalues);CHKERRQ(ierr); 450 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 451 ierr = PetscFree(owner);CHKERRQ(ierr); 452 ierr = PetscFree(sizes);CHKERRQ(ierr); 453 454 /* fix right hand side if needed */ 455 if (x && b) { 456 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 457 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 458 for (i=0; i<slen; i++) { 459 bb[lrows[i]] = diag*xx[lrows[i]]; 460 } 461 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 462 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 463 } 464 465 /* actually zap the local rows */ 466 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 467 if (diag != 0.0) { 468 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 469 PetscInt m = ll->lda, i; 470 471 for (i=0; i<slen; i++) { 472 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 473 } 474 } 475 ierr = PetscFree(lrows);CHKERRQ(ierr); 476 477 /* wait on sends */ 478 if (nsends) { 479 ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); 480 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 481 ierr = PetscFree(send_status);CHKERRQ(ierr); 482 } 483 ierr = PetscFree(send_waits);CHKERRQ(ierr); 484 ierr = PetscFree(svalues);CHKERRQ(ierr); 485 PetscFunctionReturn(0); 486 } 487 488 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 489 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 490 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 491 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 492 493 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 494 { 495 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 496 PetscErrorCode ierr; 497 498 PetscFunctionBegin; 499 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 500 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 501 ierr = 
MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 502 PetscFunctionReturn(0); 503 } 504 505 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 506 { 507 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 508 PetscErrorCode ierr; 509 510 PetscFunctionBegin; 511 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 512 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 513 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 514 PetscFunctionReturn(0); 515 } 516 517 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 518 { 519 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 520 PetscErrorCode ierr; 521 PetscScalar zero = 0.0; 522 523 PetscFunctionBegin; 524 ierr = VecSet(yy,zero);CHKERRQ(ierr); 525 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 526 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 527 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 528 PetscFunctionReturn(0); 529 } 530 531 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 532 { 533 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 534 PetscErrorCode ierr; 535 536 PetscFunctionBegin; 537 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 538 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 539 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 540 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 541 PetscFunctionReturn(0); 542 } 543 544 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 545 { 546 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 547 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 548 PetscErrorCode ierr; 549 PetscInt len,i,n,m = A->rmap->n,radd; 550 PetscScalar *x,zero = 0.0; 551 552 PetscFunctionBegin; 553 ierr = VecSet(v,zero);CHKERRQ(ierr); 554 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 555 ierr = 
VecGetSize(v,&n);CHKERRQ(ierr); 556 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 557 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 558 radd = A->rmap->rstart*m; 559 for (i=0; i<len; i++) { 560 x[i] = aloc->v[radd + i*m + i]; 561 } 562 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 563 PetscFunctionReturn(0); 564 } 565 566 PetscErrorCode MatDestroy_MPIDense(Mat mat) 567 { 568 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 569 PetscErrorCode ierr; 570 571 PetscFunctionBegin; 572 #if defined(PETSC_USE_LOG) 573 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 574 #endif 575 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 576 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 577 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 578 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 579 580 ierr = PetscFree(mat->data);CHKERRQ(ierr); 581 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 582 583 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 584 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 585 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 586 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 587 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 590 #if defined(PETSC_HAVE_ELEMENTAL) 591 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 592 #endif 593 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 594 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 595 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 596 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 597 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 598 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr); 599 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr); 600 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 601 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 602 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 603 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 604 PetscFunctionReturn(0); 605 } 606 607 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 608 609 #include <petscdraw.h> 610 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 611 { 612 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 613 PetscErrorCode ierr; 614 PetscMPIInt rank = mdn->rank; 615 PetscViewerType vtype; 616 PetscBool iascii,isdraw; 617 PetscViewer sviewer; 618 PetscViewerFormat format; 619 620 PetscFunctionBegin; 621 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 622 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 623 if (iascii) { 624 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 625 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 626 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 627 
MatInfo info; 628 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 629 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 630 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 631 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 632 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 633 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 634 PetscFunctionReturn(0); 635 } else if (format == PETSC_VIEWER_ASCII_INFO) { 636 PetscFunctionReturn(0); 637 } 638 } else if (isdraw) { 639 PetscDraw draw; 640 PetscBool isnull; 641 642 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 643 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 644 if (isnull) PetscFunctionReturn(0); 645 } 646 647 { 648 /* assemble the entire matrix onto first processor. */ 649 Mat A; 650 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 651 PetscInt *cols; 652 PetscScalar *vals; 653 654 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 655 if (!rank) { 656 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 657 } else { 658 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 659 } 660 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 661 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 662 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 663 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 664 665 /* Copy the matrix ... 
This isn't the most efficient means, 666 but it's quick for now */ 667 A->insertmode = INSERT_VALUES; 668 669 row = mat->rmap->rstart; 670 m = mdn->A->rmap->n; 671 for (i=0; i<m; i++) { 672 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 673 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 674 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 675 row++; 676 } 677 678 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 679 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 680 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 681 if (!rank) { 682 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 683 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 684 } 685 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 686 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 687 ierr = MatDestroy(&A);CHKERRQ(ierr); 688 } 689 PetscFunctionReturn(0); 690 } 691 692 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 693 { 694 PetscErrorCode ierr; 695 PetscBool iascii,isbinary,isdraw,issocket; 696 697 PetscFunctionBegin; 698 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 699 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 700 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 701 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 702 703 if (iascii || issocket || isdraw) { 704 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 705 } else if (isbinary) { 706 ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 707 } 708 PetscFunctionReturn(0); 709 } 710 711 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 712 { 713 Mat_MPIDense *mat = 
(Mat_MPIDense*)A->data; 714 Mat mdn = mat->A; 715 PetscErrorCode ierr; 716 PetscLogDouble isend[5],irecv[5]; 717 718 PetscFunctionBegin; 719 info->block_size = 1.0; 720 721 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 722 723 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 724 isend[3] = info->memory; isend[4] = info->mallocs; 725 if (flag == MAT_LOCAL) { 726 info->nz_used = isend[0]; 727 info->nz_allocated = isend[1]; 728 info->nz_unneeded = isend[2]; 729 info->memory = isend[3]; 730 info->mallocs = isend[4]; 731 } else if (flag == MAT_GLOBAL_MAX) { 732 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 733 734 info->nz_used = irecv[0]; 735 info->nz_allocated = irecv[1]; 736 info->nz_unneeded = irecv[2]; 737 info->memory = irecv[3]; 738 info->mallocs = irecv[4]; 739 } else if (flag == MAT_GLOBAL_SUM) { 740 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 741 742 info->nz_used = irecv[0]; 743 info->nz_allocated = irecv[1]; 744 info->nz_unneeded = irecv[2]; 745 info->memory = irecv[3]; 746 info->mallocs = irecv[4]; 747 } 748 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 749 info->fill_ratio_needed = 0; 750 info->factor_mallocs = 0; 751 PetscFunctionReturn(0); 752 } 753 754 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 755 { 756 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 757 PetscErrorCode ierr; 758 759 PetscFunctionBegin; 760 switch (op) { 761 case MAT_NEW_NONZERO_LOCATIONS: 762 case MAT_NEW_NONZERO_LOCATION_ERR: 763 case MAT_NEW_NONZERO_ALLOCATION_ERR: 764 MatCheckPreallocated(A,1); 765 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 766 break; 767 case MAT_ROW_ORIENTED: 768 MatCheckPreallocated(A,1); 769 a->roworiented = flg; 770 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 771 break; 772 case MAT_NEW_DIAGONALS: 773 case MAT_KEEP_NONZERO_PATTERN: 774 
case MAT_USE_HASH_TABLE: 775 case MAT_SORTED_FULL: 776 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 777 break; 778 case MAT_IGNORE_OFF_PROC_ENTRIES: 779 a->donotstash = flg; 780 break; 781 case MAT_SYMMETRIC: 782 case MAT_STRUCTURALLY_SYMMETRIC: 783 case MAT_HERMITIAN: 784 case MAT_SYMMETRY_ETERNAL: 785 case MAT_IGNORE_LOWER_TRIANGULAR: 786 case MAT_IGNORE_ZERO_ENTRIES: 787 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 788 break; 789 default: 790 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 791 } 792 PetscFunctionReturn(0); 793 } 794 795 796 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 797 { 798 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 799 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 800 const PetscScalar *l,*r; 801 PetscScalar x,*v; 802 PetscErrorCode ierr; 803 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 804 805 PetscFunctionBegin; 806 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 807 if (ll) { 808 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 809 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 810 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 811 for (i=0; i<m; i++) { 812 x = l[i]; 813 v = mat->v + i; 814 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 815 } 816 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 817 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 818 } 819 if (rr) { 820 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 821 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 822 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 823 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 824 ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 825 for (i=0; i<n; i++) { 826 x = r[i]; 827 v = mat->v + i*m; 828 
for (j=0; j<m; j++) (*v++) *= x; 829 } 830 ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 831 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 832 } 833 PetscFunctionReturn(0); 834 } 835 836 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 837 { 838 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 839 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 840 PetscErrorCode ierr; 841 PetscInt i,j; 842 PetscReal sum = 0.0; 843 PetscScalar *v = mat->v; 844 845 PetscFunctionBegin; 846 if (mdn->size == 1) { 847 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 848 } else { 849 if (type == NORM_FROBENIUS) { 850 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 851 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 852 } 853 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 854 *nrm = PetscSqrtReal(*nrm); 855 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 856 } else if (type == NORM_1) { 857 PetscReal *tmp,*tmp2; 858 ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 859 *nrm = 0.0; 860 v = mat->v; 861 for (j=0; j<mdn->A->cmap->n; j++) { 862 for (i=0; i<mdn->A->rmap->n; i++) { 863 tmp[j] += PetscAbsScalar(*v); v++; 864 } 865 } 866 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 867 for (j=0; j<A->cmap->N; j++) { 868 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 869 } 870 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 871 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 872 } else if (type == NORM_INFINITY) { /* max row norm */ 873 PetscReal ntemp; 874 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 875 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 876 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 877 } 878 PetscFunctionReturn(0); 879 } 880 881 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 882 { 
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* build a fresh transpose-shaped matrix; for INPLACE it is merged into A below */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* insert each locally owned row of A as a (global) column of B */
  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's innards with B's */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

/* Finalize layouts and preallocate storage if the user has not done so */
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!A->preallocated) {
    ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Y += alpha*X, performed on the local sequential blocks */
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr
= MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 942 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 943 PetscFunctionReturn(0); 944 } 945 946 PetscErrorCode MatConjugate_MPIDense(Mat mat) 947 { 948 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 949 PetscErrorCode ierr; 950 951 PetscFunctionBegin; 952 ierr = MatConjugate(a->A);CHKERRQ(ierr); 953 PetscFunctionReturn(0); 954 } 955 956 PetscErrorCode MatRealPart_MPIDense(Mat A) 957 { 958 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 959 PetscErrorCode ierr; 960 961 PetscFunctionBegin; 962 ierr = MatRealPart(a->A);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 966 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 967 { 968 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 969 PetscErrorCode ierr; 970 971 PetscFunctionBegin; 972 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } 975 976 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 977 { 978 PetscErrorCode ierr; 979 PetscScalar *x; 980 const PetscScalar *a; 981 PetscInt lda; 982 983 PetscFunctionBegin; 984 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 985 ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr); 986 ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr); 987 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 988 ierr = PetscArraycpy(x,a+col*lda,A->rmap->n);CHKERRQ(ierr); 989 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 990 ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr); 991 PetscFunctionReturn(0); 992 } 993 994 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 995 996 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 997 { 998 PetscErrorCode ierr; 999 PetscInt i,n; 1000 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1001 PetscReal *work; 1002 1003 PetscFunctionBegin; 1004 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1005 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1006 ierr = 
MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    /* square local 2-norms so they can be summed across ranks */
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

/* Fill the local block with random values from rctx */
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

/* Dense storage always has every diagonal entry present */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

/* -------------------------------------------------------------------*/
/* Function table for MATMPIDENSE; slot numbers follow struct _MatOps */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ 0,
                                        0,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        0,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};

/* Preallocation for the MPI dense type: sets up layouts and creates the
   local MATSEQDENSE block, optionally wrapping user-supplied storage. */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a = (Mat_MPIDense*)mat->data;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  /* local block holds all N global columns for the locally owned rows */
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a MATMPIDENSE matrix to MATELEMENTAL (new matrix, reuse, or in-place) */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr =
MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* Access a column of the local block as a raw array (delegates to seq dense) */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Return access obtained with MatDenseGetColumn_MPIDense() */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Build (or refill) a parallel dense matrix whose local block is inmat */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n ==
PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Constructor for MATMPIDENSE: installs the ops table, the stash used for
   off-process MatSetValues, and the type-specific composed methods. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr =
PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MATSEQDENSE,MATMPIDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense array with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    PetscBool havedata = (PetscBool)!!data;

    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
    if (havedata) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Duplicate a parallel dense matrix; values are copied or not per cpvalues */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load a parallel dense matrix from a binary or HDF5 viewer */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Equality test: local blocks are compared, then the result is AND-reduced */
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode
MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a   = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  /* free the A^T*B workspace, then chain to the saved destroy routine */
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a   = (Mat_MPIDense*)A->data;
  Mat_MatTransMultDense *abt = a->abtdense;

  PetscFunctionBegin;
  /* free the A*B^T workspace, then chain to the saved destroy routine */
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = (abt->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* C = A^T * B: each rank computes its full local contribution with one GEMM,
   then the rows of C are summed and distributed with MPI_Reduce_scatter. */
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          am,an,bn,aN;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A^T * B: sizes C and allocates the reusable buffers */
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  c            = (Mat_MPIDense*)C->data;
  c->atbdense  = atb;
  atb->destroy = C->ops->destroy;   /* chain destroys so the workspace is freed */
  C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A * B^T: sizes C and allocates buffers for either the
   "cyclic" (alg 1) or "allgatherv" (alg 0) communication scheme. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) {
    SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);
  }

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  if (flg) {
    alg = 0;
  } else alg = 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C, &tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* double buffer sized for the largest row block of B on any rank */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N *
maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  c               = (Mat_MPIDense*)C->data;
  c->abtdense     = abt;
  abt->destroy    = C->ops->destroy;   /* chain destroys so the workspace is freed */
  C->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* C = A * B^T, cyclic scheme: each rank's transposed B block is circulated
   around the ring; the local GEMM against the current block fills the
   matching column slab of C while the next block is in flight. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense          *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *carray;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm, cn, ck;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (bseq->lda == bn) {
    /* B's local block is contiguous: send it directly */
    sendbuf = bseq->v;
  } else {
    /* compact B's local block (strip lda padding) into the send buffer */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bseq->v[i * bseq->lda + j];
      }
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    ierr   = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscFunctionReturn(0);
}

/* C = A * B^T, allgatherv scheme: every rank gathers all of B^T, then a
   single local GEMM produces the local rows of C. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense          *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size;
  PetscScalar           *sendbuf, *recvbuf;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm, cn, ck;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* copy transpose of B into buf[0] */
  bn      = B->rmap->n;
  sendbuf = abt->buf[0];
  recvbuf = abt->buf[1];
  for (k = 0, j = 0; j < bn; j++) {
    for (i = 0; i < cK; i++, k++) {
      sendbuf[k] = bseq->v[i * bseq->lda + j];
    }
  }
  ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
  PetscFunctionReturn(0);
}

/* Dispatch C = A * B^T to the algorithm selected in the symbolic phase */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  switch (abt->alg) {
  case 1:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
    break;
  default:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
    break;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
PetscErrorCode ierr; 1900 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1901 Mat_MatMultDense *ab = a->abdense; 1902 1903 PetscFunctionBegin; 1904 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 1905 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 1906 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 1907 1908 ierr = (ab->destroy)(A);CHKERRQ(ierr); 1909 ierr = PetscFree(ab);CHKERRQ(ierr); 1910 PetscFunctionReturn(0); 1911 } 1912 1913 #if defined(PETSC_HAVE_ELEMENTAL) 1914 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1915 { 1916 PetscErrorCode ierr; 1917 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 1918 Mat_MatMultDense *ab=c->abdense; 1919 1920 PetscFunctionBegin; 1921 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 1922 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 1923 ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 1924 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 1925 PetscFunctionReturn(0); 1926 } 1927 1928 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 1929 { 1930 PetscErrorCode ierr; 1931 Mat Ae,Be,Ce; 1932 Mat_MPIDense *c; 1933 Mat_MatMultDense *ab; 1934 1935 PetscFunctionBegin; 1936 /* check local size of A and B */ 1937 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 1938 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 1939 } 1940 1941 /* create elemental matrices Ae and Be */ 1942 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 1943 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1944 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 1945 ierr = MatSetUp(Ae);CHKERRQ(ierr); 1946 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1947 
1948 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 1949 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 1950 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 1951 ierr = MatSetUp(Be);CHKERRQ(ierr); 1952 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1953 1954 /* compute symbolic Ce = Ae*Be */ 1955 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 1956 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 1957 1958 /* setup C */ 1959 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1960 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 1961 ierr = MatSetUp(C);CHKERRQ(ierr); 1962 1963 /* create data structure for reuse Cdense */ 1964 ierr = PetscNew(&ab);CHKERRQ(ierr); 1965 c = (Mat_MPIDense*)C->data; 1966 c->abdense = ab; 1967 1968 ab->Ae = Ae; 1969 ab->Be = Be; 1970 ab->Ce = Ce; 1971 ab->destroy = C->ops->destroy; 1972 C->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 1973 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 1974 C->ops->productnumeric = MatProductNumeric_AB; 1975 PetscFunctionReturn(0); 1976 } 1977 #endif 1978 /* ----------------------------------------------- */ 1979 #if defined(PETSC_HAVE_ELEMENTAL) 1980 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 1981 { 1982 PetscFunctionBegin; 1983 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 1984 C->ops->productsymbolic = MatProductSymbolic_AB; 1985 C->ops->productnumeric = MatProductNumeric_AB; 1986 PetscFunctionReturn(0); 1987 } 1988 #endif 1989 1990 static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C) 1991 { 1992 PetscErrorCode ierr; 1993 Mat_Product *product = C->product; 1994 1995 PetscFunctionBegin; 1996 ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr); 1997 C->ops->productnumeric = MatProductNumeric_AtB; 1998 
C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense; 1999 PetscFunctionReturn(0); 2000 } 2001 2002 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2003 { 2004 Mat_Product *product = C->product; 2005 Mat A = product->A,B=product->B; 2006 2007 PetscFunctionBegin; 2008 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2009 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2010 2011 C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense; 2012 PetscFunctionReturn(0); 2013 } 2014 2015 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2016 { 2017 PetscErrorCode ierr; 2018 Mat_Product *product = C->product; 2019 const char *algTypes[2] = {"allgatherv","cyclic"}; 2020 PetscInt alg,nalg = 2; 2021 PetscBool flg = PETSC_FALSE; 2022 2023 PetscFunctionBegin; 2024 /* Set default algorithm */ 2025 alg = 0; /* default is allgatherv */ 2026 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 2027 if (flg) { 2028 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2029 } 2030 2031 /* Get runtime option */ 2032 if (product->api_user) { 2033 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 2034 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2035 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2036 } else { 2037 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 2038 ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2039 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2040 } 2041 if (flg) 
{ 2042 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2043 } 2044 2045 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2046 C->ops->productsymbolic = MatProductSymbolic_ABt; 2047 C->ops->productnumeric = MatProductNumeric_ABt; 2048 PetscFunctionReturn(0); 2049 } 2050 2051 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2052 { 2053 PetscErrorCode ierr; 2054 Mat_Product *product = C->product; 2055 2056 PetscFunctionBegin; 2057 switch (product->type) { 2058 #if defined(PETSC_HAVE_ELEMENTAL) 2059 case MATPRODUCT_AB: 2060 ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 2061 break; 2062 #endif 2063 case MATPRODUCT_AtB: 2064 ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 2065 break; 2066 case MATPRODUCT_ABt: 2067 ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 2068 break; 2069 default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]); 2070 } 2071 PetscFunctionReturn(0); 2072 } 2073