
/*
   Basic functions for parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
   matrix that represents the operator. For sequential matrices it returns itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
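/*
   A minimal usage sketch for MatDenseGetLocalMatrix(), illustrative only: it
   assumes A is an assembled MATMPIDENSE (or MATSEQDENSE) matrix created
   elsewhere. The inner matrix is borrowed, so the caller does not destroy it;
   for MATSEQDENSE the call simply returns A itself.

     Mat            lA;
     PetscInt       lda;
     PetscErrorCode ierr;

     ierr = MatDenseGetLocalMatrix(A,&lA);CHKERRQ(ierr);
     ierr = MatDenseGetLDA(lA,&lda);CHKERRQ(ierr);
*/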
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
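/*
   A minimal sketch of direct access to the locally owned entries through the
   wrappers above, illustrative only (A is an assembled dense matrix). Each
   process stores its m local rows of all N global columns in column-major
   order, with the leading dimension reported by MatDenseGetLDA():

     PetscScalar    *v;
     PetscInt       lda,m,N,i,j;
     PetscErrorCode ierr;

     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
     ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
     for (j=0; j<N; j++) {
       for (i=0; i<m; i++) v[i+j*lda] *= 2.0;
     }
     ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
*/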
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
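/*
   A minimal sketch of extracting a submatrix, illustrative only. As noted above,
   no parallel redistribution is done, so isrow must select locally owned rows
   with the same partitioning as A; ncols_local and first_col below are
   hypothetical placeholders for the caller's column range:

     IS             isrow,iscol;
     Mat            S;
     PetscInt       rstart,rend;
     PetscErrorCode ierr;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ncols_local,first_col,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/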
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
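/*
   A minimal sketch of applying the operator, illustrative only (A is an
   assembled dense matrix). MatMult() dispatches to MatMult_MPIDense() above,
   which scatters the global x into the local work vector lvec and then calls
   the sequential kernel:

     Vec            x,y;
     PetscErrorCode ierr;

     ierr = MatCreateVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = MatMultTranspose(A,y,x);CHKERRQ(ierr);
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&y);CHKERRQ(ierr);
*/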
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
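/*
   A minimal viewing sketch, illustrative only (A is an assembled dense matrix):

     PetscErrorCode ierr;

     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   With the PETSC_VIEWER_ASCII_INFO formats only the summary information handled
   above is printed; otherwise the entries are gathered onto the first process
   as in MatView_MPIDense_ASCIIorDraworSocket().
*/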
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
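/*
   A minimal sketch of forming the transpose, illustrative only (A assembled):

     Mat            At;
     PetscErrorCode ierr;

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call refreshes At after A's values change;
   MAT_INPLACE_MATRIX replaces A itself via MatHeaderMerge() as above.
*/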
static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode    ierr;
  PetscScalar       *x;
  const PetscScalar *a;
  PetscInt          lda;

  PetscFunctionBegin;
  if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(x,a+col*lda,A->rmap->n);CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);

PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        MatMatTransposeMult_MPIDense_MPIDense,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
};

PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
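/*
   A minimal sketch of the column access pair above, illustrative only
   (A assembled, col a valid global column index). The pointer covers the
   locally owned rows of that column and must be restored before other use of
   the matrix:

     PetscScalar    *colv;
     PetscErrorCode ierr;

     ierr = MatDenseGetColumn(A,col,&colv);CHKERRQ(ierr);
     ierr = MatDenseRestoreColumn(A,&colv);CHKERRQ(ierr);
*/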
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_nest_mpidense_C",MatMatMult_Nest_Dense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MATSEQDENSE,MATMPIDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
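/*
   A minimal sketch of the place/reset pairing, illustrative only. The
   hypothetical myarray is allocated by the caller, must be large enough for
   the local rows in column-major order, and must outlive the placement:

     PetscScalar    *myarray;
     PetscErrorCode ierr;

     ierr = MatDensePlaceArray(A,myarray);CHKERRQ(ierr);
     ierr = MatDenseResetArray(A);CHKERRQ(ierr);
*/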
/*@
   MatDenseResetArray - Resets the matrix array to that which it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
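/*
   A minimal creation sketch, illustrative only; M and N are assumed to be set
   elsewhere and PETSc decides the local layout:

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
     ierr = MatSetRandom(A,NULL);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/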
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,&ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a   = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a   = (Mat_MPIDense*)A->data;
  Mat_MatTransMultDense *abt = a->abtdense;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = (abt->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
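/*
   A minimal sketch of the user-level entry point implemented below, illustrative
   only (A and B are assembled MATMPIDENSE matrices with the same row layout):

     Mat            C;
     PetscErrorCode ierr;

     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);

   The reuse call recomputes C = A^T B after the values of A or B change.
*/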
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data,*b=(Mat_MPIDense*)B->data,*c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data,*bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          am,an,bn,aN;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf, grouped by the owning process of each row block of C */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum the contributions from all ranks into the local rows of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reusing Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = Cdense->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  c            = (Mat_MPIDense*)Cdense->data;
  c->atbdense  = atb;
  atb->destroy = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
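/*
  Descriptive note (added commentary, not original): the routines below form
  C = A * B^T for two MATMPIDENSE matrices. Two communication schemes are
  implemented, selectable at symbolic time with
  -matmattransmult_mpidense_mpidense_via:
    allgatherv (default) - every rank gathers all of B^T with one
                           MPI_Allgatherv(), then a single local GEMM forms
                           its row block of C; simple, but each rank needs
                           storage for the whole of B^T;
    cyclic               - row blocks of B^T travel around a ring of ranks,
                           with each GEMM overlapped against the transfer of
                           the next block, so per-rank buffer memory stays
                           proportional to the largest single block.
*/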
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           i,size;
  PetscInt              maxRows,bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  const char            *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt              alg,nalg = 2;
  Mat_MatTransMultDense *abt;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  alg  = 0; /* default is allgatherv */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (A->cmap->N != B->cmap->N) {
    SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)Cdense,&tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reusing Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* cyclic: two buffers, each large enough for the biggest row block of B^T */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows,(B->rmap->range[i+1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* allgatherv: buf[0] holds the local block of B^T, buf[1] all of B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N,&(abt->buf[0]),B->rmap->N * B->cmap->N,&(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i+1] - abt->recvdispls[i];
    break;
  }

  c            = (Mat_MPIDense*)Cdense->data;
  c->abtdense  = abt;
  abt->destroy = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
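/*
  Descriptive note (added commentary, not original): in the cyclic variant
  below the ranks form a ring. Each rank starts from its own row block of B
  (compacted into contiguous storage if lda != n), multiplies it against the
  local rows of A to fill one column block of C, and meanwhile forwards the
  block to the previous rank while receiving the next one. After `size` steps
  every column block of C has been computed, with communication overlapping
  the local GEMMs.
*/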
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data,*b=(Mat_MPIDense*)B->data,*c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data,*bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense          *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,sendsiz,recvsiz,sendto,recvfrom,recvisfrom;
  PetscScalar           *sendbuf,*recvbuf=0,*carray;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm,cn,ck;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (bseq->lda == bn) {
    sendbuf = bseq->v;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bseq->v[i * bseq->lda + j];
      }
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* the previous receive and send have completed, so both buffers can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* process the next recvbuf will originate from */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr    = MPI_Isend(sendbuf,sendsiz,MPIU_SCALAR,sendto,abt->tag,comm,&reqs[0]);CHKERRQ(ierr);
      ierr    = MPI_Irecv(recvbuf,recvsiz,MPIU_SCALAR,recvfrom,abt->tag,comm,&reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    ierr   = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom],&cn);CHKERRQ(ierr);
    carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data,*b=(Mat_MPIDense*)B->data,*c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data,*bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense          *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size;
  PetscScalar           *sendbuf,*recvbuf;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm,cn,ck;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* copy transpose of B into buf[0] */
  bn      = B->rmap->n;
  sendbuf = abt->buf[0];
  recvbuf = abt->buf[1];
  for (k = 0, j = 0; j < bn; j++) {
    for (i = 0; i < cK; i++, k++) {
      sendbuf[k] = bseq->v[i * bseq->lda + j];
    }
  }
  ierr = MPI_Allgatherv(sendbuf,bn*cK,MPIU_SCALAR,recvbuf,abt->recvcounts,abt->recvdispls,MPIU_SCALAR,comm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
  PetscFunctionReturn(0);
}
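/*
  Usage note (added commentary, not original): the algorithm used by the
  numeric phase is fixed when the symbolic phase reads the options database,
  so a run can switch to the ring-based variant from the command line. An
  illustrative invocation, for any executable linked against PETSc:

    ./app -matmattransmult_mpidense_mpidense_via cyclic
*/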
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  switch (abt->alg) {
  case 1:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A,B,C);CHKERRQ(ierr);
    break;
  default:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A,B,C);CHKERRQ(ierr);
    break;
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
  Mat_MatMultDense *ab = a->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);

  ierr = (ab->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MPIDense     *c;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* convert A and B to Elemental matrices Ae and Be */
  ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);CHKERRQ(ierr);
  ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX,&Be);CHKERRQ(ierr);

  /* Ce = Ae*Be */
  ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);

  /* convert Ce to C */
  ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);

  /* create data structure for reusing C */
  ierr = PetscNew(&ab);CHKERRQ(ierr);
  c    = (Mat_MPIDense*)(*C)->data;
  c->abdense = ab;

  ab->Ae      = Ae;
  ab->Be      = Be;
  ab->Ce      = Ce;
  ab->destroy = (*C)->ops->destroy;
  (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
  (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
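/*
  Descriptive note (added commentary, not original): C = A * B is delegated
  to Elemental. The symbolic phase converts A and B to MATELEMENTAL once and
  stashes the converted matrices Ae, Be and the product Ce on C; the numeric
  phase refreshes Ae and Be in place and converts Ce back into the existing
  MPIDENSE C, so repeated products with MAT_REUSE_MATRIX avoid reallocating
  the Elemental wrappers.
*/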
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  } else {
    ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
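/*
  Compiled-out sketch (illustrative assumption, not part of this file) of the
  calling pattern served by MatMatMult_MPIDense_MPIDense() above: a first
  product with MAT_INITIAL_MATRIX creates C and the reusable data structures,
  later products with MAT_REUSE_MATRIX redo only the numeric phase. A and B
  are assumed to be assembled MATMPIDENSE matrices; ExampleMatMatMultReuse is
  a hypothetical name.
*/
#if 0
static PetscErrorCode ExampleMatMatMultReuse(Mat A,Mat B)
{
  Mat            C;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
  /* ... values of A and/or B change (sizes and layouts stay fixed) ... */
  ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif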