1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 #include <petscblaslapack.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatDenseGetLocalMatrix" 13 /*@ 14 15 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 16 matrix that represents the operator. For sequential matrices it returns itself. 17 18 Input Parameter: 19 . A - the Seq or MPI dense matrix 20 21 Output Parameter: 22 . B - the inner matrix 23 24 Level: intermediate 25 26 @*/ 27 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 28 { 29 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 30 PetscErrorCode ierr; 31 PetscBool flg; 32 33 PetscFunctionBegin; 34 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 35 if (flg) *B = mat->A; 36 else *B = A; 37 PetscFunctionReturn(0); 38 } 39 40 #undef __FUNCT__ 41 #define __FUNCT__ "MatGetRow_MPIDense" 42 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 43 { 44 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 45 PetscErrorCode ierr; 46 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 47 48 PetscFunctionBegin; 49 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 50 lrow = row - rstart; 51 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 52 PetscFunctionReturn(0); 53 } 54 55 #undef __FUNCT__ 56 #define __FUNCT__ "MatRestoreRow_MPIDense" 57 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 58 { 59 PetscErrorCode ierr; 60 61 PetscFunctionBegin; 62 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 63 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 69 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 70 { 71 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 72 PetscErrorCode ierr; 73 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 74 PetscScalar *array; 75 MPI_Comm comm; 76 Mat B; 77 78 PetscFunctionBegin; 79 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 80 81 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 82 if (!B) { 83 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 84 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 85 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 86 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 87 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 88 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 89 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 90 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 91 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 92 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 93 *a = B; 94 ierr = MatDestroy(&B);CHKERRQ(ierr); 95 } else *a = B; 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatSetValues_MPIDense" 101 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 102 { 103 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 104 PetscErrorCode ierr; 105 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 106 PetscBool roworiented = A->roworiented; 107 108 PetscFunctionBegin; 109 for (i=0; i<m; i++) { 110 if (idxm[i] < 0) continue; 111 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 112 if (idxm[i] >= rstart && idxm[i] < rend) { 113 row = idxm[i] - rstart; 114 if (roworiented) { 115 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 116 } else { 117 for (j=0; j<n; j++) { 118 if (idxn[j] < 0) continue; 119 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 120 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 121 } 122 } 123 } else if (!A->donotstash) { 124 mat->assembled = PETSC_FALSE; 125 if (roworiented) { 126 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 127 } else { 128 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 129 } 130 } 131 } 132 PetscFunctionReturn(0); 133 } 134 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatGetValues_MPIDense" 137 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 138 { 139 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 140 PetscErrorCode ierr; 141 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 142 143 PetscFunctionBegin; 144 for (i=0; i<m; i++) { 145 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 146 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 147 if (idxm[i] >= rstart && idxm[i] < rend) { 148 row = idxm[i] - rstart; 149 for (j=0; j<n; j++) { 150 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 151 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 152 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 153 } 154 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 155 } 156 PetscFunctionReturn(0); 157 } 158 159 #undef __FUNCT__ 160 #define __FUNCT__ "MatDenseGetArray_MPIDense" 161 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 162 { 163 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 164 PetscErrorCode ierr; 165 166 PetscFunctionBegin; 167 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 #undef __FUNCT__ 172 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 173 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 174 { 175 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 176 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 177 PetscErrorCode ierr; 178 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 179 const PetscInt *irow,*icol; 180 PetscScalar *av,*bv,*v = lmat->v; 181 Mat newmat; 182 IS iscol_local; 183 184 PetscFunctionBegin; 185 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 186 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 187 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 188 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 189 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 190 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 191 192 /* No parallel redistribution currently supported! Should really check each index set 193 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 194 original matrix! */ 195 196 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 197 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 198 199 /* Check submatrix call */ 200 if (scall == MAT_REUSE_MATRIX) { 201 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 202 /* Really need to test rows and column sizes! */ 203 newmat = *B; 204 } else { 205 /* Create and fill new matrix */ 206 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 207 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 208 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 209 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 210 } 211 212 /* Now extract the data pointers and do the copy, column at a time */ 213 newmatd = (Mat_MPIDense*)newmat->data; 214 bv = ((Mat_SeqDense*)newmatd->A->data)->v; 215 216 for (i=0; i<Ncols; i++) { 217 av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 218 for (j=0; j<nrows; j++) { 219 *bv++ = av[irow[j] - rstart]; 220 } 221 } 222 223 /* Assemble the matrices so that the correct flags are set */ 224 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 225 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 226 227 /* Free work space */ 228 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 229 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 230 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 231 *B = newmat; 232 PetscFunctionReturn(0); 233 } 234 235 #undef __FUNCT__ 236 #define __FUNCT__ "MatDenseRestoreArray_MPIDense" 237 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 238 { 239 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 240 PetscErrorCode ierr; 241 242 PetscFunctionBegin; 243 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 244 PetscFunctionReturn(0); 245 } 246 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 249 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 250 { 251 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 252 MPI_Comm comm; 253 PetscErrorCode ierr; 254 PetscInt nstash,reallocs; 255 InsertMode addv; 256 257 PetscFunctionBegin; 258 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 259 /* make sure all processors are either in INSERTMODE or ADDMODE */ 260 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 261 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 262 mat->insertmode = addv; /* in case this processor had no cache */ 263 264 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 265 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 266 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 272 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 273 { 274 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 275 PetscErrorCode ierr; 276 PetscInt i,*row,*col,flg,j,rstart,ncols; 277 PetscMPIInt n; 278 PetscScalar *val; 279 280 PetscFunctionBegin; 281 /* wait on receives */ 282 while (1) { 283 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 284 if (!flg) break; 285 286 for (i=0; i<n;) { 287 /* Now identify the consecutive vals belonging to the same row */ 288 for (j=i,rstart=row[j]; j<n; j++) { 289 if (row[j] != rstart) break; 290 } 291 if (j < n) ncols = j-i; 292 else ncols = n-i; 293 /* Now assemble all these values with a single function call */ 294 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 295 i = j; 296 } 297 } 298 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 299 300 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 301 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 302 303 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 304 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 305 } 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatZeroEntries_MPIDense" 311 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 312 { 313 PetscErrorCode ierr; 314 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 315 316 PetscFunctionBegin; 317 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 318 PetscFunctionReturn(0); 319 } 320 321 /* the code does not do the diagonal entries correctly unless the 322 matrix is square and the column and row owerships are identical. 323 This is a BUG. The only way to fix it seems to be to access 324 mdn->A and mdn->B directly and not through the MatZeroRows() 325 routine. 326 */ 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatZeroRows_MPIDense" 329 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 330 { 331 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 332 PetscErrorCode ierr; 333 PetscInt i,*owners = A->rmap->range; 334 PetscInt *sizes,j,idx,nsends; 335 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 336 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 337 PetscInt *lens,*lrows,*values; 338 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 339 MPI_Comm comm; 340 MPI_Request *send_waits,*recv_waits; 341 MPI_Status recv_status,*send_status; 342 PetscBool found; 343 const PetscScalar *xx; 344 PetscScalar *bb; 345 346 PetscFunctionBegin; 347 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 348 if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 349 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 350 /* first count number of contributors to each processor */ 351 ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 352 ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/ 353 for (i=0; i<N; i++) { 354 idx = rows[i]; 355 found = PETSC_FALSE; 356 for (j=0; j<size; j++) { 357 if (idx >= owners[j] && idx < owners[j+1]) { 358 sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 359 } 360 } 361 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 362 } 363 nsends = 0; 364 for (i=0; i<size; i++) nsends += sizes[2*i+1]; 365 366 /* inform other processors of number of messages and max length*/ 367 ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr); 368 369 /* post receives: */ 370 ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr); 371 ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr); 372 for (i=0; i<nrecvs; i++) { 373 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 374 } 375 376 /* do sends: 377 1) starts[i] gives the starting index in svalues for stuff going to 378 the ith processor 379 */ 380 ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr); 381 ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr); 382 ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr); 383 384 starts[0] = 0; 385 for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 386 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 387 388 starts[0] = 0; 389 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 390 count = 0; 391 for (i=0; i<size; i++) { 392 if (sizes[2*i+1]) { 393 ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 394 } 395 } 396 ierr = PetscFree(starts);CHKERRQ(ierr); 397 398 base = owners[rank]; 399 400 /* wait on receives */ 401 ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 402 count = nrecvs; 403 slen = 0; 404 while (count) { 405 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 406 /* unpack receives into our local space */ 407 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 408 409 source[imdex] = recv_status.MPI_SOURCE; 410 lens[imdex] = n; 411 slen += n; 412 count--; 413 } 414 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 415 416 /* move the data into the send scatter */ 417 ierr = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr); 418 count = 0; 419 for (i=0; i<nrecvs; i++) { 420 values = rvalues + i*nmax; 421 for (j=0; j<lens[i]; j++) { 422 lrows[count++] = values[j] - base; 423 } 424 } 425 ierr = PetscFree(rvalues);CHKERRQ(ierr); 426 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 427 ierr = PetscFree(owner);CHKERRQ(ierr); 428 ierr = PetscFree(sizes);CHKERRQ(ierr); 429 430 /* fix right hand side if needed */ 431 if (x && b) { 432 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 433 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 434 for (i=0; i<slen; i++) { 435 bb[lrows[i]] = diag*xx[lrows[i]]; 436 } 437 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 438 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 439 } 440 441 /* actually zap the local rows */ 442 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 443 if (diag != 0.0) { 444 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 445 PetscInt m = ll->lda, i; 446 447 for (i=0; i<slen; i++) { 448 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 449 } 450 } 451 ierr = PetscFree(lrows);CHKERRQ(ierr); 452 453 /* wait on sends */ 454 if (nsends) { 455 ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); 456 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 457 ierr = PetscFree(send_status);CHKERRQ(ierr); 458 } 459 ierr = PetscFree(send_waits);CHKERRQ(ierr); 460 ierr = PetscFree(svalues);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 465 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 466 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 467 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatMult_MPIDense" 471 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 472 { 473 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 474 PetscErrorCode ierr; 475 476 PetscFunctionBegin; 477 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatMultAdd_MPIDense" 485 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 486 { 487 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 488 PetscErrorCode ierr; 489 490 PetscFunctionBegin; 491 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 492 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 493 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "MatMultTranspose_MPIDense" 499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 500 { 501 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 502 PetscErrorCode ierr; 503 PetscScalar zero = 0.0; 504 505 PetscFunctionBegin; 506 ierr = VecSet(yy,zero);CHKERRQ(ierr); 507 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 508 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 509 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 510 PetscFunctionReturn(0); 511 } 512 513 #undef __FUNCT__ 514 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 515 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 516 { 517 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 518 PetscErrorCode ierr; 519 520 PetscFunctionBegin; 521 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 522 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 523 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 524 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 525 PetscFunctionReturn(0); 526 } 527 528 #undef __FUNCT__ 529 #define __FUNCT__ "MatGetDiagonal_MPIDense" 530 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 531 { 532 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 533 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 534 PetscErrorCode ierr; 535 PetscInt len,i,n,m = A->rmap->n,radd; 536 PetscScalar *x,zero = 0.0; 537 538 PetscFunctionBegin; 539 ierr = VecSet(v,zero);CHKERRQ(ierr); 540 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 541 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 542 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 543 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 544 radd = A->rmap->rstart*m; 545 for (i=0; i<len; i++) { 546 x[i] = aloc->v[radd + i*m + i]; 547 } 548 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 549 PetscFunctionReturn(0); 550 } 551 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatDestroy_MPIDense" 554 PetscErrorCode MatDestroy_MPIDense(Mat mat) 555 { 556 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 557 PetscErrorCode ierr; 558 559 PetscFunctionBegin; 560 #if defined(PETSC_USE_LOG) 561 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 562 #endif 563 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 564 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 565 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 566 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 567 568 ierr = PetscFree(mat->data);CHKERRQ(ierr); 569 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 570 571 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 572 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 573 #if defined(PETSC_HAVE_ELEMENTAL) 574 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 575 #endif 576 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 577 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 578 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 579 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 580 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 581 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 582 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 583 PetscFunctionReturn(0); 584 } 585 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatView_MPIDense_Binary" 588 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 589 { 590 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 591 PetscErrorCode ierr; 592 PetscViewerFormat format; 593 int fd; 594 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 595 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 596 PetscScalar *work,*v,*vv; 597 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 598 599 PetscFunctionBegin; 600 if (mdn->size == 1) { 601 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 602 } else { 603 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 604 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 605 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 606 607 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 608 if (format == PETSC_VIEWER_NATIVE) { 609 610 if (!rank) { 611 /* store the matrix as a dense matrix */ 612 header[0] = MAT_FILE_CLASSID; 613 header[1] = mat->rmap->N; 614 header[2] = N; 615 header[3] = MATRIX_BINARY_FORMAT_DENSE; 616 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 617 618 /* get largest work array needed for transposing array */ 619 mmax = mat->rmap->n; 620 for (i=1; i<size; i++) { 621 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 622 } 623 ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr); 624 625 /* write out local array, by rows */ 626 m = mat->rmap->n; 627 v = a->v; 628 for (j=0; j<N; j++) { 629 for (i=0; i<m; i++) { 630 work[j + i*N] = *v++; 631 } 632 } 633 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 634 /* get largest work array to receive messages from other processes, excludes process zero */ 635 mmax = 0; 636 for (i=1; i<size; i++) { 637 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 638 } 639 ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr); 640 for (k = 1; k < size; k++) { 641 v = vv; 642 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 643 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 644 645 for (j = 0; j < N; j++) { 646 for (i = 0; i < m; i++) { 647 work[j + i*N] = *v++; 648 } 649 } 650 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 651 } 652 ierr = PetscFree(work);CHKERRQ(ierr); 653 ierr = PetscFree(vv);CHKERRQ(ierr); 654 } else { 655 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 656 } 657 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)"); 658 } 659 PetscFunctionReturn(0); 660 } 661 662 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 663 #include <petscdraw.h> 664 #undef __FUNCT__ 665 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 666 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 667 { 668 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 669 PetscErrorCode ierr; 670 PetscMPIInt rank = mdn->rank; 671 PetscViewerType vtype; 672 PetscBool iascii,isdraw; 673 PetscViewer sviewer; 674 PetscViewerFormat format; 675 676 PetscFunctionBegin; 677 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 678 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 679 if (iascii) { 680 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 681 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 682 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 683 MatInfo info; 684 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 685 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 686 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 687 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 688 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 689 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 690 PetscFunctionReturn(0); 691 } else if (format == PETSC_VIEWER_ASCII_INFO) { 692 PetscFunctionReturn(0); 693 } 694 } else if (isdraw) { 695 PetscDraw draw; 696 PetscBool isnull; 697 698 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 699 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 700 if (isnull) PetscFunctionReturn(0); 701 } 702 703 { 704 /* assemble the entire matrix onto first processor. */ 705 Mat A; 706 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 707 PetscInt *cols; 708 PetscScalar *vals; 709 710 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 711 if (!rank) { 712 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 713 } else { 714 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 715 } 716 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 717 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 718 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 719 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 720 721 /* Copy the matrix ... This isn't the most efficient means, 722 but it's quick for now */ 723 A->insertmode = INSERT_VALUES; 724 725 row = mat->rmap->rstart; 726 m = mdn->A->rmap->n; 727 for (i=0; i<m; i++) { 728 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 729 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 730 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 731 row++; 732 } 733 734 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 735 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 736 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 737 if (!rank) { 738 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 739 } 740 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 741 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 742 ierr = MatDestroy(&A);CHKERRQ(ierr); 743 } 744 PetscFunctionReturn(0); 745 } 746 747 #undef __FUNCT__ 748 #define __FUNCT__ "MatView_MPIDense" 749 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 750 { 751 PetscErrorCode ierr; 752 PetscBool iascii,isbinary,isdraw,issocket; 753 754 PetscFunctionBegin; 755 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 756 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 757 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 758 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 759 760 if (iascii || issocket || isdraw) { 761 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 762 } else if (isbinary) { 763 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatGetInfo_MPIDense" 770 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 771 { 772 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 773 Mat mdn = mat->A; 774 PetscErrorCode ierr; 775 PetscReal isend[5],irecv[5]; 776 777 PetscFunctionBegin; 778 info->block_size = 1.0; 779 780 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 781 782 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 783 isend[3] = info->memory; isend[4] = info->mallocs; 784 if (flag == MAT_LOCAL) { 785 info->nz_used = isend[0]; 786 info->nz_allocated = isend[1]; 787 info->nz_unneeded = isend[2]; 788 info->memory = isend[3]; 789 info->mallocs = isend[4]; 790 } else if (flag == MAT_GLOBAL_MAX) { 791 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 792 793 info->nz_used = irecv[0]; 794 info->nz_allocated = irecv[1]; 795 info->nz_unneeded = irecv[2]; 796 info->memory = irecv[3]; 797 info->mallocs = irecv[4]; 798 } else if (flag == MAT_GLOBAL_SUM) { 799 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 800 801 info->nz_used = irecv[0]; 802 info->nz_allocated = irecv[1]; 803 info->nz_unneeded = irecv[2]; 804 info->memory = irecv[3]; 805 info->mallocs = irecv[4]; 806 } 807 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 808 info->fill_ratio_needed = 0; 809 info->factor_mallocs = 0; 810 PetscFunctionReturn(0); 811 } 812 813 #undef __FUNCT__ 814 #define __FUNCT__ "MatSetOption_MPIDense" 815 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 816 { 817 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 818 PetscErrorCode ierr; 819 820 PetscFunctionBegin; 821 switch (op) { 822 case MAT_NEW_NONZERO_LOCATIONS: 823 case MAT_NEW_NONZERO_LOCATION_ERR: 824 case MAT_NEW_NONZERO_ALLOCATION_ERR: 825 MatCheckPreallocated(A,1); 826 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 827 break; 828 case MAT_ROW_ORIENTED: 829 MatCheckPreallocated(A,1); 830 a->roworiented = flg; 831 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 832 break; 833 case MAT_NEW_DIAGONALS: 834 case MAT_KEEP_NONZERO_PATTERN: 835 case MAT_USE_HASH_TABLE: 836 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 837 break; 838 case MAT_IGNORE_OFF_PROC_ENTRIES: 839 a->donotstash = flg; 840 break; 841 case MAT_SYMMETRIC: 842 case MAT_STRUCTURALLY_SYMMETRIC: 843 case MAT_HERMITIAN: 844 case MAT_SYMMETRY_ETERNAL: 845 case MAT_IGNORE_LOWER_TRIANGULAR: 846 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 847 break; 848 default: 849 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 850 } 851 PetscFunctionReturn(0); 852 } 853 854 855 #undef __FUNCT__ 856 #define __FUNCT__ "MatDiagonalScale_MPIDense" 857 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 858 { 859 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 860 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 861 PetscScalar *l,*r,x,*v; 862 PetscErrorCode ierr; 863 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 864 865 PetscFunctionBegin; 866 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 867 if (ll) { 868 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 869 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 870 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 871 for (i=0; i<m; i++) { 872 x = l[i]; 873 v = mat->v + i; 874 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 875 } 876 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 877 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 878 } 879 if (rr) { 880 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 881 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 882 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 883 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 884 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 885 for (i=0; i<n; i++) { 886 x = r[i]; 887 v = mat->v + i*m; 888 for (j=0; j<m; j++) (*v++) *= x; 889 } 890 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 891 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 892 } 893 PetscFunctionReturn(0); 894 } 895 896 #undef __FUNCT__ 897 #define __FUNCT__ "MatNorm_MPIDense" 898 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 899 { 900 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 901 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 902 PetscErrorCode ierr; 903 PetscInt i,j; 904 PetscReal sum = 0.0; 905 PetscScalar *v = mat->v; 906 907 PetscFunctionBegin; 908 if (mdn->size == 1) { 909 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 910 } else { 911 if (type == NORM_FROBENIUS) { 912 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 913 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 914 } 915 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 916 *nrm = PetscSqrtReal(*nrm); 917 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 918 } else if (type == NORM_1) { 919 PetscReal *tmp,*tmp2; 920 ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 921 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 922 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 923 *nrm = 0.0; 924 v = mat->v; 925 for (j=0; j<mdn->A->cmap->n; j++) { 926 for (i=0; i<mdn->A->rmap->n; i++) { 927 tmp[j] += PetscAbsScalar(*v); v++; 928 } 929 } 930 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 931 for (j=0; j<A->cmap->N; j++) { 932 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 933 } 934 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 935 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 936 } else if (type == NORM_INFINITY) { /* max row norm */ 937 PetscReal ntemp; 938 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 939 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 940 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 941 } 942 PetscFunctionReturn(0); 943 } 944 945 #undef __FUNCT__ 946 #define __FUNCT__ "MatTranspose_MPIDense" 947 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 948 { 949 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 950 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 951 Mat B; 952 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 953 PetscErrorCode ierr; 954 PetscInt j,i; 955 PetscScalar *v; 956 957 PetscFunctionBegin; 958 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place"); 959 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 960 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 961 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 962 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 963 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 964 } else { 965 B = *matout; 966 } 967 968 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 969 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 970 for (i=0; i<m; i++) rwork[i] = rstart + i; 971 for (j=0; j<n; j++) { 972 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 973 v += m; 974 } 975 ierr = PetscFree(rwork);CHKERRQ(ierr); 976 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 979 *matout = B; 980 } else { 981 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 982 } 983 PetscFunctionReturn(0); 984 } 985 986 987 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 988 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 989 990 #undef __FUNCT__ 991 #define __FUNCT__ "MatSetUp_MPIDense" 992 PetscErrorCode MatSetUp_MPIDense(Mat A) 993 { 994 PetscErrorCode ierr; 995 996 PetscFunctionBegin; 997 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 1001 #undef __FUNCT__ 1002 #define __FUNCT__ "MatAXPY_MPIDense" 1003 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1004 { 1005 PetscErrorCode ierr; 1006 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1007 1008 PetscFunctionBegin; 1009 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1010 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1011 PetscFunctionReturn(0); 1012 } 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatConjugate_MPIDense" 1016 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1017 { 1018 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1019 PetscErrorCode ierr; 1020 1021 PetscFunctionBegin; 1022 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1023 PetscFunctionReturn(0); 1024 } 1025 1026 #undef __FUNCT__ 1027 #define __FUNCT__ "MatRealPart_MPIDense" 1028 PetscErrorCode MatRealPart_MPIDense(Mat A) 1029 { 1030 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1031 PetscErrorCode ierr; 1032 1033 PetscFunctionBegin; 1034 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1040 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1041 { 1042 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1043 PetscErrorCode ierr; 1044 1045 PetscFunctionBegin; 1046 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1047 PetscFunctionReturn(0); 1048 } 1049 1050 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatGetColumnNorms_MPIDense" 1053 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1054 { 1055 PetscErrorCode ierr; 1056 PetscInt i,n; 1057 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1058 PetscReal *work; 1059 1060 PetscFunctionBegin; 1061 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1062 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1063 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1064 if (type == NORM_2) { 1065 for (i=0; i<n; i++) work[i] *= work[i]; 1066 } 1067 if (type == NORM_INFINITY) { 1068 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1069 } else { 1070 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1071 } 1072 ierr = PetscFree(work);CHKERRQ(ierr); 1073 if (type == NORM_2) { 1074 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1075 } 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatSetRandom_MPIDense" 1081 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1082 { 1083 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1084 PetscErrorCode ierr; 1085 PetscScalar *a; 1086 PetscInt m,n,i; 1087 1088 PetscFunctionBegin; 1089 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1090 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1091 for (i=0; i<m*n; i++) { 1092 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1093 } 1094 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1095 PetscFunctionReturn(0); 1096 } 1097 1098 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMissingDiagonal_MPIDense" 1102 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1103 { 1104 PetscFunctionBegin; 1105 *missing = PETSC_FALSE; 1106 PetscFunctionReturn(0); 1107 } 1108 1109 /* -------------------------------------------------------------------*/ 1110 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1111 MatGetRow_MPIDense, 1112 MatRestoreRow_MPIDense, 1113 MatMult_MPIDense, 1114 /* 4*/ MatMultAdd_MPIDense, 1115 MatMultTranspose_MPIDense, 1116 MatMultTransposeAdd_MPIDense, 1117 0, 1118 0, 1119 0, 1120 /* 10*/ 0, 1121 0, 1122 0, 1123 0, 1124 MatTranspose_MPIDense, 1125 /* 15*/ MatGetInfo_MPIDense, 1126 MatEqual_MPIDense, 1127 MatGetDiagonal_MPIDense, 1128 MatDiagonalScale_MPIDense, 1129 MatNorm_MPIDense, 1130 /* 20*/ MatAssemblyBegin_MPIDense, 1131 MatAssemblyEnd_MPIDense, 1132 MatSetOption_MPIDense, 1133 MatZeroEntries_MPIDense, 1134 /* 24*/ MatZeroRows_MPIDense, 1135 0, 1136 0, 1137 0, 1138 0, 1139 /* 29*/ MatSetUp_MPIDense, 1140 0, 1141 0, 1142 MatGetDiagonalBlock_MPIDense, 1143 0, 1144 /* 34*/ MatDuplicate_MPIDense, 1145 0, 1146 0, 1147 0, 1148 0, 1149 /* 39*/ MatAXPY_MPIDense, 1150 MatGetSubMatrices_MPIDense, 1151 0, 1152 MatGetValues_MPIDense, 1153 0, 1154 /* 44*/ 0, 1155 MatScale_MPIDense, 1156 MatShift_Basic, 1157 0, 1158 0, 1159 /* 49*/ MatSetRandom_MPIDense, 1160 0, 1161 0, 1162 0, 1163 0, 1164 /* 54*/ 0, 1165 0, 1166 0, 1167 0, 1168 0, 1169 /* 59*/ MatGetSubMatrix_MPIDense, 1170 MatDestroy_MPIDense, 1171 MatView_MPIDense, 1172 0, 1173 0, 1174 /* 64*/ 0, 1175 0, 1176 0, 1177 0, 1178 0, 1179 /* 69*/ 0, 1180 0, 1181 0, 1182 0, 1183 0, 1184 /* 74*/ 0, 1185 0, 1186 0, 1187 0, 1188 0, 1189 /* 79*/ 0, 1190 0, 1191 0, 1192 0, 1193 /* 83*/ MatLoad_MPIDense, 1194 0, 1195 0, 1196 0, 1197 0, 1198 0, 1199 #if defined(PETSC_HAVE_ELEMENTAL) 1200 /* 89*/ MatMatMult_MPIDense_MPIDense, 1201 MatMatMultSymbolic_MPIDense_MPIDense, 1202 #else 1203 /* 89*/ 0, 1204 0, 1205 #endif 1206 MatMatMultNumeric_MPIDense, 1207 0, 1208 0, 1209 /* 94*/ 0, 1210 0, 1211 0, 1212 0, 1213 0, 1214 /* 99*/ 0, 1215 0, 1216 0, 1217 MatConjugate_MPIDense, 1218 0, 1219 /*104*/ 0, 1220 MatRealPart_MPIDense, 1221 MatImaginaryPart_MPIDense, 1222 0, 1223 0, 1224 /*109*/ 0, 1225 0, 1226 0, 1227 0, 1228 MatMissingDiagonal_MPIDense, 1229 /*114*/ 0, 1230 0, 1231 0, 1232 0, 1233 0, 1234 /*119*/ 0, 1235 0, 1236 0, 1237 0, 1238 0, 1239 /*124*/ 0, 1240 MatGetColumnNorms_MPIDense, 1241 0, 1242 0, 1243 0, 1244 /*129*/ 0, 1245 MatTransposeMatMult_MPIDense_MPIDense, 1246 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1247 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1248 0, 1249 /*134*/ 0, 1250 0, 1251 0, 1252 0, 1253 0, 1254 /*139*/ 0, 1255 0, 1256 0 1257 }; 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1261 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1262 { 1263 Mat_MPIDense *a; 1264 PetscErrorCode ierr; 1265 1266 PetscFunctionBegin; 1267 mat->preallocated = PETSC_TRUE; 1268 /* Note: For now, when data is specified above, this assumes the user correctly 1269 allocates the local dense storage space. We should add error checking. */ 1270 1271 a = (Mat_MPIDense*)mat->data; 1272 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1273 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1274 a->nvec = mat->cmap->n; 1275 1276 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1277 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1278 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1279 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1280 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1281 PetscFunctionReturn(0); 1282 } 1283 1284 #if defined(PETSC_HAVE_ELEMENTAL) 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatConvert_MPIDense_Elemental" 1287 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1288 { 1289 Mat mat_elemental; 1290 PetscErrorCode ierr; 1291 PetscScalar *v; 1292 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1293 1294 PetscFunctionBegin; 1295 if (reuse == MAT_REUSE_MATRIX) { 1296 mat_elemental = *newmat; 1297 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1298 } else { 1299 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1300 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1301 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1302 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1303 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1304 } 1305 1306 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1307 for (i=0; i<N; i++) cols[i] = i; 1308 for (i=0; i<m; i++) rows[i] = rstart + i; 1309 1310 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1311 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1312 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1313 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1314 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1315 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1316 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1317 1318 if (reuse == MAT_INPLACE_MATRIX) { 1319 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1320 } else { 1321 *newmat = mat_elemental; 1322 } 1323 PetscFunctionReturn(0); 1324 } 1325 #endif 1326 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "MatCreate_MPIDense" 1329 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1330 { 1331 Mat_MPIDense *a; 1332 PetscErrorCode ierr; 1333 1334 PetscFunctionBegin; 1335 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1336 mat->data = (void*)a; 1337 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1338 1339 mat->insertmode = NOT_SET_VALUES; 1340 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1341 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1342 1343 /* build cache for off array entries formed */ 1344 a->donotstash = PETSC_FALSE; 1345 1346 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1347 1348 /* stuff used for matrix vector multiply */ 1349 a->lvec = 0; 1350 a->Mvctx = 0; 1351 a->roworiented = PETSC_TRUE; 1352 1353 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1354 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1355 #if defined(PETSC_HAVE_ELEMENTAL) 1356 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 1357 #endif 1358 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1359 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1360 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1361 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1362 1363 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1364 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1365 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1366 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1367 PetscFunctionReturn(0); 1368 } 1369 1370 /*MC 1371 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1372 1373 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1374 and MATMPIDENSE otherwise. 1375 1376 Options Database Keys: 1377 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1378 1379 Level: beginner 1380 1381 1382 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1383 M*/ 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1387 /*@C 1388 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1389 1390 Not collective 1391 1392 Input Parameters: 1393 . B - the matrix 1394 - data - optional location of matrix data. Set data=NULL for PETSc 1395 to control all matrix memory allocation. 1396 1397 Notes: 1398 The dense format is fully compatible with standard Fortran 77 1399 storage by columns. 1400 1401 The data input variable is intended primarily for Fortran programmers 1402 who wish to allocate their own matrix memory space. Most users should 1403 set data=NULL. 1404 1405 Level: intermediate 1406 1407 .keywords: matrix,dense, parallel 1408 1409 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1410 @*/ 1411 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1412 { 1413 PetscErrorCode ierr; 1414 1415 PetscFunctionBegin; 1416 ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatCreateDense" 1422 /*@C 1423 MatCreateDense - Creates a parallel matrix in dense format. 1424 1425 Collective on MPI_Comm 1426 1427 Input Parameters: 1428 + comm - MPI communicator 1429 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1430 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1431 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1432 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1433 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1434 to control all matrix memory allocation. 1435 1436 Output Parameter: 1437 . A - the matrix 1438 1439 Notes: 1440 The dense format is fully compatible with standard Fortran 77 1441 storage by columns. 1442 1443 The data input variable is intended primarily for Fortran programmers 1444 who wish to allocate their own matrix memory space. Most users should 1445 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 1446 1447 The user MUST specify either the local or global matrix dimensions 1448 (possibly both). 1449 1450 Level: intermediate 1451 1452 .keywords: matrix,dense, parallel 1453 1454 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1455 @*/ 1456 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1457 { 1458 PetscErrorCode ierr; 1459 PetscMPIInt size; 1460 1461 PetscFunctionBegin; 1462 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1463 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1464 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1465 if (size > 1) { 1466 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1467 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1468 if (data) { /* user provided data array, so no need to assemble */ 1469 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 1470 (*A)->assembled = PETSC_TRUE; 1471 } 1472 } else { 1473 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1474 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1475 } 1476 PetscFunctionReturn(0); 1477 } 1478 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "MatDuplicate_MPIDense" 1481 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1482 { 1483 Mat mat; 1484 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1485 PetscErrorCode ierr; 1486 1487 PetscFunctionBegin; 1488 *newmat = 0; 1489 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 1490 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1491 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1492 a = (Mat_MPIDense*)mat->data; 1493 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1494 1495 mat->factortype = A->factortype; 1496 mat->assembled = PETSC_TRUE; 1497 mat->preallocated = PETSC_TRUE; 1498 1499 a->size = oldmat->size; 1500 a->rank = oldmat->rank; 1501 mat->insertmode = NOT_SET_VALUES; 1502 a->nvec = oldmat->nvec; 1503 a->donotstash = oldmat->donotstash; 1504 1505 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1506 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1507 1508 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1509 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1510 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1511 1512 *newmat = mat; 1513 PetscFunctionReturn(0); 1514 } 1515 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1518 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat) 1519 { 1520 PetscErrorCode ierr; 1521 PetscMPIInt rank,size; 1522 const PetscInt *rowners; 1523 PetscInt i,m,n,nz,j,mMax; 1524 PetscScalar *array,*vals,*vals_ptr; 1525 Mat_MPIDense *a = (Mat_MPIDense*)newmat->data; 1526 1527 PetscFunctionBegin; 1528 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1529 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1530 1531 /* determine ownership of rows and columns */ 1532 m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n; 1533 n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n; 1534 1535 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1536 if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) { 1537 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1538 } 1539 ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 1540 ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr); 1541 ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr); 1542 ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr); 1543 if (!rank) { 1544 ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr); 1545 1546 /* read in my part of the matrix numerical values */ 1547 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1548 1549 /* insert into matrix-by row (this is why cannot directly read into array */ 1550 vals_ptr = vals; 1551 for (i=0; i<m; i++) { 1552 for (j=0; j<N; j++) { 1553 array[i + j*m] = *vals_ptr++; 1554 } 1555 } 1556 1557 /* read in other processors and ship out */ 1558 for (i=1; i<size; i++) { 1559 nz = (rowners[i+1] - rowners[i])*N; 1560 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1561 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1562 } 1563 } else { 1564 /* receive numeric values */ 1565 ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr); 1566 1567 /* receive message of values*/ 1568 ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1569 1570 /* insert into matrix-by row (this is why cannot directly read into array */ 1571 vals_ptr = vals; 1572 for (i=0; i<m; i++) { 1573 for (j=0; j<N; j++) { 1574 array[i + j*m] = *vals_ptr++; 1575 } 1576 } 1577 } 1578 ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr); 1579 ierr = PetscFree(vals);CHKERRQ(ierr); 1580 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1581 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatLoad_MPIDense" 1587 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1588 { 1589 Mat_MPIDense *a; 1590 PetscScalar *vals,*svals; 1591 MPI_Comm comm; 1592 MPI_Status status; 1593 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz; 1594 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1595 PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols; 1596 PetscInt i,nz,j,rstart,rend; 1597 int fd; 1598 PetscErrorCode ierr; 1599 1600 PetscFunctionBegin; 1601 /* force binary viewer to load .info file if it has not yet done so */ 1602 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1603 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 1604 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1605 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1606 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1607 if (!rank) { 1608 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 1609 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1610 } 1611 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1612 M = header[1]; N = header[2]; nz = header[3]; 1613 1614 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1615 if (newmat->rmap->N < 0) newmat->rmap->N = M; 1616 if (newmat->cmap->N < 0) newmat->cmap->N = N; 1617 1618 if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N); 1619 if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N); 1620 1621 /* 1622 Handle case where matrix is stored on disk as a dense matrix 1623 */ 1624 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1625 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1626 PetscFunctionReturn(0); 1627 } 1628 1629 /* determine ownership of all rows */ 1630 if (newmat->rmap->n < 0) { 1631 ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr); 1632 } else { 1633 ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr); 1634 } 1635 if (newmat->cmap->n < 0) { 1636 n = PETSC_DECIDE; 1637 } else { 1638 ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr); 1639 } 1640 1641 ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr); 1642 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1643 rowners[0] = 0; 1644 for (i=2; i<=size; i++) { 1645 rowners[i] += rowners[i-1]; 1646 } 1647 rstart = rowners[rank]; 1648 rend = rowners[rank+1]; 1649 1650 /* distribute row lengths to all processors */ 1651 ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr); 1652 if (!rank) { 1653 ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr); 1654 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1655 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 1656 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1657 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1658 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1659 } else { 1660 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1661 } 1662 1663 if (!rank) { 1664 /* calculate the number of nonzeros on each processor */ 1665 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 1666 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1667 for (i=0; i<size; i++) { 1668 for (j=rowners[i]; j< rowners[i+1]; j++) { 1669 procsnz[i] += rowlengths[j]; 1670 } 1671 } 1672 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1673 1674 /* determine max buffer needed and allocate it */ 1675 maxnz = 0; 1676 for (i=0; i<size; i++) { 1677 maxnz = PetscMax(maxnz,procsnz[i]); 1678 } 1679 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 1680 1681 /* read in my part of the matrix column indices */ 1682 nz = procsnz[0]; 1683 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 1684 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1685 1686 /* read in every one elses and ship off */ 1687 for (i=1; i<size; i++) { 1688 nz = procsnz[i]; 1689 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1690 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1691 } 1692 ierr = PetscFree(cols);CHKERRQ(ierr); 1693 } else { 1694 /* determine buffer space needed for message */ 1695 nz = 0; 1696 for (i=0; i<m; i++) { 1697 nz += ourlens[i]; 1698 } 1699 ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr); 1700 1701 /* receive message of column indices*/ 1702 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1703 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1704 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1705 } 1706 1707 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1708 a = (Mat_MPIDense*)newmat->data; 1709 if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) { 1710 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1711 } 1712 1713 if (!rank) { 1714 ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr); 1715 1716 /* read in my part of the matrix numerical values */ 1717 nz = procsnz[0]; 1718 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1719 1720 /* insert into matrix */ 1721 jj = rstart; 1722 smycols = mycols; 1723 svals = vals; 1724 for (i=0; i<m; i++) { 1725 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1726 smycols += ourlens[i]; 1727 svals += ourlens[i]; 1728 jj++; 1729 } 1730 1731 /* read in other processors and ship out */ 1732 for (i=1; i<size; i++) { 1733 nz = procsnz[i]; 1734 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1735 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 1736 } 1737 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1738 } else { 1739 /* receive numeric values */ 1740 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 1741 1742 /* receive message of values*/ 1743 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 1744 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1745 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1746 1747 /* insert into matrix */ 1748 jj = rstart; 1749 smycols = mycols; 1750 svals = vals; 1751 for (i=0; i<m; i++) { 1752 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1753 smycols += ourlens[i]; 1754 svals += ourlens[i]; 1755 jj++; 1756 } 1757 } 1758 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1759 ierr = PetscFree(vals);CHKERRQ(ierr); 1760 ierr = PetscFree(mycols);CHKERRQ(ierr); 1761 ierr = PetscFree(rowners);CHKERRQ(ierr); 1762 1763 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1764 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1765 PetscFunctionReturn(0); 1766 } 1767 1768 #undef __FUNCT__ 1769 #define __FUNCT__ "MatEqual_MPIDense" 1770 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 1771 { 1772 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1773 Mat a,b; 1774 PetscBool flg; 1775 PetscErrorCode ierr; 1776 1777 PetscFunctionBegin; 1778 a = matA->A; 1779 b = matB->A; 1780 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1781 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1782 PetscFunctionReturn(0); 1783 } 1784 1785 #undef __FUNCT__ 1786 #define __FUNCT__ "MatDestroy_MatTransMatMult_MPIDense_MPIDense" 1787 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A) 1788 { 1789 PetscErrorCode ierr; 1790 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1791 Mat_TransMatMultDense *atb = a->atbdense; 1792 1793 PetscFunctionBegin; 1794 ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr); 1795 ierr = (atb->destroy)(A);CHKERRQ(ierr); 1796 ierr = PetscFree(atb);CHKERRQ(ierr); 1797 PetscFunctionReturn(0); 1798 } 1799 1800 #undef __FUNCT__ 1801 #define __FUNCT__ "MatTransposeMatMultNumeric_MPIDense_MPIDense" 1802 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1803 { 1804 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 1805 Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 1806 Mat_TransMatMultDense *atb = c->atbdense; 1807 PetscErrorCode ierr; 1808 MPI_Comm comm; 1809 PetscMPIInt rank,size,*recvcounts=atb->recvcounts; 1810 PetscScalar *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf; 1811 PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 1812 PetscScalar _DOne=1.0,_DZero=0.0; 1813 PetscBLASInt am,an,bn,aN; 1814 const PetscInt *ranges; 1815 1816 PetscFunctionBegin; 1817 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1818 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1819 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1820 1821 /* compute atbarray = aseq^T * bseq */ 1822 ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr); 1823 ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr); 1824 ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr); 1825 ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr); 1826 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN)); 1827 1828 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 1829 for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 1830 1831 /* arrange atbarray into sendbuf */ 1832 k = 0; 1833 for (proc=0; proc<size; proc++) { 1834 for (j=0; j<cN; j++) { 1835 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 1836 } 1837 } 1838 /* sum all atbarray to local values of C */ 1839 ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr); 1840 ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 1841 ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr); 1842 PetscFunctionReturn(0); 1843 } 1844 1845 #undef __FUNCT__ 1846 #define __FUNCT__ "MatTransposeMatMultSymbolic_MPIDense_MPIDense" 1847 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1848 { 1849 PetscErrorCode ierr; 1850 Mat Cdense; 1851 MPI_Comm comm; 1852 PetscMPIInt size; 1853 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 1854 Mat_MPIDense *c; 1855 Mat_TransMatMultDense *atb; 1856 1857 PetscFunctionBegin; 1858 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1859 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 1860 SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 1861 } 1862 1863 /* create matrix product Cdense */ 1864 ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr); 1865 ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1866 ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr); 1867 ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr); 1868 ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1869 ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1870 *C = Cdense; 1871 1872 /* create data structure for reuse Cdense */ 1873 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1874 ierr = PetscNew(&atb);CHKERRQ(ierr); 1875 cM = Cdense->rmap->N; 1876 ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr); 1877 1878 c = (Mat_MPIDense*)Cdense->data; 1879 c->atbdense = atb; 1880 atb->destroy = Cdense->ops->destroy; 1881 Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 1882 PetscFunctionReturn(0); 1883 } 1884 1885 #undef __FUNCT__ 1886 #define __FUNCT__ "MatTransposeMatMult_MPIDense_MPIDense" 1887 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1888 { 1889 PetscErrorCode ierr; 1890 1891 PetscFunctionBegin; 1892 if (scall == MAT_INITIAL_MATRIX) { 1893 ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1894 } 1895 ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatDestroy_MatMatMult_MPIDense_MPIDense" 1901 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A) 1902 { 1903 PetscErrorCode ierr; 1904 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1905 Mat_MatMultDense *ab = a->abdense; 1906 1907 PetscFunctionBegin; 1908 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 1909 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 1910 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 1911 1912 ierr = (ab->destroy)(A);CHKERRQ(ierr); 1913 ierr = PetscFree(ab);CHKERRQ(ierr); 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #if defined(PETSC_HAVE_ELEMENTAL) 1918 #undef __FUNCT__ 1919 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1920 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1921 { 1922 PetscErrorCode ierr; 1923 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 1924 Mat_MatMultDense *ab=c->abdense; 1925 1926 PetscFunctionBegin; 1927 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 1928 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 1929 ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 1930 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 1931 PetscFunctionReturn(0); 1932 } 1933 1934 #undef __FUNCT__ 1935 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1936 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1937 { 1938 PetscErrorCode ierr; 1939 Mat Ae,Be,Ce; 1940 Mat_MPIDense *c; 1941 Mat_MatMultDense *ab; 1942 1943 PetscFunctionBegin; 1944 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 1945 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 1946 } 1947 1948 /* convert A and B to Elemental matrices Ae and Be */ 1949 ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr); 1950 ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr); 1951 1952 /* Ce = Ae*Be */ 1953 ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr); 1954 ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr); 1955 1956 /* convert Ce to C */ 1957 ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 1958 1959 /* create data structure for reuse Cdense */ 1960 ierr = PetscNew(&ab);CHKERRQ(ierr); 1961 c = (Mat_MPIDense*)(*C)->data; 1962 c->abdense = ab; 1963 1964 ab->Ae = Ae; 1965 ab->Be = Be; 1966 ab->Ce = Ce; 1967 ab->destroy = (*C)->ops->destroy; 1968 (*C)->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 1969 (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 1970 PetscFunctionReturn(0); 1971 } 1972 1973 #undef __FUNCT__ 1974 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1975 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1976 { 1977 PetscErrorCode ierr; 1978 1979 PetscFunctionBegin; 1980 if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */ 1981 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 1982 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1983 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 1984 } else { 1985 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 1986 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1987 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 1988 } 1989 PetscFunctionReturn(0); 1990 } 1991 #endif 1992