1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 #include <petscblaslapack.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatDenseGetLocalMatrix" 13 /*@ 14 15 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 16 matrix that represents the operator. For sequential matrices it returns itself. 17 18 Input Parameter: 19 . A - the Seq or MPI dense matrix 20 21 Output Parameter: 22 . B - the inner matrix 23 24 Level: intermediate 25 26 @*/ 27 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 28 { 29 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 30 PetscErrorCode ierr; 31 PetscBool flg; 32 33 PetscFunctionBegin; 34 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 35 if (flg) *B = mat->A; 36 else *B = A; 37 PetscFunctionReturn(0); 38 } 39 40 #undef __FUNCT__ 41 #define __FUNCT__ "MatGetRow_MPIDense" 42 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 43 { 44 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 45 PetscErrorCode ierr; 46 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 47 48 PetscFunctionBegin; 49 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 50 lrow = row - rstart; 51 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 52 PetscFunctionReturn(0); 53 } 54 55 #undef __FUNCT__ 56 #define __FUNCT__ "MatRestoreRow_MPIDense" 57 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 58 { 59 PetscErrorCode ierr; 60 61 PetscFunctionBegin; 62 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 63 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 69 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 70 { 71 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 72 PetscErrorCode ierr; 73 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 74 PetscScalar *array; 75 MPI_Comm comm; 76 Mat B; 77 78 PetscFunctionBegin; 79 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 80 81 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 82 if (!B) { 83 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 84 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 85 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 86 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 87 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 88 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 89 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 90 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 91 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 92 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 93 *a = B; 94 ierr = MatDestroy(&B);CHKERRQ(ierr); 95 } else *a = B; 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatSetValues_MPIDense" 101 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 102 { 103 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 104 PetscErrorCode ierr; 105 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 106 PetscBool roworiented = A->roworiented; 107 108 PetscFunctionBegin; 109 for (i=0; i<m; i++) { 110 if (idxm[i] < 0) continue; 111 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 112 if (idxm[i] >= rstart && idxm[i] < rend) { 113 row = idxm[i] - rstart; 114 if (roworiented) { 115 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 116 } else { 117 for (j=0; j<n; j++) { 118 if (idxn[j] < 0) continue; 119 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 120 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 121 } 122 } 123 } else if (!A->donotstash) { 124 mat->assembled = PETSC_FALSE; 125 if (roworiented) { 126 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 127 } else { 128 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 129 } 130 } 131 } 132 PetscFunctionReturn(0); 133 } 134 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatGetValues_MPIDense" 137 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 138 { 139 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 140 PetscErrorCode ierr; 141 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 142 143 PetscFunctionBegin; 144 for (i=0; i<m; i++) { 145 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 146 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 147 if (idxm[i] >= rstart && idxm[i] < rend) { 148 row = idxm[i] - rstart; 149 for (j=0; j<n; j++) { 150 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 151 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 152 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 153 } 154 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 155 } 156 PetscFunctionReturn(0); 157 } 158 159 #undef __FUNCT__ 160 #define __FUNCT__ "MatDenseGetArray_MPIDense" 161 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 162 { 163 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 164 PetscErrorCode ierr; 165 166 PetscFunctionBegin; 167 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 #undef __FUNCT__ 172 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 173 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 174 { 175 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 176 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 177 PetscErrorCode ierr; 178 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 179 const PetscInt *irow,*icol; 180 PetscScalar *av,*bv,*v = lmat->v; 181 Mat newmat; 182 IS iscol_local; 183 184 PetscFunctionBegin; 185 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 186 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 187 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 188 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 189 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 190 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 191 192 /* No parallel redistribution currently supported! Should really check each index set 193 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 194 original matrix! */ 195 196 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 197 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 198 199 /* Check submatrix call */ 200 if (scall == MAT_REUSE_MATRIX) { 201 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 202 /* Really need to test rows and column sizes! */ 203 newmat = *B; 204 } else { 205 /* Create and fill new matrix */ 206 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 207 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 208 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 209 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 210 } 211 212 /* Now extract the data pointers and do the copy, column at a time */ 213 newmatd = (Mat_MPIDense*)newmat->data; 214 bv = ((Mat_SeqDense*)newmatd->A->data)->v; 215 216 for (i=0; i<Ncols; i++) { 217 av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 218 for (j=0; j<nrows; j++) { 219 *bv++ = av[irow[j] - rstart]; 220 } 221 } 222 223 /* Assemble the matrices so that the correct flags are set */ 224 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 225 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 226 227 /* Free work space */ 228 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 229 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 230 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 231 *B = newmat; 232 PetscFunctionReturn(0); 233 } 234 235 #undef __FUNCT__ 236 #define __FUNCT__ "MatDenseRestoreArray_MPIDense" 237 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 238 { 239 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 240 PetscErrorCode ierr; 241 242 PetscFunctionBegin; 243 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 244 PetscFunctionReturn(0); 245 } 246 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 249 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 250 { 251 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 252 MPI_Comm comm; 253 PetscErrorCode ierr; 254 PetscInt nstash,reallocs; 255 InsertMode addv; 256 257 PetscFunctionBegin; 258 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 259 /* make sure all processors are either in INSERTMODE or ADDMODE */ 260 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 261 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 262 mat->insertmode = addv; /* in case this processor had no cache */ 263 264 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 265 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 266 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 272 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 273 { 274 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 275 PetscErrorCode ierr; 276 PetscInt i,*row,*col,flg,j,rstart,ncols; 277 PetscMPIInt n; 278 PetscScalar *val; 279 280 PetscFunctionBegin; 281 /* wait on receives */ 282 while (1) { 283 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 284 if (!flg) break; 285 286 for (i=0; i<n;) { 287 /* Now identify the consecutive vals belonging to the same row */ 288 for (j=i,rstart=row[j]; j<n; j++) { 289 if (row[j] != rstart) break; 290 } 291 if (j < n) ncols = j-i; 292 else ncols = n-i; 293 /* Now assemble all these values with a single function call */ 294 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 295 i = j; 296 } 297 } 298 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 299 300 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 301 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 302 303 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 304 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 305 } 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatZeroEntries_MPIDense" 311 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 312 { 313 PetscErrorCode ierr; 314 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 315 316 PetscFunctionBegin; 317 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 318 PetscFunctionReturn(0); 319 } 320 321 /* the code does not do the diagonal entries correctly unless the 322 matrix is square and the column and row owerships are identical. 323 This is a BUG. The only way to fix it seems to be to access 324 mdn->A and mdn->B directly and not through the MatZeroRows() 325 routine. 326 */ 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatZeroRows_MPIDense" 329 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 330 { 331 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 332 PetscErrorCode ierr; 333 PetscInt i,*owners = A->rmap->range; 334 PetscInt *sizes,j,idx,nsends; 335 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 336 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 337 PetscInt *lens,*lrows,*values; 338 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 339 MPI_Comm comm; 340 MPI_Request *send_waits,*recv_waits; 341 MPI_Status recv_status,*send_status; 342 PetscBool found; 343 const PetscScalar *xx; 344 PetscScalar *bb; 345 346 PetscFunctionBegin; 347 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 348 if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 349 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 350 /* first count number of contributors to each processor */ 351 ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 352 ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/ 353 for (i=0; i<N; i++) { 354 idx = rows[i]; 355 found = PETSC_FALSE; 356 for (j=0; j<size; j++) { 357 if (idx >= owners[j] && idx < owners[j+1]) { 358 sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 359 } 360 } 361 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 362 } 363 nsends = 0; 364 for (i=0; i<size; i++) nsends += sizes[2*i+1]; 365 366 /* inform other processors of number of messages and max length*/ 367 ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr); 368 369 /* post receives: */ 370 ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr); 371 ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr); 372 for (i=0; i<nrecvs; i++) { 373 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 374 } 375 376 /* do sends: 377 1) starts[i] gives the starting index in svalues for stuff going to 378 the ith processor 379 */ 380 ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr); 381 ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr); 382 ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr); 383 384 starts[0] = 0; 385 for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 386 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 387 388 starts[0] = 0; 389 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 390 count = 0; 391 for (i=0; i<size; i++) { 392 if (sizes[2*i+1]) { 393 ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 394 } 395 } 396 ierr = PetscFree(starts);CHKERRQ(ierr); 397 398 base = owners[rank]; 399 400 /* wait on receives */ 401 ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 402 count = nrecvs; 403 slen = 0; 404 while (count) { 405 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 406 /* unpack receives into our local space */ 407 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 408 409 source[imdex] = recv_status.MPI_SOURCE; 410 lens[imdex] = n; 411 slen += n; 412 count--; 413 } 414 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 415 416 /* move the data into the send scatter */ 417 ierr = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr); 418 count = 0; 419 for (i=0; i<nrecvs; i++) { 420 values = rvalues + i*nmax; 421 for (j=0; j<lens[i]; j++) { 422 lrows[count++] = values[j] - base; 423 } 424 } 425 ierr = PetscFree(rvalues);CHKERRQ(ierr); 426 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 427 ierr = PetscFree(owner);CHKERRQ(ierr); 428 ierr = PetscFree(sizes);CHKERRQ(ierr); 429 430 /* fix right hand side if needed */ 431 if (x && b) { 432 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 433 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 434 for (i=0; i<slen; i++) { 435 bb[lrows[i]] = diag*xx[lrows[i]]; 436 } 437 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 438 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 439 } 440 441 /* actually zap the local rows */ 442 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 443 if (diag != 0.0) { 444 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 445 PetscInt m = ll->lda, i; 446 447 for (i=0; i<slen; i++) { 448 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 449 } 450 } 451 ierr = PetscFree(lrows);CHKERRQ(ierr); 452 453 /* wait on sends */ 454 if (nsends) { 455 ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); 456 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 457 ierr = PetscFree(send_status);CHKERRQ(ierr); 458 } 459 ierr = PetscFree(send_waits);CHKERRQ(ierr); 460 ierr = PetscFree(svalues);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 #undef __FUNCT__ 465 #define __FUNCT__ "MatMult_MPIDense" 466 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 467 { 468 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 469 PetscErrorCode ierr; 470 471 PetscFunctionBegin; 472 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 473 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 474 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "MatMultAdd_MPIDense" 480 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 481 { 482 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 483 PetscErrorCode ierr; 484 485 PetscFunctionBegin; 486 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 487 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 488 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 489 PetscFunctionReturn(0); 490 } 491 492 #undef __FUNCT__ 493 #define __FUNCT__ "MatMultTranspose_MPIDense" 494 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 495 { 496 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 497 PetscErrorCode ierr; 498 PetscScalar zero = 0.0; 499 500 PetscFunctionBegin; 501 ierr = VecSet(yy,zero);CHKERRQ(ierr); 502 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 503 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 504 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 505 PetscFunctionReturn(0); 506 } 507 508 #undef __FUNCT__ 509 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 510 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 511 { 512 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 513 PetscErrorCode ierr; 514 515 PetscFunctionBegin; 516 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 517 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 518 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 519 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 520 PetscFunctionReturn(0); 521 } 522 523 #undef __FUNCT__ 524 #define __FUNCT__ "MatGetDiagonal_MPIDense" 525 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 526 { 527 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 528 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 529 PetscErrorCode ierr; 530 PetscInt len,i,n,m = A->rmap->n,radd; 531 PetscScalar *x,zero = 0.0; 532 533 PetscFunctionBegin; 534 ierr = VecSet(v,zero);CHKERRQ(ierr); 535 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 536 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 537 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 538 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 539 radd = A->rmap->rstart*m; 540 for (i=0; i<len; i++) { 541 x[i] = aloc->v[radd + i*m + i]; 542 } 543 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 547 #undef __FUNCT__ 548 #define __FUNCT__ "MatDestroy_MPIDense" 549 PetscErrorCode MatDestroy_MPIDense(Mat mat) 550 { 551 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 552 PetscErrorCode ierr; 553 554 PetscFunctionBegin; 555 #if defined(PETSC_USE_LOG) 556 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 557 #endif 558 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 559 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 560 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 561 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 562 563 ierr = PetscFree(mat->data);CHKERRQ(ierr); 564 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 565 566 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 567 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 568 #if defined(PETSC_HAVE_ELEMENTAL) 569 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 570 #endif 571 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 572 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 573 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 574 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 576 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 577 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 578 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatView_MPIDense_Binary" 584 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 585 { 586 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 587 PetscErrorCode ierr; 588 PetscViewerFormat format; 589 int fd; 590 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 591 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 592 PetscScalar *work,*v,*vv; 593 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 594 595 PetscFunctionBegin; 596 if (mdn->size == 1) { 597 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 598 } else { 599 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 600 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 601 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 602 603 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 604 if (format == PETSC_VIEWER_NATIVE) { 605 606 if (!rank) { 607 /* store the matrix as a dense matrix */ 608 header[0] = MAT_FILE_CLASSID; 609 header[1] = mat->rmap->N; 610 header[2] = N; 611 header[3] = MATRIX_BINARY_FORMAT_DENSE; 612 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 613 614 /* get largest work array needed for transposing array */ 615 mmax = mat->rmap->n; 616 for (i=1; i<size; i++) { 617 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 618 } 619 ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr); 620 621 /* write out local array, by rows */ 622 m = mat->rmap->n; 623 v = a->v; 624 for (j=0; j<N; j++) { 625 for (i=0; i<m; i++) { 626 work[j + i*N] = *v++; 627 } 628 } 629 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 630 /* get largest work array to receive messages from other processes, excludes process zero */ 631 mmax = 0; 632 for (i=1; i<size; i++) { 633 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 634 } 635 ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr); 636 for (k = 1; k < size; k++) { 637 v = vv; 638 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 639 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 640 641 for (j = 0; j < N; j++) { 642 for (i = 0; i < m; i++) { 643 work[j + i*N] = *v++; 644 } 645 } 646 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 647 } 648 ierr = PetscFree(work);CHKERRQ(ierr); 649 ierr = PetscFree(vv);CHKERRQ(ierr); 650 } else { 651 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 652 } 653 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 654 } 655 PetscFunctionReturn(0); 656 } 657 658 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 659 #include <petscdraw.h> 660 #undef __FUNCT__ 661 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 662 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 663 { 664 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 665 PetscErrorCode ierr; 666 PetscMPIInt rank = mdn->rank; 667 PetscViewerType vtype; 668 PetscBool iascii,isdraw; 669 PetscViewer sviewer; 670 PetscViewerFormat format; 671 672 PetscFunctionBegin; 673 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 674 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 675 if (iascii) { 676 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 677 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 678 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 679 MatInfo info; 680 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 681 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 682 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 683 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 684 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 685 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 686 PetscFunctionReturn(0); 687 } else if (format == PETSC_VIEWER_ASCII_INFO) { 688 PetscFunctionReturn(0); 689 } 690 } else if (isdraw) { 691 PetscDraw draw; 692 PetscBool isnull; 693 694 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 695 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 696 if (isnull) PetscFunctionReturn(0); 697 } 698 699 { 700 /* assemble the entire matrix onto first processor. */ 701 Mat A; 702 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 703 PetscInt *cols; 704 PetscScalar *vals; 705 706 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 707 if (!rank) { 708 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 709 } else { 710 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 711 } 712 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 713 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 714 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 715 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 716 717 /* Copy the matrix ... This isn't the most efficient means, 718 but it's quick for now */ 719 A->insertmode = INSERT_VALUES; 720 721 row = mat->rmap->rstart; 722 m = mdn->A->rmap->n; 723 for (i=0; i<m; i++) { 724 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 725 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 726 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 727 row++; 728 } 729 730 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 731 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 732 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 733 if (!rank) { 734 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 735 } 736 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 737 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 738 ierr = MatDestroy(&A);CHKERRQ(ierr); 739 } 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatView_MPIDense" 745 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 746 { 747 PetscErrorCode ierr; 748 PetscBool iascii,isbinary,isdraw,issocket; 749 750 PetscFunctionBegin; 751 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 752 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 753 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 754 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 755 756 if (iascii || issocket || isdraw) { 757 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 758 } else if (isbinary) { 759 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 760 } 761 PetscFunctionReturn(0); 762 } 763 764 #undef __FUNCT__ 765 #define __FUNCT__ "MatGetInfo_MPIDense" 766 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 767 { 768 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 769 Mat mdn = mat->A; 770 PetscErrorCode ierr; 771 PetscReal isend[5],irecv[5]; 772 773 PetscFunctionBegin; 774 info->block_size = 1.0; 775 776 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 777 778 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 779 isend[3] = info->memory; isend[4] = info->mallocs; 780 if (flag == MAT_LOCAL) { 781 info->nz_used = isend[0]; 782 info->nz_allocated = isend[1]; 783 info->nz_unneeded = isend[2]; 784 info->memory = isend[3]; 785 info->mallocs = isend[4]; 786 } else if (flag == MAT_GLOBAL_MAX) { 787 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 788 789 info->nz_used = irecv[0]; 790 info->nz_allocated = irecv[1]; 791 info->nz_unneeded = irecv[2]; 792 info->memory = irecv[3]; 793 info->mallocs = irecv[4]; 794 } else if (flag == MAT_GLOBAL_SUM) { 795 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 796 797 info->nz_used = irecv[0]; 798 info->nz_allocated = irecv[1]; 799 info->nz_unneeded = irecv[2]; 800 info->memory = irecv[3]; 801 info->mallocs = irecv[4]; 802 } 803 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 804 info->fill_ratio_needed = 0; 805 info->factor_mallocs = 0; 806 PetscFunctionReturn(0); 807 } 808 809 #undef __FUNCT__ 810 #define __FUNCT__ "MatSetOption_MPIDense" 811 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 812 { 813 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 814 PetscErrorCode ierr; 815 816 PetscFunctionBegin; 817 switch (op) { 818 case MAT_NEW_NONZERO_LOCATIONS: 819 case MAT_NEW_NONZERO_LOCATION_ERR: 820 case MAT_NEW_NONZERO_ALLOCATION_ERR: 821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 822 break; 823 case MAT_ROW_ORIENTED: 824 a->roworiented = flg; 825 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 826 break; 827 case MAT_NEW_DIAGONALS: 828 case MAT_KEEP_NONZERO_PATTERN: 829 case MAT_USE_HASH_TABLE: 830 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 831 break; 832 case MAT_IGNORE_OFF_PROC_ENTRIES: 833 a->donotstash = flg; 834 break; 835 case MAT_SYMMETRIC: 836 case MAT_STRUCTURALLY_SYMMETRIC: 837 case MAT_HERMITIAN: 838 case MAT_SYMMETRY_ETERNAL: 839 case MAT_IGNORE_LOWER_TRIANGULAR: 840 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 841 break; 842 default: 843 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 844 } 845 PetscFunctionReturn(0); 846 } 847 848 849 #undef __FUNCT__ 850 #define __FUNCT__ "MatDiagonalScale_MPIDense" 851 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 852 { 853 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 854 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 855 PetscScalar *l,*r,x,*v; 856 PetscErrorCode ierr; 857 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 858 859 PetscFunctionBegin; 860 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 861 if (ll) { 862 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 863 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 864 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 865 for (i=0; i<m; i++) { 866 x = l[i]; 867 v = mat->v + i; 868 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 869 } 870 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 871 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 872 } 873 if (rr) { 874 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 875 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 876 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 877 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 878 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 879 for (i=0; i<n; i++) { 880 x = r[i]; 881 v = mat->v + i*m; 882 for (j=0; j<m; j++) (*v++) *= x; 883 } 884 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 885 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 886 } 887 PetscFunctionReturn(0); 888 } 889 890 #undef __FUNCT__ 891 #define __FUNCT__ "MatNorm_MPIDense" 892 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 893 { 894 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 895 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 896 PetscErrorCode ierr; 897 PetscInt i,j; 898 PetscReal sum = 0.0; 899 PetscScalar *v = mat->v; 900 901 PetscFunctionBegin; 902 if (mdn->size == 1) { 903 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 904 } else { 905 if (type == NORM_FROBENIUS) { 906 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 907 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 908 } 909 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 910 *nrm = PetscSqrtReal(*nrm); 911 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 912 } else if (type == NORM_1) { 913 PetscReal *tmp,*tmp2; 914 ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 915 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 916 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 917 *nrm = 0.0; 918 v = mat->v; 919 for (j=0; j<mdn->A->cmap->n; j++) { 920 for (i=0; i<mdn->A->rmap->n; i++) { 921 tmp[j] += PetscAbsScalar(*v); v++; 922 } 923 } 924 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 925 for (j=0; j<A->cmap->N; j++) { 926 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 927 } 928 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 929 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 930 } else if (type == NORM_INFINITY) { /* max row norm */ 931 PetscReal ntemp; 932 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 933 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 934 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 935 } 936 PetscFunctionReturn(0); 937 } 938 939 #undef __FUNCT__ 940 #define __FUNCT__ "MatTranspose_MPIDense" 941 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 942 { 943 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 944 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 945 Mat B; 946 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 947 PetscErrorCode ierr; 948 PetscInt j,i; 949 PetscScalar *v; 950 951 PetscFunctionBegin; 952 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place"); 953 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 954 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 955 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 956 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 957 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 958 } else { 959 B = *matout; 960 } 961 962 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 963 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 964 for (i=0; i<m; i++) rwork[i] = rstart + i; 965 for (j=0; j<n; j++) { 966 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 967 v += m; 968 } 969 ierr = PetscFree(rwork);CHKERRQ(ierr); 970 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 971 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 972 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 973 *matout = B; 974 } else { 975 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 976 } 977 PetscFunctionReturn(0); 978 } 979 980 981 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 982 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatSetUp_MPIDense" 986 PetscErrorCode MatSetUp_MPIDense(Mat A) 987 { 988 PetscErrorCode ierr; 989 990 PetscFunctionBegin; 991 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 992 PetscFunctionReturn(0); 993 } 994 995 #undef __FUNCT__ 996 #define __FUNCT__ "MatAXPY_MPIDense" 997 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 998 { 999 PetscErrorCode ierr; 1000 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1001 1002 PetscFunctionBegin; 1003 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1004 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatConjugate_MPIDense" 1010 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1011 { 1012 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1013 PetscErrorCode ierr; 1014 1015 PetscFunctionBegin; 1016 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1017 PetscFunctionReturn(0); 1018 } 1019 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatRealPart_MPIDense" 1022 PetscErrorCode MatRealPart_MPIDense(Mat A) 1023 { 1024 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1025 PetscErrorCode ierr; 1026 1027 PetscFunctionBegin; 1028 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1029 PetscFunctionReturn(0); 1030 } 1031 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1034 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1035 { 1036 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1037 PetscErrorCode ierr; 1038 1039 PetscFunctionBegin; 1040 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "MatGetColumnNorms_MPIDense" 1047 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1048 { 1049 PetscErrorCode ierr; 1050 PetscInt i,n; 1051 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1052 PetscReal *work; 1053 1054 PetscFunctionBegin; 1055 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1056 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1057 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1058 if (type == NORM_2) { 1059 for (i=0; i<n; i++) work[i] *= work[i]; 1060 } 1061 if (type == NORM_INFINITY) { 1062 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1063 } else { 1064 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1065 } 1066 ierr = PetscFree(work);CHKERRQ(ierr); 1067 if (type == NORM_2) { 1068 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1069 } 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "MatSetRandom_MPIDense" 1075 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1076 { 1077 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1078 PetscErrorCode ierr; 1079 PetscScalar *a; 1080 PetscInt m,n,i; 1081 1082 PetscFunctionBegin; 1083 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1084 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1085 for (i=0; i<m*n; i++) { 1086 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1087 } 1088 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1089 PetscFunctionReturn(0); 1090 } 1091 1092 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1093 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "MatMissingDiagonal_MPIDense" 1096 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1097 { 1098 PetscFunctionBegin; 1099 *missing = PETSC_FALSE; 1100 PetscFunctionReturn(0); 1101 } 1102 1103 /* -------------------------------------------------------------------*/ 1104 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1105 MatGetRow_MPIDense, 1106 MatRestoreRow_MPIDense, 1107 MatMult_MPIDense, 1108 /* 4*/ MatMultAdd_MPIDense, 1109 MatMultTranspose_MPIDense, 1110 MatMultTransposeAdd_MPIDense, 1111 0, 1112 0, 1113 0, 1114 /* 10*/ 0, 1115 0, 1116 0, 1117 0, 1118 MatTranspose_MPIDense, 1119 /* 15*/ MatGetInfo_MPIDense, 1120 MatEqual_MPIDense, 1121 MatGetDiagonal_MPIDense, 1122 MatDiagonalScale_MPIDense, 1123 MatNorm_MPIDense, 1124 /* 20*/ MatAssemblyBegin_MPIDense, 1125 MatAssemblyEnd_MPIDense, 1126 MatSetOption_MPIDense, 1127 MatZeroEntries_MPIDense, 1128 /* 24*/ MatZeroRows_MPIDense, 1129 0, 1130 0, 1131 0, 1132 0, 1133 /* 29*/ MatSetUp_MPIDense, 1134 0, 1135 0, 1136 0, 1137 0, 1138 /* 34*/ MatDuplicate_MPIDense, 1139 0, 1140 0, 1141 0, 1142 0, 1143 /* 39*/ MatAXPY_MPIDense, 1144 MatGetSubMatrices_MPIDense, 1145 0, 1146 MatGetValues_MPIDense, 1147 0, 1148 /* 44*/ 0, 1149 MatScale_MPIDense, 1150 MatShift_Basic, 1151 0, 1152 0, 1153 /* 49*/ MatSetRandom_MPIDense, 1154 0, 1155 0, 1156 0, 1157 0, 1158 /* 54*/ 0, 1159 0, 1160 0, 1161 0, 1162 0, 1163 /* 59*/ MatGetSubMatrix_MPIDense, 1164 MatDestroy_MPIDense, 1165 MatView_MPIDense, 1166 0, 1167 0, 1168 /* 64*/ 0, 1169 0, 1170 0, 1171 0, 1172 0, 1173 /* 69*/ 0, 1174 0, 1175 0, 1176 0, 1177 0, 1178 /* 74*/ 0, 1179 0, 1180 0, 1181 0, 1182 0, 1183 /* 79*/ 0, 1184 0, 1185 0, 1186 0, 1187 /* 83*/ MatLoad_MPIDense, 1188 0, 1189 0, 1190 0, 1191 0, 1192 0, 1193 #if defined(PETSC_HAVE_ELEMENTAL) 1194 /* 89*/ MatMatMult_MPIDense_MPIDense, 1195 MatMatMultSymbolic_MPIDense_MPIDense, 1196 #else 1197 /* 89*/ 0, 1198 0, 1199 #endif 1200 MatMatMultNumeric_MPIDense, 1201 0, 1202 0, 1203 /* 94*/ 0, 1204 0, 1205 0, 1206 0, 1207 0, 1208 /* 99*/ 0, 1209 0, 1210 0, 1211 MatConjugate_MPIDense, 1212 0, 1213 /*104*/ 0, 1214 MatRealPart_MPIDense, 1215 MatImaginaryPart_MPIDense, 1216 0, 1217 0, 1218 /*109*/ 0, 1219 0, 1220 0, 1221 0, 1222 MatMissingDiagonal_MPIDense, 1223 /*114*/ 0, 1224 0, 1225 0, 1226 0, 1227 0, 1228 /*119*/ 0, 1229 0, 1230 0, 1231 0, 1232 0, 1233 /*124*/ 0, 1234 MatGetColumnNorms_MPIDense, 1235 0, 1236 0, 1237 0, 1238 /*129*/ 0, 1239 MatTransposeMatMult_MPIDense_MPIDense, 1240 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1241 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1242 0, 1243 /*134*/ 0, 1244 0, 1245 0, 1246 0, 1247 0, 1248 /*139*/ 0, 1249 0, 1250 0 1251 }; 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1255 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1256 { 1257 Mat_MPIDense *a; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 mat->preallocated = PETSC_TRUE; 1262 /* Note: For now, when data is specified above, this assumes the user correctly 1263 allocates the local dense storage space. We should add error checking. */ 1264 1265 a = (Mat_MPIDense*)mat->data; 1266 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1267 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1268 a->nvec = mat->cmap->n; 1269 1270 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1271 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1272 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1273 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1274 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1275 PetscFunctionReturn(0); 1276 } 1277 1278 #if defined(PETSC_HAVE_ELEMENTAL) 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "MatConvert_MPIDense_Elemental" 1281 PETSC_EXTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1282 { 1283 Mat mat_elemental; 1284 PetscErrorCode ierr; 1285 PetscScalar *v; 1286 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1287 1288 PetscFunctionBegin; 1289 if (reuse == MAT_REUSE_MATRIX) { 1290 mat_elemental = *newmat; 1291 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1292 } else { 1293 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1294 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1295 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1296 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1297 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1298 } 1299 1300 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1301 for (i=0; i<N; i++) cols[i] = i; 1302 for (i=0; i<m; i++) rows[i] = rstart + i; 1303 1304 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1305 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1306 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1307 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1308 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1309 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1310 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1311 1312 if (reuse == MAT_INPLACE_MATRIX) { 1313 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1314 } else { 1315 *newmat = mat_elemental; 1316 } 1317 PetscFunctionReturn(0); 1318 } 1319 #endif 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatCreate_MPIDense" 1323 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1324 { 1325 Mat_MPIDense *a; 1326 PetscErrorCode ierr; 1327 1328 PetscFunctionBegin; 1329 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1330 mat->data = (void*)a; 1331 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1332 1333 mat->insertmode = NOT_SET_VALUES; 1334 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1335 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1336 1337 /* build cache for off array entries formed */ 1338 a->donotstash = PETSC_FALSE; 1339 1340 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1341 1342 /* stuff used for matrix vector multiply */ 1343 a->lvec = 0; 1344 a->Mvctx = 0; 1345 a->roworiented = PETSC_TRUE; 1346 1347 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1348 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1349 #if defined(PETSC_HAVE_ELEMENTAL) 1350 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 1351 #endif 1352 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1353 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1354 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1355 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1356 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1357 1358 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1359 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1360 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1361 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1362 PetscFunctionReturn(0); 1363 } 1364 1365 /*MC 1366 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1367 1368 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1369 and MATMPIDENSE otherwise. 1370 1371 Options Database Keys: 1372 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1373 1374 Level: beginner 1375 1376 1377 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1378 M*/ 1379 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1382 /*@C 1383 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1384 1385 Not collective 1386 1387 Input Parameters: 1388 . B - the matrix 1389 - data - optional location of matrix data. Set data=NULL for PETSc 1390 to control all matrix memory allocation. 1391 1392 Notes: 1393 The dense format is fully compatible with standard Fortran 77 1394 storage by columns. 1395 1396 The data input variable is intended primarily for Fortran programmers 1397 who wish to allocate their own matrix memory space. Most users should 1398 set data=NULL. 1399 1400 Level: intermediate 1401 1402 .keywords: matrix,dense, parallel 1403 1404 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1405 @*/ 1406 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1407 { 1408 PetscErrorCode ierr; 1409 1410 PetscFunctionBegin; 1411 ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatCreateDense" 1417 /*@C 1418 MatCreateDense - Creates a parallel matrix in dense format. 1419 1420 Collective on MPI_Comm 1421 1422 Input Parameters: 1423 + comm - MPI communicator 1424 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1425 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1426 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1427 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1428 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1429 to control all matrix memory allocation. 1430 1431 Output Parameter: 1432 . A - the matrix 1433 1434 Notes: 1435 The dense format is fully compatible with standard Fortran 77 1436 storage by columns. 1437 1438 The data input variable is intended primarily for Fortran programmers 1439 who wish to allocate their own matrix memory space. Most users should 1440 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 1441 1442 The user MUST specify either the local or global matrix dimensions 1443 (possibly both). 1444 1445 Level: intermediate 1446 1447 .keywords: matrix,dense, parallel 1448 1449 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1450 @*/ 1451 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1452 { 1453 PetscErrorCode ierr; 1454 PetscMPIInt size; 1455 1456 PetscFunctionBegin; 1457 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1458 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1459 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1460 if (size > 1) { 1461 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1462 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1463 if (data) { /* user provided data array, so no need to assemble */ 1464 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 1465 (*A)->assembled = PETSC_TRUE; 1466 } 1467 } else { 1468 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1469 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1470 } 1471 PetscFunctionReturn(0); 1472 } 1473 1474 #undef __FUNCT__ 1475 #define __FUNCT__ "MatDuplicate_MPIDense" 1476 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1477 { 1478 Mat mat; 1479 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1480 PetscErrorCode ierr; 1481 1482 PetscFunctionBegin; 1483 *newmat = 0; 1484 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 1485 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1486 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1487 a = (Mat_MPIDense*)mat->data; 1488 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1489 1490 mat->factortype = A->factortype; 1491 mat->assembled = PETSC_TRUE; 1492 mat->preallocated = PETSC_TRUE; 1493 1494 a->size = oldmat->size; 1495 a->rank = oldmat->rank; 1496 mat->insertmode = NOT_SET_VALUES; 1497 a->nvec = oldmat->nvec; 1498 a->donotstash = oldmat->donotstash; 1499 1500 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1501 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1502 1503 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1504 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1505 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1506 1507 *newmat = mat; 1508 PetscFunctionReturn(0); 1509 } 1510 1511 #undef __FUNCT__ 1512 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1513 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat) 1514 { 1515 PetscErrorCode ierr; 1516 PetscMPIInt rank,size; 1517 const PetscInt *rowners; 1518 PetscInt i,m,n,nz,j,mMax; 1519 PetscScalar *array,*vals,*vals_ptr; 1520 Mat_MPIDense *a = (Mat_MPIDense*)newmat->data; 1521 1522 PetscFunctionBegin; 1523 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1524 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1525 1526 /* determine ownership of rows and columns */ 1527 m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n; 1528 n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n; 1529 1530 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1531 if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) { 1532 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1533 } 1534 ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 1535 ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr); 1536 ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr); 1537 ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr); 1538 if (!rank) { 1539 ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr); 1540 1541 /* read in my part of the matrix numerical values */ 1542 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1543 1544 /* insert into matrix-by row (this is why cannot directly read into array */ 1545 vals_ptr = vals; 1546 for (i=0; i<m; i++) { 1547 for (j=0; j<N; j++) { 1548 array[i + j*m] = *vals_ptr++; 1549 } 1550 } 1551 1552 /* read in other processors and ship out */ 1553 for (i=1; i<size; i++) { 1554 nz = (rowners[i+1] - rowners[i])*N; 1555 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1556 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1557 } 1558 } else { 1559 /* receive numeric values */ 1560 ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr); 1561 1562 /* receive message of values*/ 1563 ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1564 1565 /* insert into matrix-by row (this is why cannot directly read into array */ 1566 vals_ptr = vals; 1567 for (i=0; i<m; i++) { 1568 for (j=0; j<N; j++) { 1569 array[i + j*m] = *vals_ptr++; 1570 } 1571 } 1572 } 1573 ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr); 1574 ierr = PetscFree(vals);CHKERRQ(ierr); 1575 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1576 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatLoad_MPIDense" 1582 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1583 { 1584 Mat_MPIDense *a; 1585 PetscScalar *vals,*svals; 1586 MPI_Comm comm; 1587 MPI_Status status; 1588 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz; 1589 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1590 PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols; 1591 PetscInt i,nz,j,rstart,rend; 1592 int fd; 1593 PetscErrorCode ierr; 1594 1595 PetscFunctionBegin; 1596 /* force binary viewer to load .info file if it has not yet done so */ 1597 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1598 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 1599 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1600 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1601 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1602 if (!rank) { 1603 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 1604 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1605 } 1606 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1607 M = header[1]; N = header[2]; nz = header[3]; 1608 1609 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1610 if (newmat->rmap->N < 0) newmat->rmap->N = M; 1611 if (newmat->cmap->N < 0) newmat->cmap->N = N; 1612 1613 if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N); 1614 if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N); 1615 1616 /* 1617 Handle case where matrix is stored on disk as a dense matrix 1618 */ 1619 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1620 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1621 PetscFunctionReturn(0); 1622 } 1623 1624 /* determine ownership of all rows */ 1625 if (newmat->rmap->n < 0) { 1626 ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr); 1627 } else { 1628 ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr); 1629 } 1630 if (newmat->cmap->n < 0) { 1631 n = PETSC_DECIDE; 1632 } else { 1633 ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr); 1634 } 1635 1636 ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr); 1637 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1638 rowners[0] = 0; 1639 for (i=2; i<=size; i++) { 1640 rowners[i] += rowners[i-1]; 1641 } 1642 rstart = rowners[rank]; 1643 rend = rowners[rank+1]; 1644 1645 /* distribute row lengths to all processors */ 1646 ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr); 1647 if (!rank) { 1648 ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr); 1649 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1650 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 1651 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1652 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1653 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1654 } else { 1655 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1656 } 1657 1658 if (!rank) { 1659 /* calculate the number of nonzeros on each processor */ 1660 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 1661 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1662 for (i=0; i<size; i++) { 1663 for (j=rowners[i]; j< rowners[i+1]; j++) { 1664 procsnz[i] += rowlengths[j]; 1665 } 1666 } 1667 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1668 1669 /* determine max buffer needed and allocate it */ 1670 maxnz = 0; 1671 for (i=0; i<size; i++) { 1672 maxnz = PetscMax(maxnz,procsnz[i]); 1673 } 1674 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 1675 1676 /* read in my part of the matrix column indices */ 1677 nz = procsnz[0]; 1678 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 1679 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1680 1681 /* read in every one elses and ship off */ 1682 for (i=1; i<size; i++) { 1683 nz = procsnz[i]; 1684 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1685 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1686 } 1687 ierr = PetscFree(cols);CHKERRQ(ierr); 1688 } else { 1689 /* determine buffer space needed for message */ 1690 nz = 0; 1691 for (i=0; i<m; i++) { 1692 nz += ourlens[i]; 1693 } 1694 ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr); 1695 1696 /* receive message of column indices*/ 1697 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1698 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1699 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1700 } 1701 1702 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1703 a = (Mat_MPIDense*)newmat->data; 1704 if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) { 1705 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1706 } 1707 1708 if (!rank) { 1709 ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr); 1710 1711 /* read in my part of the matrix numerical values */ 1712 nz = procsnz[0]; 1713 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1714 1715 /* insert into matrix */ 1716 jj = rstart; 1717 smycols = mycols; 1718 svals = vals; 1719 for (i=0; i<m; i++) { 1720 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1721 smycols += ourlens[i]; 1722 svals += ourlens[i]; 1723 jj++; 1724 } 1725 1726 /* read in other processors and ship out */ 1727 for (i=1; i<size; i++) { 1728 nz = procsnz[i]; 1729 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1730 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 1731 } 1732 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1733 } else { 1734 /* receive numeric values */ 1735 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 1736 1737 /* receive message of values*/ 1738 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 1739 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1740 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1741 1742 /* insert into matrix */ 1743 jj = rstart; 1744 smycols = mycols; 1745 svals = vals; 1746 for (i=0; i<m; i++) { 1747 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1748 smycols += ourlens[i]; 1749 svals += ourlens[i]; 1750 jj++; 1751 } 1752 } 1753 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1754 ierr = PetscFree(vals);CHKERRQ(ierr); 1755 ierr = PetscFree(mycols);CHKERRQ(ierr); 1756 ierr = PetscFree(rowners);CHKERRQ(ierr); 1757 1758 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1759 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1760 PetscFunctionReturn(0); 1761 } 1762 1763 #undef __FUNCT__ 1764 #define __FUNCT__ "MatEqual_MPIDense" 1765 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 1766 { 1767 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1768 Mat a,b; 1769 PetscBool flg; 1770 PetscErrorCode ierr; 1771 1772 PetscFunctionBegin; 1773 a = matA->A; 1774 b = matB->A; 1775 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1776 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1777 PetscFunctionReturn(0); 1778 } 1779 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatDestroy_MatTransMatMult_MPIDense_MPIDense" 1782 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A) 1783 { 1784 PetscErrorCode ierr; 1785 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1786 Mat_TransMatMultDense *atb = a->atbdense; 1787 1788 PetscFunctionBegin; 1789 ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr); 1790 ierr = (atb->destroy)(A);CHKERRQ(ierr); 1791 ierr = PetscFree(atb);CHKERRQ(ierr); 1792 PetscFunctionReturn(0); 1793 } 1794 1795 #undef __FUNCT__ 1796 #define __FUNCT__ "MatTransposeMatMultNumeric_MPIDense_MPIDense" 1797 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1798 { 1799 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 1800 Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 1801 Mat_TransMatMultDense *atb = c->atbdense; 1802 PetscErrorCode ierr; 1803 MPI_Comm comm; 1804 PetscMPIInt rank,size,*recvcounts=atb->recvcounts; 1805 PetscScalar *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf; 1806 PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 1807 PetscScalar _DOne=1.0,_DZero=0.0; 1808 PetscBLASInt am,an,bn,aN; 1809 const PetscInt *ranges; 1810 1811 PetscFunctionBegin; 1812 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1813 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1814 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1815 1816 /* compute atbarray = aseq^T * bseq */ 1817 ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr); 1818 ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr); 1819 ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr); 1820 ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr); 1821 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN)); 1822 1823 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 1824 for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 1825 1826 /* arrange atbarray into sendbuf */ 1827 k = 0; 1828 for (proc=0; proc<size; proc++) { 1829 for (j=0; j<cN; j++) { 1830 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 1831 } 1832 } 1833 /* sum all atbarray to local values of C */ 1834 ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr); 1835 ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 1836 ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr); 1837 PetscFunctionReturn(0); 1838 } 1839 1840 #undef __FUNCT__ 1841 #define __FUNCT__ "MatTransposeMatMultSymbolic_MPIDense_MPIDense" 1842 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1843 { 1844 PetscErrorCode ierr; 1845 Mat Cdense; 1846 MPI_Comm comm; 1847 PetscMPIInt size; 1848 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 1849 Mat_MPIDense *c; 1850 Mat_TransMatMultDense *atb; 1851 1852 PetscFunctionBegin; 1853 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1854 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 1855 SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 1856 } 1857 1858 /* create matrix product Cdense */ 1859 ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr); 1860 ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1861 ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr); 1862 ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr); 1863 ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1864 ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1865 *C = Cdense; 1866 1867 /* create data structure for reuse Cdense */ 1868 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1869 ierr = PetscNew(&atb);CHKERRQ(ierr); 1870 cM = Cdense->rmap->N; 1871 ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr); 1872 1873 c = (Mat_MPIDense*)Cdense->data; 1874 c->atbdense = atb; 1875 atb->destroy = Cdense->ops->destroy; 1876 Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 1877 PetscFunctionReturn(0); 1878 } 1879 1880 #undef __FUNCT__ 1881 #define __FUNCT__ "MatTransposeMatMult_MPIDense_MPIDense" 1882 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1883 { 1884 PetscErrorCode ierr; 1885 1886 PetscFunctionBegin; 1887 if (scall == MAT_INITIAL_MATRIX) { 1888 ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1889 } 1890 ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1891 PetscFunctionReturn(0); 1892 } 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatDestroy_MatMatMult_MPIDense_MPIDense" 1896 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A) 1897 { 1898 PetscErrorCode ierr; 1899 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1900 Mat_MatMultDense *ab = a->abdense; 1901 1902 PetscFunctionBegin; 1903 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 1904 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 1905 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 1906 1907 ierr = (ab->destroy)(A);CHKERRQ(ierr); 1908 ierr = PetscFree(ab);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 #if defined(PETSC_HAVE_ELEMENTAL) 1913 #undef __FUNCT__ 1914 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1915 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1916 { 1917 PetscErrorCode ierr; 1918 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 1919 Mat_MatMultDense *ab=c->abdense; 1920 1921 PetscFunctionBegin; 1922 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 1923 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 1924 ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 1925 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 1926 PetscFunctionReturn(0); 1927 } 1928 1929 #undef __FUNCT__ 1930 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1931 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1932 { 1933 PetscErrorCode ierr; 1934 Mat Ae,Be,Ce; 1935 Mat_MPIDense *c; 1936 Mat_MatMultDense *ab; 1937 1938 PetscFunctionBegin; 1939 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 1940 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 1941 } 1942 1943 /* convert A and B to Elemental matrices Ae and Be */ 1944 ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr); 1945 ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr); 1946 1947 /* Ce = Ae*Be */ 1948 ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr); 1949 ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr); 1950 1951 /* convert Ce to C */ 1952 ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 1953 1954 /* create data structure for reuse Cdense */ 1955 ierr = PetscNew(&ab);CHKERRQ(ierr); 1956 c = (Mat_MPIDense*)(*C)->data; 1957 c->abdense = ab; 1958 1959 ab->Ae = Ae; 1960 ab->Be = Be; 1961 ab->Ce = Ce; 1962 ab->destroy = (*C)->ops->destroy; 1963 (*C)->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 1964 (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 1965 PetscFunctionReturn(0); 1966 } 1967 1968 #undef __FUNCT__ 1969 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1970 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1971 { 1972 PetscErrorCode ierr; 1973 1974 PetscFunctionBegin; 1975 if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */ 1976 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 1977 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1978 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 1979 } else { 1980 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 1981 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1982 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 1983 } 1984 PetscFunctionReturn(0); 1985 } 1986 #endif 1987