/*
   Basic functions for basic parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"   I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}
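/*
   Usage sketch (illustrative, not part of the PETSc API reference): a caller
   can operate directly on the on-process block of a parallel dense matrix.
   The matrix A below is assumed to be an assembled MATMPIDENSE or MATSEQDENSE
   matrix; the returned Alocal is the inner sequential matrix, not a new
   reference, so it must not be destroyed by the caller.

      Mat         Alocal;
      PetscScalar *v;

      ierr = MatDenseGetLocalMatrix(A,&Alocal);CHKERRQ(ierr);
      ierr = MatDenseGetArray(Alocal,&v);CHKERRQ(ierr);
      ....   read or modify the local column-major entries in v
      ierr = MatDenseRestoreArray(Alocal,&v);CHKERRQ(ierr);
*/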
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetArray_MPIDense"
PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ...
     Currently supports only a submatrix with the same partitioning as the
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
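/*
   Usage sketch (an assumption about the calling sequence, not taken from this
   file): because MatGetSubMatrix_MPIDense() does no parallel redistribution,
   the row index set must select locally owned rows. A stride IS over the
   ownership range satisfies that constraint; nloc and firstcol below are
   hypothetical values chosen by the caller.

      IS       isrow,iscol;
      Mat      S;
      PetscInt rstart,rend;

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),nloc,firstcol,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
*/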
#undef __FUNCT__
#define __FUNCT__ "MatDenseRestoreArray_MPIDense"
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
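/*
   Usage sketch (illustrative boundary-condition setup, not from this file):
   rows passed to MatZeroRows() need not be locally owned; the rendezvous
   communication above routes each row index to its owner. With a nonzero
   diag and vectors x and b, the right-hand side is adjusted as well.

      PetscInt rows[2] = {0,1};      global indices of rows to zero

      ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/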
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
"MatView_MPIDense_ASCIIorDraworSocket" 662 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 663 { 664 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 665 PetscErrorCode ierr; 666 PetscMPIInt rank = mdn->rank; 667 PetscViewerType vtype; 668 PetscBool iascii,isdraw; 669 PetscViewer sviewer; 670 PetscViewerFormat format; 671 672 PetscFunctionBegin; 673 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 674 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 675 if (iascii) { 676 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 677 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 678 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 679 MatInfo info; 680 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 681 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 682 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 683 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 684 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 685 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 686 PetscFunctionReturn(0); 687 } else if (format == PETSC_VIEWER_ASCII_INFO) { 688 PetscFunctionReturn(0); 689 } 690 } else if (isdraw) { 691 PetscDraw draw; 692 PetscBool isnull; 693 694 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 695 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 696 if (isnull) PetscFunctionReturn(0); 697 } 698 699 { 700 /* assemble the entire matrix onto first processor. */ 701 Mat A; 702 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 703 PetscInt *cols; 704 PetscScalar *vals; 705 706 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 707 if (!rank) { 708 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 709 } else { 710 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 711 } 712 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 713 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 714 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 715 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 716 717 /* Copy the matrix ... 
    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIDense"
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIDense"
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIDense"
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIDense"
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIDense"
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIDense"
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

#undef __FUNCT__
#define __FUNCT__ "MatMissingDiagonal_MPIDense"
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatGetSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatGetSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0
};

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */
  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIDense_Elemental"
PETSC_EXTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
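/*
   Usage sketch (assumes PETSc was configured with Elemental): the converter
   above is reached through the generic MatConvert() interface,

      Mat Ae;

      ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);CHKERRQ(ierr);

   or in-place with MAT_INPLACE_MATRIX, in which case A itself becomes a
   MATELEMENTAL matrix via MatHeaderReplace().
*/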
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off-processor entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/
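/*
   Usage sketch (illustrative, not from this file): MATDENSE is normally
   selected from the options database, so that -mat_type dense picks the
   sequential or parallel variant automatically. M and N are assumed global
   sizes supplied by the caller.

      Mat A;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatSetUp(A);CHKERRQ(ierr);

   This yields MATSEQDENSE on a single process and MATMPIDENSE otherwise.
*/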
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateDense"
/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
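/*
   Usage sketch (illustrative, not from this file): creating a parallel dense
   matrix with PETSc-managed storage and inserting one entry. M and N are
   assumed global sizes; passing data=NULL lets PETSc allocate the local
   column-major array, as described in the manual page above.

      Mat         A;
      PetscInt    i = 0,j = 0;
      PetscScalar v = 1.0;

      ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/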
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  const PetscInt *rowners;
  PetscInt       i,m,n,nz,j,mMax;
  PetscScalar    *array,*vals,*vals_ptr;
  Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of rows and columns */
  m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
  n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
  ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
  ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into the matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
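/*
   Usage sketch (illustrative, not from this file): the loaders in this file
   are reached through the generic MatLoad() interface; "matrix.dat" is an
   assumed file name, written earlier with MatView() on a binary viewer (for
   a parallel dense matrix this requires the PETSC_VIEWER_NATIVE format, as
   enforced in MatView_MPIDense_Binary() above).

      Mat         A;
      PetscViewer viewer;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/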
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If the global rows/cols are set to PETSC_DECIDE, use the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  if (newmat->cmap->n < 0) {
    n = PETSC_DECIDE;
  } else {
    ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)newmat->data;
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatTransMatMult_MPIDense_MPIDense"
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a   = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
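
/*
   The parallel product C = A^T * B computed by the routine below proceeds in
   two phases: (1) each rank forms its local contribution A_loc^T * B_loc with
   a single sequential BLAS gemm, giving a full N x cN array on every rank;
   (2) the per-rank contributions are summed and simultaneously distributed so
   that each rank keeps only its owned rows of C, using one call to
   MPI_Reduce_scatter(). Schematically (names follow the routine below):

     for (proc=0; proc<size; proc++) recvcounts[proc] = (ranges[proc+1] - ranges[proc])*cN;
     ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
*/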
#undef __FUNCT__
#define __FUNCT__ "MatTransposeMatMultNumeric_MPIDense_MPIDense"
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data,*b=(Mat_MPIDense*)B->data,*c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data,*bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          am,an,bn,aN;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum the atbarray contributions from all ranks into the local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTransposeMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure that allows Cdense to be reused */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = Cdense->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  c            = (Mat_MPIDense*)Cdense->data;
  c->atbdense  = atb;
  atb->destroy = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
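
/*
   Example usage (a minimal sketch, not part of this file): forming C = A^T * B
   for two MPIDENSE matrices through the user-level interface, which dispatches
   to the symbolic/numeric routines above via the wrapper below.

     Mat A,B,C;
     ... assemble A and B with identical row layouts ...
     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ... after changing the numerical values of A or B (same sizes) ...
     ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/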
#undef __FUNCT__
#define __FUNCT__ "MatTransposeMatMult_MPIDense_MPIDense"
PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatMatMult_MPIDense_MPIDense"
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a  = (Mat_MPIDense*)A->data;
  Mat_MatMultDense *ab = a->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);

  ierr = (ab->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MPIDense     *c;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* convert A and B to Elemental matrices Ae and Be */
  ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);CHKERRQ(ierr);
  ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX,&Be);CHKERRQ(ierr);

  /* Ce = Ae*Be */
  ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);

  /* convert Ce to C */
  ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);

  /* create data structure that allows C to be reused */
  ierr = PetscNew(&ab);CHKERRQ(ierr);
  c          = (Mat_MPIDense*)(*C)->data;
  c->abdense = ab;

  ab->Ae      = Ae;
  ab->Be      = Be;
  ab->Ce      = Ce;
  ab->destroy = (*C)->ops->destroy;
  (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
  (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  } else {
    ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
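
/*
   Example usage (a hedged sketch, not part of this file): C = A*B for
   MPIDENSE matrices is routed through Elemental above, so it is only
   available when PETSc was configured with Elemental support
   (e.g. --download-elemental).

     Mat A,B,C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/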