1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 #include <petscblaslapack.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatDenseGetLocalMatrix" 13 /*@ 14 15 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 16 matrix that represents the operator. For sequential matrices it returns itself. 17 18 Input Parameter: 19 . A - the Seq or MPI dense matrix 20 21 Output Parameter: 22 . B - the inner matrix 23 24 Level: intermediate 25 26 @*/ 27 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 28 { 29 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 30 PetscErrorCode ierr; 31 PetscBool flg; 32 33 PetscFunctionBegin; 34 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 35 if (flg) *B = mat->A; 36 else *B = A; 37 PetscFunctionReturn(0); 38 } 39 40 #undef __FUNCT__ 41 #define __FUNCT__ "MatGetRow_MPIDense" 42 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 43 { 44 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 45 PetscErrorCode ierr; 46 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 47 48 PetscFunctionBegin; 49 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 50 lrow = row - rstart; 51 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 52 PetscFunctionReturn(0); 53 } 54 55 #undef __FUNCT__ 56 #define __FUNCT__ "MatRestoreRow_MPIDense" 57 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 58 { 59 PetscErrorCode ierr; 60 61 PetscFunctionBegin; 62 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 63 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 64 PetscFunctionReturn(0); 65 } 66 67 #undef __FUNCT__ 68 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 69 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 70 { 71 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 72 PetscErrorCode ierr; 73 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 74 PetscScalar *array; 75 MPI_Comm comm; 76 Mat B; 77 78 PetscFunctionBegin; 79 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 80 81 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 82 if (!B) { 83 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 84 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 85 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 86 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 87 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 88 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 89 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 90 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 91 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 92 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 93 *a = B; 94 ierr = MatDestroy(&B);CHKERRQ(ierr); 95 } else *a = B; 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatSetValues_MPIDense" 101 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 102 { 103 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 104 PetscErrorCode ierr; 105 PetscInt i,j,rstart = mat->rmap->rstart,rend = 
mat->rmap->rend,row; 106 PetscBool roworiented = A->roworiented; 107 108 PetscFunctionBegin; 109 for (i=0; i<m; i++) { 110 if (idxm[i] < 0) continue; 111 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 112 if (idxm[i] >= rstart && idxm[i] < rend) { 113 row = idxm[i] - rstart; 114 if (roworiented) { 115 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 116 } else { 117 for (j=0; j<n; j++) { 118 if (idxn[j] < 0) continue; 119 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 120 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 121 } 122 } 123 } else if (!A->donotstash) { 124 mat->assembled = PETSC_FALSE; 125 if (roworiented) { 126 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 127 } else { 128 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 129 } 130 } 131 } 132 PetscFunctionReturn(0); 133 } 134 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatGetValues_MPIDense" 137 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 138 { 139 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 140 PetscErrorCode ierr; 141 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 142 143 PetscFunctionBegin; 144 for (i=0; i<m; i++) { 145 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 146 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 147 if (idxm[i] >= rstart && idxm[i] < rend) { 148 row = idxm[i] - rstart; 149 for (j=0; j<n; j++) { 150 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 151 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 152 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 153 } 154 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 155 } 156 PetscFunctionReturn(0); 157 } 158 159 #undef __FUNCT__ 160 #define __FUNCT__ "MatDenseGetArray_MPIDense" 161 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 162 { 163 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 164 PetscErrorCode ierr; 165 166 PetscFunctionBegin; 167 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 #undef __FUNCT__ 172 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 173 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 174 { 175 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 176 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 177 PetscErrorCode ierr; 178 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 179 const PetscInt *irow,*icol; 180 PetscScalar *av,*bv,*v = lmat->v; 181 Mat newmat; 182 IS iscol_local; 183 184 PetscFunctionBegin; 185 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 186 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 187 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 188 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 189 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 190 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 191 192 /* No parallel redistribution currently supported! Should really check each index set 193 to comfirm that it is OK. ... 
Currently supports only submatrix same partitioning as 194 original matrix! */ 195 196 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 197 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 198 199 /* Check submatrix call */ 200 if (scall == MAT_REUSE_MATRIX) { 201 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 202 /* Really need to test rows and column sizes! */ 203 newmat = *B; 204 } else { 205 /* Create and fill new matrix */ 206 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 207 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 208 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 209 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 210 } 211 212 /* Now extract the data pointers and do the copy, column at a time */ 213 newmatd = (Mat_MPIDense*)newmat->data; 214 bv = ((Mat_SeqDense*)newmatd->A->data)->v; 215 216 for (i=0; i<Ncols; i++) { 217 av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 218 for (j=0; j<nrows; j++) { 219 *bv++ = av[irow[j] - rstart]; 220 } 221 } 222 223 /* Assemble the matrices so that the correct flags are set */ 224 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 225 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 226 227 /* Free work space */ 228 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 229 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 230 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 231 *B = newmat; 232 PetscFunctionReturn(0); 233 } 234 235 #undef __FUNCT__ 236 #define __FUNCT__ "MatDenseRestoreArray_MPIDense" 237 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 238 { 239 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 240 PetscErrorCode ierr; 241 242 PetscFunctionBegin; 243 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 244 PetscFunctionReturn(0); 245 } 246 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 249 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 250 { 251 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 252 MPI_Comm comm; 253 PetscErrorCode ierr; 254 PetscInt nstash,reallocs; 255 InsertMode addv; 256 257 PetscFunctionBegin; 258 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 259 /* make sure all processors are either in INSERTMODE or ADDMODE */ 260 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 261 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 262 mat->insertmode = addv; /* in case this processor had no cache */ 263 264 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 265 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 266 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 272 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 273 { 274 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 275 PetscErrorCode ierr; 276 PetscInt i,*row,*col,flg,j,rstart,ncols; 277 PetscMPIInt n; 278 PetscScalar *val; 279 280 PetscFunctionBegin; 281 /* wait on receives */ 282 while (1) { 283 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 284 if (!flg) break; 285 
286 for (i=0; i<n;) { 287 /* Now identify the consecutive vals belonging to the same row */ 288 for (j=i,rstart=row[j]; j<n; j++) { 289 if (row[j] != rstart) break; 290 } 291 if (j < n) ncols = j-i; 292 else ncols = n-i; 293 /* Now assemble all these values with a single function call */ 294 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 295 i = j; 296 } 297 } 298 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 299 300 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 301 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 302 303 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 304 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 305 } 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatZeroEntries_MPIDense" 311 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 312 { 313 PetscErrorCode ierr; 314 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 315 316 PetscFunctionBegin; 317 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 318 PetscFunctionReturn(0); 319 } 320 321 /* the code does not do the diagonal entries correctly unless the 322 matrix is square and the column and row owerships are identical. 323 This is a BUG. The only way to fix it seems to be to access 324 mdn->A and mdn->B directly and not through the MatZeroRows() 325 routine. 326 */ 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatZeroRows_MPIDense" 329 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 330 { 331 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 332 PetscErrorCode ierr; 333 PetscInt i,*owners = A->rmap->range; 334 PetscInt *sizes,j,idx,nsends; 335 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 336 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 337 PetscInt *lens,*lrows,*values; 338 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 339 MPI_Comm comm; 340 MPI_Request *send_waits,*recv_waits; 341 MPI_Status recv_status,*send_status; 342 PetscBool found; 343 const PetscScalar *xx; 344 PetscScalar *bb; 345 346 PetscFunctionBegin; 347 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 348 if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 349 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 350 /* first count number of contributors to each processor */ 351 ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 352 ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/ 353 for (i=0; i<N; i++) { 354 idx = rows[i]; 355 found = PETSC_FALSE; 356 for (j=0; j<size; j++) { 357 if (idx >= owners[j] && idx < owners[j+1]) { 358 sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 359 } 360 } 361 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 362 } 363 nsends = 0; 364 for (i=0; i<size; i++) nsends += sizes[2*i+1]; 365 366 /* inform other processors of number of messages and max length*/ 367 ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr); 368 369 /* post receives: */ 370 ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr); 371 ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr); 372 for (i=0; i<nrecvs; i++) { 373 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 374 } 375 376 /* do sends: 377 1) starts[i] gives the starting index in svalues for stuff going to 378 the ith processor 379 */ 380 ierr = 
PetscMalloc1(N+1,&svalues);CHKERRQ(ierr); 381 ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr); 382 ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr); 383 384 starts[0] = 0; 385 for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 386 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 387 388 starts[0] = 0; 389 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 390 count = 0; 391 for (i=0; i<size; i++) { 392 if (sizes[2*i+1]) { 393 ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 394 } 395 } 396 ierr = PetscFree(starts);CHKERRQ(ierr); 397 398 base = owners[rank]; 399 400 /* wait on receives */ 401 ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 402 count = nrecvs; 403 slen = 0; 404 while (count) { 405 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 406 /* unpack receives into our local space */ 407 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 408 409 source[imdex] = recv_status.MPI_SOURCE; 410 lens[imdex] = n; 411 slen += n; 412 count--; 413 } 414 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 415 416 /* move the data into the send scatter */ 417 ierr = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr); 418 count = 0; 419 for (i=0; i<nrecvs; i++) { 420 values = rvalues + i*nmax; 421 for (j=0; j<lens[i]; j++) { 422 lrows[count++] = values[j] - base; 423 } 424 } 425 ierr = PetscFree(rvalues);CHKERRQ(ierr); 426 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 427 ierr = PetscFree(owner);CHKERRQ(ierr); 428 ierr = PetscFree(sizes);CHKERRQ(ierr); 429 430 /* fix right hand side if needed */ 431 if (x && b) { 432 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 433 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 434 for (i=0; i<slen; i++) { 435 bb[lrows[i]] = diag*xx[lrows[i]]; 436 } 437 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 438 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 439 } 440 441 /* actually zap the local rows */ 442 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 443 if (diag != 0.0) { 444 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 445 PetscInt m = ll->lda, i; 446 447 for (i=0; i<slen; i++) { 448 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 449 } 450 } 451 ierr = PetscFree(lrows);CHKERRQ(ierr); 452 453 /* wait on sends */ 454 if (nsends) { 455 ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); 456 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 457 ierr = PetscFree(send_status);CHKERRQ(ierr); 458 } 459 ierr = PetscFree(send_waits);CHKERRQ(ierr); 460 ierr = PetscFree(svalues);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 465 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 466 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 467 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatMult_MPIDense" 471 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 472 { 473 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 474 PetscErrorCode ierr; 475 476 PetscFunctionBegin; 477 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatMultAdd_MPIDense" 485 PetscErrorCode 
MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 486 { 487 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 488 PetscErrorCode ierr; 489 490 PetscFunctionBegin; 491 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 492 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 493 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "MatMultTranspose_MPIDense" 499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 500 { 501 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 502 PetscErrorCode ierr; 503 PetscScalar zero = 0.0; 504 505 PetscFunctionBegin; 506 ierr = VecSet(yy,zero);CHKERRQ(ierr); 507 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 508 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 509 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 510 PetscFunctionReturn(0); 511 } 512 513 #undef __FUNCT__ 514 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 515 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 516 { 517 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 518 PetscErrorCode ierr; 519 520 PetscFunctionBegin; 521 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 522 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 523 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 524 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 525 PetscFunctionReturn(0); 526 } 527 528 #undef __FUNCT__ 529 #define __FUNCT__ "MatGetDiagonal_MPIDense" 530 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 531 { 532 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 533 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 534 PetscErrorCode ierr; 535 PetscInt len,i,n,m = A->rmap->n,radd; 536 PetscScalar *x,zero = 0.0; 537 538 PetscFunctionBegin; 539 ierr = VecSet(v,zero);CHKERRQ(ierr); 540 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 541 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 542 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 543 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 544 radd = A->rmap->rstart*m; 545 for (i=0; i<len; i++) { 546 x[i] = aloc->v[radd + i*m + i]; 547 } 548 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 549 PetscFunctionReturn(0); 550 } 551 552 #undef __FUNCT__ 553 #define __FUNCT__ "MatDestroy_MPIDense" 554 PetscErrorCode MatDestroy_MPIDense(Mat mat) 555 { 556 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 557 PetscErrorCode ierr; 558 559 PetscFunctionBegin; 560 #if defined(PETSC_USE_LOG) 561 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 562 #endif 563 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 564 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 565 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 566 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 567 568 ierr = PetscFree(mat->data);CHKERRQ(ierr); 569 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 570 571 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 572 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 573 #if defined(PETSC_HAVE_ELEMENTAL) 574 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 575 #endif 576 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 577 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 578 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 579 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 580 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 581 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 582 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 583 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 584 PetscFunctionReturn(0); 585 } 586 587 #undef __FUNCT__ 588 #define __FUNCT__ "MatView_MPIDense_Binary" 589 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 590 { 591 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 592 PetscErrorCode ierr; 593 PetscViewerFormat format; 594 int fd; 595 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 596 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 597 PetscScalar *work,*v,*vv; 598 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 599 600 PetscFunctionBegin; 601 if (mdn->size == 1) { 602 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 603 } else { 604 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 605 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 606 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 607 608 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 609 if (format == PETSC_VIEWER_NATIVE) { 610 611 if (!rank) { 612 /* store the matrix as a dense matrix */ 613 header[0] = MAT_FILE_CLASSID; 614 header[1] = mat->rmap->N; 615 header[2] = N; 616 header[3] = MATRIX_BINARY_FORMAT_DENSE; 617 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 618 619 /* get largest work array needed for transposing array */ 620 mmax = mat->rmap->n; 621 for (i=1; i<size; i++) { 622 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 623 } 624 ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr); 625 626 /* write out local array, by rows */ 627 m = mat->rmap->n; 628 v = a->v; 629 for (j=0; j<N; j++) { 630 for (i=0; i<m; i++) { 631 work[j + i*N] = *v++; 632 } 633 } 634 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 635 /* get largest work array to receive messages from other processes, excludes process zero */ 636 mmax = 0; 637 for (i=1; i<size; i++) { 638 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 639 } 640 ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr); 641 for (k = 1; k < size; k++) { 642 v = vv; 643 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 644 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 645 646 for (j = 0; j < N; j++) { 647 for (i = 0; i < m; i++) { 648 work[j + i*N] = *v++; 649 } 650 } 651 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 652 } 653 ierr = PetscFree(work);CHKERRQ(ierr); 654 ierr = PetscFree(vv);CHKERRQ(ierr); 655 } else { 656 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 657 } 658 } else 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)"); 659 } 660 PetscFunctionReturn(0); 661 } 662 663 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 664 #include <petscdraw.h> 665 #undef __FUNCT__ 666 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 667 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 668 { 669 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 670 PetscErrorCode ierr; 671 PetscMPIInt rank = mdn->rank; 672 PetscViewerType vtype; 673 PetscBool iascii,isdraw; 674 PetscViewer sviewer; 675 PetscViewerFormat format; 676 677 PetscFunctionBegin; 678 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 679 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 680 if (iascii) { 681 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 682 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 683 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 684 MatInfo info; 685 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 686 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 687 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 688 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 689 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 690 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 691 PetscFunctionReturn(0); 692 } else if (format == PETSC_VIEWER_ASCII_INFO) { 693 PetscFunctionReturn(0); 694 } 695 } else if (isdraw) { 696 PetscDraw draw; 697 PetscBool isnull; 698 699 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 700 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 701 if (isnull) PetscFunctionReturn(0); 702 } 703 704 { 705 /* assemble the entire matrix onto first processor. */ 706 Mat A; 707 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 708 PetscInt *cols; 709 PetscScalar *vals; 710 711 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 712 if (!rank) { 713 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 714 } else { 715 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 716 } 717 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 718 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 719 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 720 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 721 722 /* Copy the matrix ... 
This isn't the most efficient means, 723 but it's quick for now */ 724 A->insertmode = INSERT_VALUES; 725 726 row = mat->rmap->rstart; 727 m = mdn->A->rmap->n; 728 for (i=0; i<m; i++) { 729 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 730 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 731 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 732 row++; 733 } 734 735 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 736 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 737 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 738 if (!rank) { 739 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 740 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 741 } 742 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 743 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 744 ierr = MatDestroy(&A);CHKERRQ(ierr); 745 } 746 PetscFunctionReturn(0); 747 } 748 749 #undef __FUNCT__ 750 #define __FUNCT__ "MatView_MPIDense" 751 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 752 { 753 PetscErrorCode ierr; 754 PetscBool iascii,isbinary,isdraw,issocket; 755 756 PetscFunctionBegin; 757 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 758 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 759 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 760 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 761 762 if (iascii || issocket || isdraw) { 763 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 764 } else if (isbinary) { 765 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 766 } 767 PetscFunctionReturn(0); 768 } 769 770 #undef __FUNCT__ 771 #define __FUNCT__ "MatGetInfo_MPIDense" 772 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 773 { 774 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 775 Mat mdn = mat->A; 776 PetscErrorCode ierr; 777 PetscReal isend[5],irecv[5]; 778 779 PetscFunctionBegin; 780 info->block_size = 1.0; 781 782 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 783 784 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 785 isend[3] = info->memory; isend[4] = info->mallocs; 786 if (flag == MAT_LOCAL) { 787 info->nz_used = isend[0]; 788 info->nz_allocated = isend[1]; 789 info->nz_unneeded = isend[2]; 790 info->memory = isend[3]; 791 info->mallocs = isend[4]; 792 } else if (flag == MAT_GLOBAL_MAX) { 793 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 794 795 info->nz_used = irecv[0]; 796 info->nz_allocated = irecv[1]; 797 info->nz_unneeded = irecv[2]; 798 info->memory = irecv[3]; 799 info->mallocs = irecv[4]; 800 } else if (flag == MAT_GLOBAL_SUM) { 801 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 802 803 info->nz_used = irecv[0]; 804 info->nz_allocated = irecv[1]; 805 info->nz_unneeded = irecv[2]; 806 info->memory = irecv[3]; 807 info->mallocs = irecv[4]; 808 } 809 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 810 info->fill_ratio_needed = 0; 811 info->factor_mallocs = 0; 812 PetscFunctionReturn(0); 813 } 814 815 #undef __FUNCT__ 816 #define __FUNCT__ "MatSetOption_MPIDense" 817 
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 818 { 819 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 820 PetscErrorCode ierr; 821 822 PetscFunctionBegin; 823 switch (op) { 824 case MAT_NEW_NONZERO_LOCATIONS: 825 case MAT_NEW_NONZERO_LOCATION_ERR: 826 case MAT_NEW_NONZERO_ALLOCATION_ERR: 827 MatCheckPreallocated(A,1); 828 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 829 break; 830 case MAT_ROW_ORIENTED: 831 MatCheckPreallocated(A,1); 832 a->roworiented = flg; 833 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 834 break; 835 case MAT_NEW_DIAGONALS: 836 case MAT_KEEP_NONZERO_PATTERN: 837 case MAT_USE_HASH_TABLE: 838 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 839 break; 840 case MAT_IGNORE_OFF_PROC_ENTRIES: 841 a->donotstash = flg; 842 break; 843 case MAT_SYMMETRIC: 844 case MAT_STRUCTURALLY_SYMMETRIC: 845 case MAT_HERMITIAN: 846 case MAT_SYMMETRY_ETERNAL: 847 case MAT_IGNORE_LOWER_TRIANGULAR: 848 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 849 break; 850 default: 851 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 852 } 853 PetscFunctionReturn(0); 854 } 855 856 857 #undef __FUNCT__ 858 #define __FUNCT__ "MatDiagonalScale_MPIDense" 859 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 860 { 861 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 862 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 863 PetscScalar *l,*r,x,*v; 864 PetscErrorCode ierr; 865 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 866 867 PetscFunctionBegin; 868 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 869 if (ll) { 870 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 871 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 872 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 873 for (i=0; i<m; i++) { 874 x = l[i]; 875 v = mat->v + i; 876 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 877 } 878 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 879 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 880 } 881 if (rr) { 882 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 883 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 884 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 887 for (i=0; i<n; i++) { 888 x = r[i]; 889 v = mat->v + i*m; 890 for (j=0; j<m; j++) (*v++) *= x; 891 } 892 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 893 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 894 } 895 PetscFunctionReturn(0); 896 } 897 898 #undef __FUNCT__ 899 #define __FUNCT__ "MatNorm_MPIDense" 900 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 901 { 902 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 903 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 904 PetscErrorCode ierr; 905 PetscInt i,j; 906 PetscReal sum = 0.0; 907 PetscScalar *v = mat->v; 908 909 PetscFunctionBegin; 910 if (mdn->size == 1) { 911 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 912 } else { 913 if (type == NORM_FROBENIUS) { 914 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 915 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 916 } 917 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 918 *nrm = PetscSqrtReal(*nrm); 919 ierr = 
PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 920 } else if (type == NORM_1) { 921 PetscReal *tmp,*tmp2; 922 ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 923 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 924 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 925 *nrm = 0.0; 926 v = mat->v; 927 for (j=0; j<mdn->A->cmap->n; j++) { 928 for (i=0; i<mdn->A->rmap->n; i++) { 929 tmp[j] += PetscAbsScalar(*v); v++; 930 } 931 } 932 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 933 for (j=0; j<A->cmap->N; j++) { 934 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 935 } 936 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 937 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 938 } else if (type == NORM_INFINITY) { /* max row norm */ 939 PetscReal ntemp; 940 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 941 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 942 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 943 } 944 PetscFunctionReturn(0); 945 } 946 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatTranspose_MPIDense" 949 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 950 { 951 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 952 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 953 Mat B; 954 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 955 PetscErrorCode ierr; 956 PetscInt j,i; 957 PetscScalar *v; 958 959 PetscFunctionBegin; 960 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place"); 961 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 962 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 963 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 964 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 965 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 966 } else { 967 B = *matout; 968 } 969 970 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 971 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 972 for (i=0; i<m; i++) rwork[i] = rstart + i; 973 for (j=0; j<n; j++) { 974 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 975 v += m; 976 } 977 ierr = PetscFree(rwork);CHKERRQ(ierr); 978 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 979 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 980 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 981 *matout = B; 982 } else { 983 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 984 } 985 PetscFunctionReturn(0); 986 } 987 988 989 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 990 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatSetUp_MPIDense" 994 PetscErrorCode MatSetUp_MPIDense(Mat A) 995 { 996 PetscErrorCode ierr; 997 998 PetscFunctionBegin; 999 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1000 PetscFunctionReturn(0); 1001 } 1002 1003 #undef __FUNCT__ 1004 #define __FUNCT__ "MatAXPY_MPIDense" 1005 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1006 { 1007 PetscErrorCode ierr; 1008 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1009 1010 PetscFunctionBegin; 1011 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1012 ierr = 
PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1013 PetscFunctionReturn(0); 1014 } 1015 1016 #undef __FUNCT__ 1017 #define __FUNCT__ "MatConjugate_MPIDense" 1018 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1019 { 1020 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1021 PetscErrorCode ierr; 1022 1023 PetscFunctionBegin; 1024 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1025 PetscFunctionReturn(0); 1026 } 1027 1028 #undef __FUNCT__ 1029 #define __FUNCT__ "MatRealPart_MPIDense" 1030 PetscErrorCode MatRealPart_MPIDense(Mat A) 1031 { 1032 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1033 PetscErrorCode ierr; 1034 1035 PetscFunctionBegin; 1036 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1037 PetscFunctionReturn(0); 1038 } 1039 1040 #undef __FUNCT__ 1041 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1042 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1043 { 1044 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1045 PetscErrorCode ierr; 1046 1047 PetscFunctionBegin; 1048 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1053 #undef __FUNCT__ 1054 #define __FUNCT__ "MatGetColumnNorms_MPIDense" 1055 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1056 { 1057 PetscErrorCode ierr; 1058 PetscInt i,n; 1059 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1060 PetscReal *work; 1061 1062 PetscFunctionBegin; 1063 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1064 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1065 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1066 if (type == NORM_2) { 1067 for (i=0; i<n; i++) work[i] *= work[i]; 1068 } 1069 if (type == NORM_INFINITY) { 1070 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1071 } else { 1072 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1073 } 1074 ierr = PetscFree(work);CHKERRQ(ierr); 1075 if (type == NORM_2) { 1076 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1077 } 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatSetRandom_MPIDense" 1083 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1084 { 1085 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1086 PetscErrorCode ierr; 1087 PetscScalar *a; 1088 PetscInt m,n,i; 1089 1090 PetscFunctionBegin; 1091 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1092 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1093 for (i=0; i<m*n; i++) { 1094 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1095 } 1096 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatMissingDiagonal_MPIDense" 1104 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1105 { 1106 PetscFunctionBegin; 1107 *missing = PETSC_FALSE; 1108 PetscFunctionReturn(0); 1109 } 1110 1111 /* -------------------------------------------------------------------*/ 1112 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1113 MatGetRow_MPIDense, 1114 MatRestoreRow_MPIDense, 1115 MatMult_MPIDense, 1116 /* 4*/ MatMultAdd_MPIDense, 1117 MatMultTranspose_MPIDense, 1118 MatMultTransposeAdd_MPIDense, 1119 0, 1120 0, 1121 0, 1122 /* 10*/ 0, 1123 0, 1124 0, 1125 0, 1126 MatTranspose_MPIDense, 1127 /* 15*/ MatGetInfo_MPIDense, 1128 MatEqual_MPIDense, 1129 MatGetDiagonal_MPIDense, 1130 
MatDiagonalScale_MPIDense, 1131 MatNorm_MPIDense, 1132 /* 20*/ MatAssemblyBegin_MPIDense, 1133 MatAssemblyEnd_MPIDense, 1134 MatSetOption_MPIDense, 1135 MatZeroEntries_MPIDense, 1136 /* 24*/ MatZeroRows_MPIDense, 1137 0, 1138 0, 1139 0, 1140 0, 1141 /* 29*/ MatSetUp_MPIDense, 1142 0, 1143 0, 1144 0, 1145 0, 1146 /* 34*/ MatDuplicate_MPIDense, 1147 0, 1148 0, 1149 0, 1150 0, 1151 /* 39*/ MatAXPY_MPIDense, 1152 MatGetSubMatrices_MPIDense, 1153 0, 1154 MatGetValues_MPIDense, 1155 0, 1156 /* 44*/ 0, 1157 MatScale_MPIDense, 1158 MatShift_Basic, 1159 0, 1160 0, 1161 /* 49*/ MatSetRandom_MPIDense, 1162 0, 1163 0, 1164 0, 1165 0, 1166 /* 54*/ 0, 1167 0, 1168 0, 1169 0, 1170 0, 1171 /* 59*/ MatGetSubMatrix_MPIDense, 1172 MatDestroy_MPIDense, 1173 MatView_MPIDense, 1174 0, 1175 0, 1176 /* 64*/ 0, 1177 0, 1178 0, 1179 0, 1180 0, 1181 /* 69*/ 0, 1182 0, 1183 0, 1184 0, 1185 0, 1186 /* 74*/ 0, 1187 0, 1188 0, 1189 0, 1190 0, 1191 /* 79*/ 0, 1192 0, 1193 0, 1194 0, 1195 /* 83*/ MatLoad_MPIDense, 1196 0, 1197 0, 1198 0, 1199 0, 1200 0, 1201 #if defined(PETSC_HAVE_ELEMENTAL) 1202 /* 89*/ MatMatMult_MPIDense_MPIDense, 1203 MatMatMultSymbolic_MPIDense_MPIDense, 1204 #else 1205 /* 89*/ 0, 1206 0, 1207 #endif 1208 MatMatMultNumeric_MPIDense, 1209 0, 1210 0, 1211 /* 94*/ 0, 1212 0, 1213 0, 1214 0, 1215 0, 1216 /* 99*/ 0, 1217 0, 1218 0, 1219 MatConjugate_MPIDense, 1220 0, 1221 /*104*/ 0, 1222 MatRealPart_MPIDense, 1223 MatImaginaryPart_MPIDense, 1224 0, 1225 0, 1226 /*109*/ 0, 1227 0, 1228 0, 1229 0, 1230 MatMissingDiagonal_MPIDense, 1231 /*114*/ 0, 1232 0, 1233 0, 1234 0, 1235 0, 1236 /*119*/ 0, 1237 0, 1238 0, 1239 0, 1240 0, 1241 /*124*/ 0, 1242 MatGetColumnNorms_MPIDense, 1243 0, 1244 0, 1245 0, 1246 /*129*/ 0, 1247 MatTransposeMatMult_MPIDense_MPIDense, 1248 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1249 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1250 0, 1251 /*134*/ 0, 1252 0, 1253 0, 1254 0, 1255 0, 1256 /*139*/ 0, 1257 0, 1258 0 1259 }; 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1263 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1264 { 1265 Mat_MPIDense *a; 1266 PetscErrorCode ierr; 1267 1268 PetscFunctionBegin; 1269 mat->preallocated = PETSC_TRUE; 1270 /* Note: For now, when data is specified above, this assumes the user correctly 1271 allocates the local dense storage space. We should add error checking. 
*/ 1272 1273 a = (Mat_MPIDense*)mat->data; 1274 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1275 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1276 a->nvec = mat->cmap->n; 1277 1278 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1279 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1280 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1281 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1282 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #if defined(PETSC_HAVE_ELEMENTAL) 1287 #undef __FUNCT__ 1288 #define __FUNCT__ "MatConvert_MPIDense_Elemental" 1289 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1290 { 1291 Mat mat_elemental; 1292 PetscErrorCode ierr; 1293 PetscScalar *v; 1294 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1295 1296 PetscFunctionBegin; 1297 if (reuse == MAT_REUSE_MATRIX) { 1298 mat_elemental = *newmat; 1299 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1300 } else { 1301 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1302 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1303 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1304 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1305 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1306 } 1307 1308 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1309 for (i=0; i<N; i++) cols[i] = i; 1310 for (i=0; i<m; i++) rows[i] = rstart + i; 1311 1312 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1313 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1314 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1315 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1316 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1317 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1318 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1319 1320 if (reuse == MAT_INPLACE_MATRIX) { 1321 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1322 } else { 1323 *newmat = mat_elemental; 1324 } 1325 PetscFunctionReturn(0); 1326 } 1327 #endif 1328 1329 #undef __FUNCT__ 1330 #define __FUNCT__ "MatCreate_MPIDense" 1331 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1332 { 1333 Mat_MPIDense *a; 1334 PetscErrorCode ierr; 1335 1336 PetscFunctionBegin; 1337 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1338 mat->data = (void*)a; 1339 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1340 1341 mat->insertmode = NOT_SET_VALUES; 1342 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1343 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1344 1345 /* build cache for off array entries formed */ 1346 a->donotstash = PETSC_FALSE; 1347 1348 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1349 1350 /* stuff used for matrix vector multiply */ 1351 a->lvec = 0; 1352 a->Mvctx = 0; 1353 a->roworiented = PETSC_TRUE; 1354 1355 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1356 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1357 #if defined(PETSC_HAVE_ELEMENTAL) 1358 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 1359 #endif 1360 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1361 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1362 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1363 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1364 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1365 1366 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1367 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1368 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1369 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 1373 /*MC 1374 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1375 1376 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1377 and MATMPIDENSE otherwise. 1378 1379 Options Database Keys: 1380 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1381 1382 Level: beginner 1383 1384 1385 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1386 M*/ 1387 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1390 /*@C 1391 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1392 1393 Not collective 1394 1395 Input Parameters: 1396 . B - the matrix 1397 - data - optional location of matrix data. Set data=NULL for PETSc 1398 to control all matrix memory allocation. 1399 1400 Notes: 1401 The dense format is fully compatible with standard Fortran 77 1402 storage by columns. 1403 1404 The data input variable is intended primarily for Fortran programmers 1405 who wish to allocate their own matrix memory space. Most users should 1406 set data=NULL. 1407 1408 Level: intermediate 1409 1410 .keywords: matrix,dense, parallel 1411 1412 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1413 @*/ 1414 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1415 { 1416 PetscErrorCode ierr; 1417 1418 PetscFunctionBegin; 1419 ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } 1422 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatCreateDense" 1425 /*@C 1426 MatCreateDense - Creates a parallel matrix in dense format. 1427 1428 Collective on MPI_Comm 1429 1430 Input Parameters: 1431 + comm - MPI communicator 1432 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1433 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1434 . 
M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1435 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1436 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1437 to control all matrix memory allocation. 1438 1439 Output Parameter: 1440 . A - the matrix 1441 1442 Notes: 1443 The dense format is fully compatible with standard Fortran 77 1444 storage by columns. 1445 1446 The data input variable is intended primarily for Fortran programmers 1447 who wish to allocate their own matrix memory space. Most users should 1448 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 1449 1450 The user MUST specify either the local or global matrix dimensions 1451 (possibly both). 1452 1453 Level: intermediate 1454 1455 .keywords: matrix,dense, parallel 1456 1457 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1458 @*/ 1459 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1460 { 1461 PetscErrorCode ierr; 1462 PetscMPIInt size; 1463 1464 PetscFunctionBegin; 1465 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1466 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1467 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1468 if (size > 1) { 1469 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1470 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1471 if (data) { /* user provided data array, so no need to assemble */ 1472 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 1473 (*A)->assembled = PETSC_TRUE; 1474 } 1475 } else { 1476 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1477 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1478 } 1479 PetscFunctionReturn(0); 1480 } 1481 1482 #undef __FUNCT__ 1483 #define __FUNCT__ "MatDuplicate_MPIDense" 1484 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1485 { 1486 Mat mat; 1487 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1488 PetscErrorCode ierr; 1489 1490 PetscFunctionBegin; 1491 *newmat = 0; 1492 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 1493 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1494 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1495 a = (Mat_MPIDense*)mat->data; 1496 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1497 1498 mat->factortype = A->factortype; 1499 mat->assembled = PETSC_TRUE; 1500 mat->preallocated = PETSC_TRUE; 1501 1502 a->size = oldmat->size; 1503 a->rank = oldmat->rank; 1504 mat->insertmode = NOT_SET_VALUES; 1505 a->nvec = oldmat->nvec; 1506 a->donotstash = oldmat->donotstash; 1507 1508 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1509 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1510 1511 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1512 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1513 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1514 1515 *newmat = mat; 1516 PetscFunctionReturn(0); 1517 } 1518 1519 #undef __FUNCT__ 1520 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1521 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat) 1522 { 1523 PetscErrorCode ierr; 1524 PetscMPIInt rank,size; 1525 const PetscInt *rowners; 1526 PetscInt i,m,n,nz,j,mMax; 1527 PetscScalar *array,*vals,*vals_ptr; 1528 Mat_MPIDense *a = 
(Mat_MPIDense*)newmat->data; 1529 1530 PetscFunctionBegin; 1531 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1532 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1533 1534 /* determine ownership of rows and columns */ 1535 m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n; 1536 n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n; 1537 1538 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1539 if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) { 1540 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1541 } 1542 ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 1543 ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr); 1544 ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr); 1545 ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr); 1546 if (!rank) { 1547 ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr); 1548 1549 /* read in my part of the matrix numerical values */ 1550 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1551 1552 /* insert into matrix-by row (this is why cannot directly read into array */ 1553 vals_ptr = vals; 1554 for (i=0; i<m; i++) { 1555 for (j=0; j<N; j++) { 1556 array[i + j*m] = *vals_ptr++; 1557 } 1558 } 1559 1560 /* read in other processors and ship out */ 1561 for (i=1; i<size; i++) { 1562 nz = (rowners[i+1] - rowners[i])*N; 1563 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1564 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1565 } 1566 } else { 1567 /* receive numeric values */ 1568 ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr); 1569 1570 /* receive message of values*/ 1571 ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1572 1573 /* insert into matrix-by row (this is why cannot directly read into array */ 1574 vals_ptr = vals; 1575 for (i=0; i<m; i++) { 1576 for (j=0; j<N; j++) { 1577 array[i + j*m] = *vals_ptr++; 1578 } 1579 } 1580 } 1581 ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr); 1582 ierr = PetscFree(vals);CHKERRQ(ierr); 1583 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1584 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "MatLoad_MPIDense" 1590 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1591 { 1592 Mat_MPIDense *a; 1593 PetscScalar *vals,*svals; 1594 MPI_Comm comm; 1595 MPI_Status status; 1596 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz; 1597 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1598 PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols; 1599 PetscInt i,nz,j,rstart,rend; 1600 int fd; 1601 PetscErrorCode ierr; 1602 1603 PetscFunctionBegin; 1604 /* force binary viewer to load .info file if it has not yet done so */ 1605 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1606 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 1607 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1608 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1609 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1610 if (!rank) { 1611 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 1612 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1613 } 1614 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1615 M = header[1]; N = header[2]; nz = header[3]; 1616 1617 /* If global rows/cols are set to 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  if (newmat->cmap->n < 0) {
    n = PETSC_DECIDE;
  } else {
    ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
  }

  ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

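  /*
     For the sparse (AIJ-format) file layout, process 0 reads the column indices
     and then the numerical values for every process in ownership order and ships
     each block with MPI_Send(); the other processes receive their block, whose
     size is known from the row lengths scattered above.
  */
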
  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)newmat->data;
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processes' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

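/*
   Example (illustrative sketch, not part of this source file): loading a parallel
   dense matrix from a PETSc binary file with MatLoad().  The file name
   "matrix.dat" is an assumption made only for the example.

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
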
#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatTransMatMult_MPIDense_MPIDense"
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a   = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTransposeMatMultNumeric_MPIDense_MPIDense"
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data,*b=(Mat_MPIDense*)B->data,*c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data,*bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          am,an,bn,aN;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum the partial products into the local rows of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

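/*
   Note on the algorithm above: with A and B distributed by rows, A^T*B is the sum
   over processes of the local products A_p^T * B_p.  Each process forms its full
   cM-by-cN partial product in atbarray with a single BLAS gemm, packs it into
   sendbuf so that the block destined for process p (the rows of C that p owns, in
   column-major order) is contiguous, and MPI_Reduce_scatter() then sums the
   partial products across processes while delivering to each process exactly its
   own rows of C.
*/
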
#undef __FUNCT__
#define __FUNCT__ "MatTransposeMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reusing Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = Cdense->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  c                    = (Mat_MPIDense*)Cdense->data;
  c->atbdense          = atb;
  atb->destroy         = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTransposeMatMult_MPIDense_MPIDense"
PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

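/*
   Example (illustrative sketch, not part of this source file): forming C = A^T * B
   for two parallel dense matrices with the same row layout, first with a fresh
   product matrix and then reusing it after the entries of A or B have changed.

     Mat C;

     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/
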
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatMatMult_MPIDense_MPIDense"
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a  = (Mat_MPIDense*)A->data;
  Mat_MatMultDense *ab = a->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);

  ierr = (ab->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c =(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MPIDense     *c;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* convert A and B to Elemental matrices Ae and Be */
  ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);CHKERRQ(ierr);
  ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX,&Be);CHKERRQ(ierr);

  /* Ce = Ae*Be */
  ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);

  /* convert Ce to C */
  ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);

  /* create data structure for reusing Cdense */
  ierr = PetscNew(&ab);CHKERRQ(ierr);
  c    = (Mat_MPIDense*)(*C)->data;

  c->abdense = ab;

  ab->Ae                    = Ae;
  ab->Be                    = Be;
  ab->Ce                    = Ce;
  ab->destroy               = (*C)->ops->destroy;
  (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
  (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) { /* the symbolic product includes the numeric product */
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  } else {
    ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
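
/*
   Example (illustrative sketch, not part of this source file): C = A * B for two
   MATMPIDENSE matrices.  As the #if guard above indicates, this product is only
   available when PETSc is configured with Elemental; A and B are assumed to have
   compatible layouts.

     Mat C;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/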