1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 #include <petscblaslapack.h> 10 11 /*@ 12 13 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the Seq or MPI dense matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 @*/ 25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 26 { 27 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 28 PetscErrorCode ierr; 29 PetscBool flg; 30 31 PetscFunctionBegin; 32 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 33 if (flg) *B = mat->A; 34 else *B = A; 35 PetscFunctionReturn(0); 36 } 37 38 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 39 { 40 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 41 PetscErrorCode ierr; 42 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 43 44 PetscFunctionBegin; 45 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 46 lrow = row - rstart; 47 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 48 PetscFunctionReturn(0); 49 } 50 51 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 52 { 53 PetscErrorCode ierr; 54 55 PetscFunctionBegin; 56 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 57 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 58 PetscFunctionReturn(0); 59 } 60 61 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 62 { 63 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 64 PetscErrorCode ierr; 65 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 66 PetscScalar *array; 67 MPI_Comm comm; 68 Mat B; 69 70 PetscFunctionBegin; 71 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 72 73 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 74 if (!B) { 75 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 76 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 77 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 78 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 79 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 80 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 81 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 82 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 83 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 84 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 85 *a = B; 86 ierr = MatDestroy(&B);CHKERRQ(ierr); 87 } else *a = B; 88 PetscFunctionReturn(0); 89 } 90 91 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 92 { 93 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 94 PetscErrorCode ierr; 95 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 96 PetscBool roworiented = A->roworiented; 97 98 PetscFunctionBegin; 99 for (i=0; i<m; i++) { 100 if (idxm[i] < 0) continue; 101 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 102 if (idxm[i] >= rstart && idxm[i] < rend) { 103 row = idxm[i] - rstart; 104 if (roworiented) { 105 
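        /* Row-oriented input: the values for this row are stored contiguously in v
           (v[i*n] .. v[i*n+n-1]), so the whole row can be passed to the inner
           sequential matrix in one call; the column-oriented branch below instead
           walks v with stride m, one entry at a time. */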
ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 106 } else { 107 for (j=0; j<n; j++) { 108 if (idxn[j] < 0) continue; 109 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 110 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 111 } 112 } 113 } else if (!A->donotstash) { 114 mat->assembled = PETSC_FALSE; 115 if (roworiented) { 116 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 117 } else { 118 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 119 } 120 } 121 } 122 PetscFunctionReturn(0); 123 } 124 125 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 126 { 127 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 128 PetscErrorCode ierr; 129 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 130 131 PetscFunctionBegin; 132 for (i=0; i<m; i++) { 133 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 134 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 135 if (idxm[i] >= rstart && idxm[i] < rend) { 136 row = idxm[i] - rstart; 137 for (j=0; j<n; j++) { 138 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 139 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 140 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 141 } 142 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 143 } 144 PetscFunctionReturn(0); 145 } 146 147 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 148 { 149 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 150 PetscErrorCode ierr; 151 152 PetscFunctionBegin; 153 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 154 PetscFunctionReturn(0); 155 } 156 157 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[]) 158 { 159 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 160 PetscErrorCode ierr; 161 162 PetscFunctionBegin; 163 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 164 PetscFunctionReturn(0); 165 } 166 167 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[]) 168 { 169 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 170 PetscErrorCode ierr; 171 172 PetscFunctionBegin; 173 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 174 PetscFunctionReturn(0); 175 } 176 177 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 178 { 179 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 180 PetscErrorCode ierr; 181 182 PetscFunctionBegin; 183 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 184 PetscFunctionReturn(0); 185 } 186 187 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 188 { 189 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 190 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 191 PetscErrorCode ierr; 192 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 193 const PetscInt *irow,*icol; 194 PetscScalar *av,*bv,*v = lmat->v; 195 Mat newmat; 196 IS iscol_local; 197 198 PetscFunctionBegin; 199 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 200 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 201 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 202 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 203 ierr = 
ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 204 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 205 206 /* No parallel redistribution currently supported! Should really check each index set 207 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 208 original matrix! */ 209 210 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 211 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 212 213 /* Check submatrix call */ 214 if (scall == MAT_REUSE_MATRIX) { 215 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 216 /* Really need to test rows and column sizes! */ 217 newmat = *B; 218 } else { 219 /* Create and fill new matrix */ 220 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 221 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 222 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 223 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 224 } 225 226 /* Now extract the data pointers and do the copy, column at a time */ 227 newmatd = (Mat_MPIDense*)newmat->data; 228 bv = ((Mat_SeqDense*)newmatd->A->data)->v; 229 230 for (i=0; i<Ncols; i++) { 231 av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 232 for (j=0; j<nrows; j++) { 233 *bv++ = av[irow[j] - rstart]; 234 } 235 } 236 237 /* Assemble the matrices so that the correct flags are set */ 238 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 239 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 240 241 /* Free work space */ 242 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 243 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 244 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 245 *B = newmat; 246 PetscFunctionReturn(0); 247 } 248 249 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 250 { 251 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 256 PetscFunctionReturn(0); 257 } 258 259 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[]) 260 { 261 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 262 PetscErrorCode ierr; 263 264 PetscFunctionBegin; 265 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 266 PetscFunctionReturn(0); 267 } 268 269 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 270 { 271 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 272 MPI_Comm comm; 273 PetscErrorCode ierr; 274 PetscInt nstash,reallocs; 275 InsertMode addv; 276 277 PetscFunctionBegin; 278 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 279 /* make sure all processors are either in INSERTMODE or ADDMODE */ 280 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 281 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 282 mat->insertmode = addv; /* in case this processor had no cache */ 283 284 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 285 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 286 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 291 
{ 292 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 293 PetscErrorCode ierr; 294 PetscInt i,*row,*col,flg,j,rstart,ncols; 295 PetscMPIInt n; 296 PetscScalar *val; 297 298 PetscFunctionBegin; 299 /* wait on receives */ 300 while (1) { 301 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 302 if (!flg) break; 303 304 for (i=0; i<n;) { 305 /* Now identify the consecutive vals belonging to the same row */ 306 for (j=i,rstart=row[j]; j<n; j++) { 307 if (row[j] != rstart) break; 308 } 309 if (j < n) ncols = j-i; 310 else ncols = n-i; 311 /* Now assemble all these values with a single function call */ 312 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 313 i = j; 314 } 315 } 316 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 317 318 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 319 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 320 321 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 322 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 323 } 324 PetscFunctionReturn(0); 325 } 326 327 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 328 { 329 PetscErrorCode ierr; 330 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 331 332 PetscFunctionBegin; 333 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 334 PetscFunctionReturn(0); 335 } 336 337 /* the code does not do the diagonal entries correctly unless the 338 matrix is square and the column and row owerships are identical. 339 This is a BUG. The only way to fix it seems to be to access 340 mdn->A and mdn->B directly and not through the MatZeroRows() 341 routine. 342 */ 343 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 344 { 345 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 346 PetscErrorCode ierr; 347 PetscInt i,*owners = A->rmap->range; 348 PetscInt *sizes,j,idx,nsends; 349 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 350 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 351 PetscInt *lens,*lrows,*values; 352 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 353 MPI_Comm comm; 354 MPI_Request *send_waits,*recv_waits; 355 MPI_Status recv_status,*send_status; 356 PetscBool found; 357 const PetscScalar *xx; 358 PetscScalar *bb; 359 360 PetscFunctionBegin; 361 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 362 if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 363 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 364 /* first count number of contributors to each processor */ 365 ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 366 ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/ 367 for (i=0; i<N; i++) { 368 idx = rows[i]; 369 found = PETSC_FALSE; 370 for (j=0; j<size; j++) { 371 if (idx >= owners[j] && idx < owners[j+1]) { 372 sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 373 } 374 } 375 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 376 } 377 nsends = 0; 378 for (i=0; i<size; i++) nsends += sizes[2*i+1]; 379 380 /* inform other processors of number of messages and max length*/ 381 ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr); 382 383 /* post receives: */ 384 ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr); 385 ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr); 386 for (i=0; i<nrecvs; i++) { 387 ierr = 
MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 388 } 389 390 /* do sends: 391 1) starts[i] gives the starting index in svalues for stuff going to 392 the ith processor 393 */ 394 ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr); 395 ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr); 396 ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr); 397 398 starts[0] = 0; 399 for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 400 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 401 402 starts[0] = 0; 403 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 404 count = 0; 405 for (i=0; i<size; i++) { 406 if (sizes[2*i+1]) { 407 ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 408 } 409 } 410 ierr = PetscFree(starts);CHKERRQ(ierr); 411 412 base = owners[rank]; 413 414 /* wait on receives */ 415 ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 416 count = nrecvs; 417 slen = 0; 418 while (count) { 419 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 420 /* unpack receives into our local space */ 421 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 422 423 source[imdex] = recv_status.MPI_SOURCE; 424 lens[imdex] = n; 425 slen += n; 426 count--; 427 } 428 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 429 430 /* move the data into the send scatter */ 431 ierr = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr); 432 count = 0; 433 for (i=0; i<nrecvs; i++) { 434 values = rvalues + i*nmax; 435 for (j=0; j<lens[i]; j++) { 436 lrows[count++] = values[j] - base; 437 } 438 } 439 ierr = PetscFree(rvalues);CHKERRQ(ierr); 440 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 441 ierr = PetscFree(owner);CHKERRQ(ierr); 442 ierr = PetscFree(sizes);CHKERRQ(ierr); 443 444 /* fix right hand side if needed */ 445 if (x && b) { 446 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 447 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 448 for (i=0; i<slen; i++) { 449 bb[lrows[i]] = diag*xx[lrows[i]]; 450 } 451 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 452 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 453 } 454 455 /* actually zap the local rows */ 456 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 457 if (diag != 0.0) { 458 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 459 PetscInt m = ll->lda, i; 460 461 for (i=0; i<slen; i++) { 462 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 463 } 464 } 465 ierr = PetscFree(lrows);CHKERRQ(ierr); 466 467 /* wait on sends */ 468 if (nsends) { 469 ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); 470 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 471 ierr = PetscFree(send_status);CHKERRQ(ierr); 472 } 473 ierr = PetscFree(send_waits);CHKERRQ(ierr); 474 ierr = PetscFree(svalues);CHKERRQ(ierr); 475 PetscFunctionReturn(0); 476 } 477 478 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 479 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 480 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 481 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 482 483 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 484 { 485 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 486 PetscErrorCode ierr; 487 488 PetscFunctionBegin; 489 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 490 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 491 ierr = 
MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 492 PetscFunctionReturn(0); 493 } 494 495 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 496 { 497 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 498 PetscErrorCode ierr; 499 500 PetscFunctionBegin; 501 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 502 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 503 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 504 PetscFunctionReturn(0); 505 } 506 507 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 508 { 509 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 510 PetscErrorCode ierr; 511 PetscScalar zero = 0.0; 512 513 PetscFunctionBegin; 514 ierr = VecSet(yy,zero);CHKERRQ(ierr); 515 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 516 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 517 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 518 PetscFunctionReturn(0); 519 } 520 521 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 522 { 523 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 524 PetscErrorCode ierr; 525 526 PetscFunctionBegin; 527 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 528 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 529 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 530 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 531 PetscFunctionReturn(0); 532 } 533 534 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 535 { 536 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 537 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 538 PetscErrorCode ierr; 539 PetscInt len,i,n,m = A->rmap->n,radd; 540 PetscScalar *x,zero = 0.0; 541 542 PetscFunctionBegin; 543 ierr = VecSet(v,zero);CHKERRQ(ierr); 544 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 545 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 546 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 547 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 548 radd = A->rmap->rstart*m; 549 for (i=0; i<len; i++) { 550 x[i] = aloc->v[radd + i*m + i]; 551 } 552 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 553 PetscFunctionReturn(0); 554 } 555 556 PetscErrorCode MatDestroy_MPIDense(Mat mat) 557 { 558 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 559 PetscErrorCode ierr; 560 561 PetscFunctionBegin; 562 #if defined(PETSC_USE_LOG) 563 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 564 #endif 565 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 566 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 567 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 568 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 569 570 ierr = PetscFree(mat->data);CHKERRQ(ierr); 571 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 572 573 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 574 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 576 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 577 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 578 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 579 #if defined(PETSC_HAVE_ELEMENTAL) 580 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 581 #endif 582 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 583 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 584 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 585 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 586 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 587 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 590 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 591 PetscFunctionReturn(0); 592 } 593 594 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 595 { 596 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 597 PetscErrorCode ierr; 598 PetscViewerFormat format; 599 int fd; 600 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 601 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 602 PetscScalar *work,*v,*vv; 603 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 604 605 PetscFunctionBegin; 606 if (mdn->size == 1) { 607 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 608 } else { 609 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 610 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 611 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 612 613 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 614 if (format == PETSC_VIEWER_NATIVE) { 615 616 if (!rank) { 617 /* store the matrix as a dense matrix */ 618 header[0] = MAT_FILE_CLASSID; 619 header[1] = mat->rmap->N; 620 header[2] = N; 621 header[3] = MATRIX_BINARY_FORMAT_DENSE; 622 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 623 624 /* get largest work array needed for transposing array */ 625 mmax = mat->rmap->n; 626 for (i=1; i<size; i++) { 627 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 628 } 629 ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr); 630 631 /* write out local array, by rows */ 632 m = mat->rmap->n; 633 v = a->v; 634 for (j=0; j<N; j++) { 635 for (i=0; i<m; i++) { 636 work[j + i*N] = *v++; 637 } 638 } 639 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 640 /* get largest work array to receive messages from other processes, excludes process zero */ 641 mmax = 0; 642 for (i=1; i<size; i++) { 643 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 644 } 645 ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr); 646 for (k = 1; k < size; k++) { 647 v = vv; 648 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 649 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 650 651 for (j = 0; j < N; j++) { 652 for (i = 0; i < m; i++) { 653 work[j + i*N] = *v++; 654 } 655 } 656 ierr = 
PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 657 } 658 ierr = PetscFree(work);CHKERRQ(ierr); 659 ierr = PetscFree(vv);CHKERRQ(ierr); 660 } else { 661 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 662 } 663 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)"); 664 } 665 PetscFunctionReturn(0); 666 } 667 668 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 669 #include <petscdraw.h> 670 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 671 { 672 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 673 PetscErrorCode ierr; 674 PetscMPIInt rank = mdn->rank; 675 PetscViewerType vtype; 676 PetscBool iascii,isdraw; 677 PetscViewer sviewer; 678 PetscViewerFormat format; 679 680 PetscFunctionBegin; 681 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 682 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 683 if (iascii) { 684 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 685 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 686 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 687 MatInfo info; 688 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 689 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 690 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 691 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 692 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 693 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } else if (format == PETSC_VIEWER_ASCII_INFO) { 696 PetscFunctionReturn(0); 697 } 698 } else if (isdraw) { 699 PetscDraw draw; 700 PetscBool isnull; 701 702 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 703 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 704 if (isnull) PetscFunctionReturn(0); 705 } 706 707 { 708 /* assemble the entire matrix onto first processor. */ 709 Mat A; 710 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 711 PetscInt *cols; 712 PetscScalar *vals; 713 714 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 715 if (!rank) { 716 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 717 } else { 718 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 719 } 720 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 721 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 722 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 723 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 724 725 /* Copy the matrix ... 
This isn't the most efficient means, 726 but it's quick for now */ 727 A->insertmode = INSERT_VALUES; 728 729 row = mat->rmap->rstart; 730 m = mdn->A->rmap->n; 731 for (i=0; i<m; i++) { 732 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 733 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 734 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 735 row++; 736 } 737 738 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 739 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 740 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 741 if (!rank) { 742 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 743 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 744 } 745 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 746 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 747 ierr = MatDestroy(&A);CHKERRQ(ierr); 748 } 749 PetscFunctionReturn(0); 750 } 751 752 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 753 { 754 PetscErrorCode ierr; 755 PetscBool iascii,isbinary,isdraw,issocket; 756 757 PetscFunctionBegin; 758 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 759 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 760 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 761 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 762 763 if (iascii || issocket || isdraw) { 764 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 765 } else if (isbinary) { 766 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 767 } 768 PetscFunctionReturn(0); 769 } 770 771 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 772 { 773 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 774 Mat mdn = mat->A; 775 PetscErrorCode ierr; 776 PetscReal isend[5],irecv[5]; 777 778 PetscFunctionBegin; 779 info->block_size = 1.0; 780 781 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 782 783 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 784 isend[3] = info->memory; isend[4] = info->mallocs; 785 if (flag == MAT_LOCAL) { 786 info->nz_used = isend[0]; 787 info->nz_allocated = isend[1]; 788 info->nz_unneeded = isend[2]; 789 info->memory = isend[3]; 790 info->mallocs = isend[4]; 791 } else if (flag == MAT_GLOBAL_MAX) { 792 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 793 794 info->nz_used = irecv[0]; 795 info->nz_allocated = irecv[1]; 796 info->nz_unneeded = irecv[2]; 797 info->memory = irecv[3]; 798 info->mallocs = irecv[4]; 799 } else if (flag == MAT_GLOBAL_SUM) { 800 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 801 802 info->nz_used = irecv[0]; 803 info->nz_allocated = irecv[1]; 804 info->nz_unneeded = irecv[2]; 805 info->memory = irecv[3]; 806 info->mallocs = irecv[4]; 807 } 808 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 809 info->fill_ratio_needed = 0; 810 info->factor_mallocs = 0; 811 PetscFunctionReturn(0); 812 } 813 814 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 815 { 816 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 817 PetscErrorCode ierr; 818 819 PetscFunctionBegin; 820 switch (op) { 
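  /* Each option is either forwarded to the inner sequential matrix, recorded in the
     Mat_MPIDense struct (row orientation, stashing of off-process entries), or noted
     and ignored because it does not apply to dense storage. */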
821 case MAT_NEW_NONZERO_LOCATIONS: 822 case MAT_NEW_NONZERO_LOCATION_ERR: 823 case MAT_NEW_NONZERO_ALLOCATION_ERR: 824 MatCheckPreallocated(A,1); 825 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 826 break; 827 case MAT_ROW_ORIENTED: 828 MatCheckPreallocated(A,1); 829 a->roworiented = flg; 830 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 831 break; 832 case MAT_NEW_DIAGONALS: 833 case MAT_KEEP_NONZERO_PATTERN: 834 case MAT_USE_HASH_TABLE: 835 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 836 break; 837 case MAT_IGNORE_OFF_PROC_ENTRIES: 838 a->donotstash = flg; 839 break; 840 case MAT_SYMMETRIC: 841 case MAT_STRUCTURALLY_SYMMETRIC: 842 case MAT_HERMITIAN: 843 case MAT_SYMMETRY_ETERNAL: 844 case MAT_IGNORE_LOWER_TRIANGULAR: 845 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 846 break; 847 default: 848 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 849 } 850 PetscFunctionReturn(0); 851 } 852 853 854 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 855 { 856 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 857 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 858 const PetscScalar *l,*r; 859 PetscScalar x,*v; 860 PetscErrorCode ierr; 861 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 862 863 PetscFunctionBegin; 864 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 865 if (ll) { 866 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 867 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 868 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 869 for (i=0; i<m; i++) { 870 x = l[i]; 871 v = mat->v + i; 872 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 873 } 874 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 875 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 876 } 877 if (rr) { 878 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 879 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 880 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 882 ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 883 for (i=0; i<n; i++) { 884 x = r[i]; 885 v = mat->v + i*m; 886 for (j=0; j<m; j++) (*v++) *= x; 887 } 888 ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 889 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 890 } 891 PetscFunctionReturn(0); 892 } 893 894 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 895 { 896 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 897 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 898 PetscErrorCode ierr; 899 PetscInt i,j; 900 PetscReal sum = 0.0; 901 PetscScalar *v = mat->v; 902 903 PetscFunctionBegin; 904 if (mdn->size == 1) { 905 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 906 } else { 907 if (type == NORM_FROBENIUS) { 908 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 909 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 910 } 911 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 912 *nrm = PetscSqrtReal(*nrm); 913 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 914 } else if (type == NORM_1) { 915 PetscReal *tmp,*tmp2; 916 ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 917 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 918 ierr = 
PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 919 *nrm = 0.0; 920 v = mat->v; 921 for (j=0; j<mdn->A->cmap->n; j++) { 922 for (i=0; i<mdn->A->rmap->n; i++) { 923 tmp[j] += PetscAbsScalar(*v); v++; 924 } 925 } 926 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 927 for (j=0; j<A->cmap->N; j++) { 928 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 929 } 930 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 931 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 932 } else if (type == NORM_INFINITY) { /* max row norm */ 933 PetscReal ntemp; 934 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 935 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 936 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 937 } 938 PetscFunctionReturn(0); 939 } 940 941 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 942 { 943 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 944 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 945 Mat B; 946 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 947 PetscErrorCode ierr; 948 PetscInt j,i; 949 PetscScalar *v; 950 951 PetscFunctionBegin; 952 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 953 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 954 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 955 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 956 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 957 } else { 958 B = *matout; 959 } 960 961 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 962 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 963 for (i=0; i<m; i++) rwork[i] = rstart + i; 964 for (j=0; j<n; j++) { 965 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 966 v += m; 967 } 968 ierr = PetscFree(rwork);CHKERRQ(ierr); 969 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 970 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 971 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 972 *matout = B; 973 } else { 974 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 975 } 976 PetscFunctionReturn(0); 977 } 978 979 980 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 981 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 982 983 PetscErrorCode MatSetUp_MPIDense(Mat A) 984 { 985 PetscErrorCode ierr; 986 987 PetscFunctionBegin; 988 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 989 PetscFunctionReturn(0); 990 } 991 992 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 993 { 994 PetscErrorCode ierr; 995 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 996 997 PetscFunctionBegin; 998 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 999 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1000 PetscFunctionReturn(0); 1001 } 1002 1003 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1004 { 1005 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1006 PetscErrorCode ierr; 1007 1008 PetscFunctionBegin; 1009 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 1013 PetscErrorCode MatRealPart_MPIDense(Mat A) 1014 { 1015 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1016 PetscErrorCode ierr; 1017 1018 PetscFunctionBegin; 1019 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1020 PetscFunctionReturn(0); 1021 } 1022 1023 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1024 
{ 1025 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1026 PetscErrorCode ierr; 1027 1028 PetscFunctionBegin; 1029 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1030 PetscFunctionReturn(0); 1031 } 1032 1033 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1034 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1035 { 1036 PetscErrorCode ierr; 1037 PetscInt i,n; 1038 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1039 PetscReal *work; 1040 1041 PetscFunctionBegin; 1042 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1043 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1044 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1045 if (type == NORM_2) { 1046 for (i=0; i<n; i++) work[i] *= work[i]; 1047 } 1048 if (type == NORM_INFINITY) { 1049 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1050 } else { 1051 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1052 } 1053 ierr = PetscFree(work);CHKERRQ(ierr); 1054 if (type == NORM_2) { 1055 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1056 } 1057 PetscFunctionReturn(0); 1058 } 1059 1060 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1061 { 1062 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1063 PetscErrorCode ierr; 1064 PetscScalar *a; 1065 PetscInt m,n,i; 1066 1067 PetscFunctionBegin; 1068 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1069 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1070 for (i=0; i<m*n; i++) { 1071 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1072 } 1073 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1078 1079 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1080 { 1081 PetscFunctionBegin; 1082 *missing = PETSC_FALSE; 1083 PetscFunctionReturn(0); 1084 } 1085 1086 /* -------------------------------------------------------------------*/ 1087 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1088 MatGetRow_MPIDense, 1089 MatRestoreRow_MPIDense, 1090 MatMult_MPIDense, 1091 /* 4*/ MatMultAdd_MPIDense, 1092 MatMultTranspose_MPIDense, 1093 MatMultTransposeAdd_MPIDense, 1094 0, 1095 0, 1096 0, 1097 /* 10*/ 0, 1098 0, 1099 0, 1100 0, 1101 MatTranspose_MPIDense, 1102 /* 15*/ MatGetInfo_MPIDense, 1103 MatEqual_MPIDense, 1104 MatGetDiagonal_MPIDense, 1105 MatDiagonalScale_MPIDense, 1106 MatNorm_MPIDense, 1107 /* 20*/ MatAssemblyBegin_MPIDense, 1108 MatAssemblyEnd_MPIDense, 1109 MatSetOption_MPIDense, 1110 MatZeroEntries_MPIDense, 1111 /* 24*/ MatZeroRows_MPIDense, 1112 0, 1113 0, 1114 0, 1115 0, 1116 /* 29*/ MatSetUp_MPIDense, 1117 0, 1118 0, 1119 MatGetDiagonalBlock_MPIDense, 1120 0, 1121 /* 34*/ MatDuplicate_MPIDense, 1122 0, 1123 0, 1124 0, 1125 0, 1126 /* 39*/ MatAXPY_MPIDense, 1127 MatCreateSubMatrices_MPIDense, 1128 0, 1129 MatGetValues_MPIDense, 1130 0, 1131 /* 44*/ 0, 1132 MatScale_MPIDense, 1133 MatShift_Basic, 1134 0, 1135 0, 1136 /* 49*/ MatSetRandom_MPIDense, 1137 0, 1138 0, 1139 0, 1140 0, 1141 /* 54*/ 0, 1142 0, 1143 0, 1144 0, 1145 0, 1146 /* 59*/ MatCreateSubMatrix_MPIDense, 1147 MatDestroy_MPIDense, 1148 MatView_MPIDense, 1149 0, 1150 0, 1151 /* 64*/ 0, 1152 0, 1153 0, 1154 0, 1155 0, 1156 /* 69*/ 0, 1157 0, 1158 0, 1159 0, 1160 0, 1161 /* 74*/ 0, 1162 0, 1163 0, 1164 0, 1165 0, 1166 /* 79*/ 0, 1167 0, 1168 0, 1169 0, 1170 /* 83*/ MatLoad_MPIDense, 1171 0, 1172 0, 1173 0, 1174 0, 1175 0, 1176 #if 
defined(PETSC_HAVE_ELEMENTAL) 1177 /* 89*/ MatMatMult_MPIDense_MPIDense, 1178 MatMatMultSymbolic_MPIDense_MPIDense, 1179 #else 1180 /* 89*/ 0, 1181 0, 1182 #endif 1183 MatMatMultNumeric_MPIDense, 1184 0, 1185 0, 1186 /* 94*/ 0, 1187 0, 1188 0, 1189 0, 1190 0, 1191 /* 99*/ 0, 1192 0, 1193 0, 1194 MatConjugate_MPIDense, 1195 0, 1196 /*104*/ 0, 1197 MatRealPart_MPIDense, 1198 MatImaginaryPart_MPIDense, 1199 0, 1200 0, 1201 /*109*/ 0, 1202 0, 1203 0, 1204 0, 1205 MatMissingDiagonal_MPIDense, 1206 /*114*/ 0, 1207 0, 1208 0, 1209 0, 1210 0, 1211 /*119*/ 0, 1212 0, 1213 0, 1214 0, 1215 0, 1216 /*124*/ 0, 1217 MatGetColumnNorms_MPIDense, 1218 0, 1219 0, 1220 0, 1221 /*129*/ 0, 1222 MatTransposeMatMult_MPIDense_MPIDense, 1223 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1224 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1225 0, 1226 /*134*/ 0, 1227 0, 1228 0, 1229 0, 1230 0, 1231 /*139*/ 0, 1232 0, 1233 0, 1234 0, 1235 0, 1236 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense 1237 }; 1238 1239 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1240 { 1241 Mat_MPIDense *a; 1242 PetscErrorCode ierr; 1243 1244 PetscFunctionBegin; 1245 mat->preallocated = PETSC_TRUE; 1246 /* Note: For now, when data is specified above, this assumes the user correctly 1247 allocates the local dense storage space. We should add error checking. */ 1248 1249 a = (Mat_MPIDense*)mat->data; 1250 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1251 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1252 a->nvec = mat->cmap->n; 1253 1254 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1255 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1256 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1257 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1258 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #if defined(PETSC_HAVE_ELEMENTAL) 1263 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1264 { 1265 Mat mat_elemental; 1266 PetscErrorCode ierr; 1267 PetscScalar *v; 1268 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1269 1270 PetscFunctionBegin; 1271 if (reuse == MAT_REUSE_MATRIX) { 1272 mat_elemental = *newmat; 1273 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1274 } else { 1275 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1276 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1277 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1278 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1279 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1280 } 1281 1282 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1283 for (i=0; i<N; i++) cols[i] = i; 1284 for (i=0; i<m; i++) rows[i] = rstart + i; 1285 1286 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1287 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1288 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1289 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1290 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1291 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1292 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1293 1294 if (reuse == MAT_INPLACE_MATRIX) { 1295 ierr = 
MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1296 } else { 1297 *newmat = mat_elemental; 1298 } 1299 PetscFunctionReturn(0); 1300 } 1301 #endif 1302 1303 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 1304 { 1305 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1306 PetscErrorCode ierr; 1307 1308 PetscFunctionBegin; 1309 ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 1310 PetscFunctionReturn(0); 1311 } 1312 1313 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1314 { 1315 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1316 PetscErrorCode ierr; 1317 1318 PetscFunctionBegin; 1319 ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1324 { 1325 PetscErrorCode ierr; 1326 Mat_MPIDense *mat; 1327 PetscInt m,nloc,N; 1328 1329 PetscFunctionBegin; 1330 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 1331 ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 1332 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1333 PetscInt sum; 1334 1335 if (n == PETSC_DECIDE) { 1336 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1337 } 1338 /* Check sum(n) = N */ 1339 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1340 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 1341 1342 ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr); 1343 } 1344 1345 /* numeric phase */ 1346 mat = (Mat_MPIDense*)(*outmat)->data; 1347 ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1348 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1349 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1350 PetscFunctionReturn(0); 1351 } 1352 1353 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1354 { 1355 Mat_MPIDense *a; 1356 PetscErrorCode ierr; 1357 1358 PetscFunctionBegin; 1359 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1360 mat->data = (void*)a; 1361 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1362 1363 mat->insertmode = NOT_SET_VALUES; 1364 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1365 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1366 1367 /* build cache for off array entries formed */ 1368 a->donotstash = PETSC_FALSE; 1369 1370 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1371 1372 /* stuff used for matrix vector multiply */ 1373 a->lvec = 0; 1374 a->Mvctx = 0; 1375 a->roworiented = PETSC_TRUE; 1376 1377 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1378 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1379 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr); 1380 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr); 1381 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1382 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 1383 #if 
defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
         to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray().
   The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[])
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to the array it had before the call
   to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
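
   Example usage (a minimal sketch added for illustration, not part of the original
   manual page; it lets PETSc choose the local sizes and allocate the storage by
   passing data=NULL, and uses the usual ierr/CHKERRQ error-checking style):
.vb
      Mat A;
      ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,NULL,&A);CHKERRQ(ierr);
      ierr = MatSetValue(A,0,0,2.0,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve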
1529 1530 Level: intermediate 1531 1532 .keywords: matrix,dense, parallel 1533 1534 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1535 @*/ 1536 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1537 { 1538 PetscErrorCode ierr; 1539 PetscMPIInt size; 1540 1541 PetscFunctionBegin; 1542 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1543 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1544 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1545 if (size > 1) { 1546 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1547 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1548 if (data) { /* user provided data array, so no need to assemble */ 1549 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 1550 (*A)->assembled = PETSC_TRUE; 1551 } 1552 } else { 1553 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1554 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1555 } 1556 PetscFunctionReturn(0); 1557 } 1558 1559 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1560 { 1561 Mat mat; 1562 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1563 PetscErrorCode ierr; 1564 1565 PetscFunctionBegin; 1566 *newmat = 0; 1567 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 1568 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1569 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1570 a = (Mat_MPIDense*)mat->data; 1571 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1572 1573 mat->factortype = A->factortype; 1574 mat->assembled = PETSC_TRUE; 1575 mat->preallocated = PETSC_TRUE; 1576 1577 a->size = oldmat->size; 1578 a->rank = oldmat->rank; 1579 mat->insertmode = NOT_SET_VALUES; 1580 a->nvec = oldmat->nvec; 1581 a->donotstash = oldmat->donotstash; 1582 1583 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1584 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1585 1586 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1587 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1588 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1589 1590 *newmat = mat; 1591 PetscFunctionReturn(0); 1592 } 1593 1594 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat) 1595 { 1596 PetscErrorCode ierr; 1597 PetscMPIInt rank,size; 1598 const PetscInt *rowners; 1599 PetscInt i,m,n,nz,j,mMax; 1600 PetscScalar *array,*vals,*vals_ptr; 1601 Mat_MPIDense *a = (Mat_MPIDense*)newmat->data; 1602 1603 PetscFunctionBegin; 1604 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1605 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1606 1607 /* determine ownership of rows and columns */ 1608 m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n; 1609 n = (newmat->cmap->n < 0) ? 
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  const PetscInt *rowners;
  PetscInt       i,m,n,nz,j,mMax;
  PetscScalar    *array,*vals,*vals_ptr;
  Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of rows and columns */
  m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
  n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
  ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
  ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processes' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
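/*
   Usage sketch (illustrative only, not part of the PETSc sources): loading a dense
   matrix from a PETSc binary file.  The file name "matrix.dat" is a hypothetical
   placeholder; MatLoad() dispatches to the dense-format reader above or to the
   general MatLoad_MPIDense() entry point below, depending on how the file was written.

     Mat            A;
     PetscViewer    viewer;
     PetscErrorCode ierr;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/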
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  if (newmat->cmap->n < 0) {
    n = PETSC_DECIDE;
  } else {
    ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)newmat->data;
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
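  /* Final pass over the file: rank 0 reads the numerical values destined for each
     process and ships them out; every other process receives its block and inserts
     it row by row with MatSetValues(). */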
  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processes' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a   = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense          *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          am,an,bn,aN;
  const PetscInt        *ranges;

  PetscFunctionBegin;
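  /* Algorithm: each process forms the dense local product (A_loc)^T * B_loc with a single
     BLAS GEMM into atbarray; the row block of C owned by each process is then obtained by
     summing these partial products across the communicator with MPI_Reduce_scatter. */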
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum the partial products into the local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reusing Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = Cdense->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  c            = (Mat_MPIDense*)Cdense->data;
  c->atbdense  = atb;
  atb->destroy = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
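/*
   Usage sketch (illustrative only, not part of the PETSc sources): forming C = A^T * B for
   two assembled MPIDENSE matrices through the generic interface, which dispatches to the
   routines above.  A and B are assumed to have compatible row layouts.

     Mat            C;
     PetscErrorCode ierr;

     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     // ... after changing the values of A or B, reuse the stored product data ...
     ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/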
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a  = (Mat_MPIDense*)A->data;
  Mat_MatMultDense *ab = a->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);

  ierr = (ab->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c =(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX,&ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MPIDense     *c;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* convert A and B to Elemental matrices Ae and Be */
  ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);CHKERRQ(ierr);
  ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX,&Be);CHKERRQ(ierr);

  /* Ce = Ae*Be */
  ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);

  /* convert Ce to C */
  ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);

  /* create data structure for reusing C */
  ierr = PetscNew(&ab);CHKERRQ(ierr);
  c          = (Mat_MPIDense*)(*C)->data;
  c->abdense = ab;

  ab->Ae      = Ae;
  ab->Be      = Be;
  ab->Ce      = Ce;
  ab->destroy = (*C)->ops->destroy;
  (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
  (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  } else {
    ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
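/*
   Usage sketch (illustrative only, not part of the PETSc sources): multiplying two
   distributed dense matrices through the generic interface.  As the #if guard above
   indicates, this MPIDENSE*MPIDENSE path is only compiled when PETSc is configured
   with Elemental; the operands are converted to MATELEMENTAL, multiplied there, and
   the product is converted back to MATMPIDENSE.

     Mat            C;
     PetscErrorCode ierr;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     // ... recompute the product after the values of A or B change ...
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/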