1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 #include <petscblaslapack.h> 10 11 /*@ 12 13 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the Seq or MPI dense matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 @*/ 25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 26 { 27 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 28 PetscErrorCode ierr; 29 PetscBool flg; 30 31 PetscFunctionBegin; 32 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 33 if (flg) *B = mat->A; 34 else *B = A; 35 PetscFunctionReturn(0); 36 } 37 38 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 39 { 40 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 41 PetscErrorCode ierr; 42 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 43 44 PetscFunctionBegin; 45 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 46 lrow = row - rstart; 47 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 48 PetscFunctionReturn(0); 49 } 50 51 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 52 { 53 PetscErrorCode ierr; 54 55 PetscFunctionBegin; 56 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 57 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 58 PetscFunctionReturn(0); 59 } 60 61 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 62 { 63 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 64 PetscErrorCode ierr; 65 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 66 PetscScalar *array; 67 MPI_Comm comm; 68 Mat B; 69 70 PetscFunctionBegin; 71 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 72 73 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 74 if (!B) { 75 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 76 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 77 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 78 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 79 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 80 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 81 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 82 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 83 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 84 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 85 *a = B; 86 ierr = MatDestroy(&B);CHKERRQ(ierr); 87 } else *a = B; 88 PetscFunctionReturn(0); 89 } 90 91 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 92 { 93 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 94 PetscErrorCode ierr; 95 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 96 PetscBool roworiented = A->roworiented; 97 98 PetscFunctionBegin; 99 for (i=0; i<m; i++) { 100 if (idxm[i] < 0) continue; 101 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 102 if (idxm[i] >= rstart && idxm[i] < rend) { 103 row = idxm[i] - rstart; 104 if (roworiented) { 105 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 106 } else { 107 for (j=0; j<n; j++) { 108 if (idxn[j] < 0) continue; 109 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 110 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 111 } 112 } 113 } else if (!A->donotstash) { 114 mat->assembled = PETSC_FALSE; 115 if (roworiented) { 116 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 117 } else { 118 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 119 } 120 } 121 } 122 PetscFunctionReturn(0); 123 } 124 125 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 126 { 127 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 128 PetscErrorCode ierr; 129 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 130 131 PetscFunctionBegin; 132 for (i=0; i<m; i++) { 133 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 134 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 135 if (idxm[i] >= rstart && idxm[i] < rend) { 136 row = idxm[i] - rstart; 137 for (j=0; j<n; j++) { 138 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 139 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 140 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 141 } 142 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 143 } 144 PetscFunctionReturn(0); 145 } 146 147 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 148 { 149 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 150 PetscErrorCode ierr; 151 152 PetscFunctionBegin; 153 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 154 PetscFunctionReturn(0); 155 } 156 157 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[]) 158 { 159 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 160 PetscErrorCode ierr; 161 162 PetscFunctionBegin; 163 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 164 PetscFunctionReturn(0); 165 } 166 167 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[]) 168 { 169 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 170 PetscErrorCode ierr; 171 172 PetscFunctionBegin; 173 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 174 PetscFunctionReturn(0); 175 } 176 177 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 178 { 179 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 180 PetscErrorCode ierr; 181 182 PetscFunctionBegin; 183 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 184 PetscFunctionReturn(0); 185 } 186 187 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 188 { 189 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 190 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 191 PetscErrorCode ierr; 192 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 193 const PetscInt *irow,*icol; 194 PetscScalar *av,*bv,*v = lmat->v; 195 Mat newmat; 196 IS iscol_local; 197 198 PetscFunctionBegin; 199 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 200 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 201 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 202 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 203 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 204 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 205 206 /* No parallel redistribution currently supported! Should really check each index set 207 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 208 original matrix! */ 209 210 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 211 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 212 213 /* Check submatrix call */ 214 if (scall == MAT_REUSE_MATRIX) { 215 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 216 /* Really need to test rows and column sizes! */ 217 newmat = *B; 218 } else { 219 /* Create and fill new matrix */ 220 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 221 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 222 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 223 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 224 } 225 226 /* Now extract the data pointers and do the copy, column at a time */ 227 newmatd = (Mat_MPIDense*)newmat->data; 228 bv = ((Mat_SeqDense*)newmatd->A->data)->v; 229 230 for (i=0; i<Ncols; i++) { 231 av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 232 for (j=0; j<nrows; j++) { 233 *bv++ = av[irow[j] - rstart]; 234 } 235 } 236 237 /* Assemble the matrices so that the correct flags are set */ 238 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 239 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 240 241 /* Free work space */ 242 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 243 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 244 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 245 *B = newmat; 246 PetscFunctionReturn(0); 247 } 248 249 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 250 { 251 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 256 PetscFunctionReturn(0); 257 } 258 259 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[]) 260 { 261 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 262 PetscErrorCode ierr; 263 264 PetscFunctionBegin; 265 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 266 PetscFunctionReturn(0); 267 } 268 269 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 270 { 271 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 272 MPI_Comm comm; 273 PetscErrorCode ierr; 274 PetscInt nstash,reallocs; 275 InsertMode addv; 276 277 PetscFunctionBegin; 278 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 279 /* make sure all processors are either in INSERTMODE or ADDMODE */ 280 ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 281 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 282 mat->insertmode = addv; /* in case this processor had no cache */ 283 284 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 285 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 286 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 291 { 292 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 293 PetscErrorCode ierr; 294 PetscInt i,*row,*col,flg,j,rstart,ncols; 295 PetscMPIInt n; 296 PetscScalar *val; 297 298 PetscFunctionBegin; 299 /* wait on receives */ 300 while (1) { 301 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 302 if (!flg) break; 303 304 for (i=0; i<n;) { 305 /* Now identify the consecutive vals belonging to the same row */ 306 for (j=i,rstart=row[j]; j<n; j++) { 307 if (row[j] != rstart) break; 308 } 309 if (j < n) ncols = j-i; 310 else ncols = n-i; 311 /* Now assemble all these values with a single function call */ 312 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 313 i = j; 314 } 315 } 316 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 317 318 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 319 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 320 321 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 322 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 323 } 324 PetscFunctionReturn(0); 325 } 326 327 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 328 { 329 PetscErrorCode ierr; 330 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 331 332 PetscFunctionBegin; 333 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 334 PetscFunctionReturn(0); 335 } 336 337 /* the code does not do the diagonal entries correctly unless the 338 matrix is square and the column and row owerships are identical. 339 This is a BUG. The only way to fix it seems to be to access 340 mdn->A and mdn->B directly and not through the MatZeroRows() 341 routine. 342 */ 343 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 344 { 345 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 346 PetscErrorCode ierr; 347 PetscInt i,*owners = A->rmap->range; 348 PetscInt *sizes,j,idx,nsends; 349 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 350 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 351 PetscInt *lens,*lrows,*values; 352 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 353 MPI_Comm comm; 354 MPI_Request *send_waits,*recv_waits; 355 MPI_Status recv_status,*send_status; 356 PetscBool found; 357 const PetscScalar *xx; 358 PetscScalar *bb; 359 360 PetscFunctionBegin; 361 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 362 if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 363 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 364 /* first count number of contributors to each processor */ 365 ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 366 ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/ 367 for (i=0; i<N; i++) { 368 idx = rows[i]; 369 found = PETSC_FALSE; 370 for (j=0; j<size; j++) { 371 if (idx >= owners[j] && idx < owners[j+1]) { 372 sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 373 } 374 } 375 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 376 } 377 nsends = 0; 378 for (i=0; i<size; i++) nsends += sizes[2*i+1]; 379 380 /* inform other processors of number of messages and max length*/ 381 ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr); 382 383 /* post receives: */ 384 ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr); 385 ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr); 386 for (i=0; i<nrecvs; i++) { 387 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 388 } 389 390 /* do sends: 391 1) starts[i] gives the starting index in svalues for stuff going to 392 the ith processor 393 */ 394 ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr); 395 ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr); 396 ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr); 397 398 starts[0] = 0; 399 for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 400 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 401 402 starts[0] = 0; 403 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 404 count = 0; 405 for (i=0; i<size; i++) { 406 if (sizes[2*i+1]) { 407 ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 408 } 409 } 410 ierr = PetscFree(starts);CHKERRQ(ierr); 411 412 base = owners[rank]; 413 414 /* wait on receives */ 415 ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 416 count = nrecvs; 417 slen = 0; 418 while (count) { 419 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 420 /* unpack receives into our local space */ 421 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 422 423 source[imdex] = recv_status.MPI_SOURCE; 424 lens[imdex] = n; 425 slen += n; 426 count--; 427 } 428 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 429 430 /* move the data into the send scatter */ 431 ierr = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr); 432 count = 0; 433 for (i=0; i<nrecvs; i++) { 434 values = rvalues + i*nmax; 435 for (j=0; j<lens[i]; j++) { 436 lrows[count++] = values[j] - base; 437 } 438 } 439 ierr = PetscFree(rvalues);CHKERRQ(ierr); 440 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 441 ierr = PetscFree(owner);CHKERRQ(ierr); 442 ierr = PetscFree(sizes);CHKERRQ(ierr); 443 444 /* fix right hand side if needed */ 445 if (x && b) { 446 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 447 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 448 for (i=0; i<slen; i++) { 449 bb[lrows[i]] = diag*xx[lrows[i]]; 450 } 451 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 452 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 453 } 454 455 /* actually zap the local rows */ 456 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 457 if (diag != 0.0) { 458 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 459 PetscInt m = ll->lda, i; 460 461 for (i=0; i<slen; i++) { 462 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 463 } 464 } 465 ierr = PetscFree(lrows);CHKERRQ(ierr); 466 467 /* wait on sends */ 468 if (nsends) { 469 ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); 470 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 471 ierr = PetscFree(send_status);CHKERRQ(ierr); 472 } 473 ierr = PetscFree(send_waits);CHKERRQ(ierr); 474 ierr = PetscFree(svalues);CHKERRQ(ierr); 475 PetscFunctionReturn(0); 476 } 477 478 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 479 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 480 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 481 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 482 483 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 484 { 485 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 486 PetscErrorCode ierr; 487 488 PetscFunctionBegin; 489 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 490 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 491 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 492 PetscFunctionReturn(0); 493 } 494 495 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 496 { 497 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 498 PetscErrorCode ierr; 499 500 PetscFunctionBegin; 501 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 502 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 503 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 504 PetscFunctionReturn(0); 505 } 506 507 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 508 { 509 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 510 PetscErrorCode ierr; 511 PetscScalar zero = 0.0; 512 513 PetscFunctionBegin; 514 ierr = VecSet(yy,zero);CHKERRQ(ierr); 515 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 516 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 517 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 518 PetscFunctionReturn(0); 519 } 520 521 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 522 { 523 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 524 PetscErrorCode ierr; 525 526 PetscFunctionBegin; 527 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 528 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 529 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 530 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 531 PetscFunctionReturn(0); 532 } 533 534 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 535 { 536 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 537 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 538 PetscErrorCode ierr; 539 PetscInt len,i,n,m = A->rmap->n,radd; 540 PetscScalar *x,zero = 0.0; 541 542 PetscFunctionBegin; 543 ierr = VecSet(v,zero);CHKERRQ(ierr); 544 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 545 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 546 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 547 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 548 radd = A->rmap->rstart*m; 549 for (i=0; i<len; i++) { 550 x[i] = aloc->v[radd + i*m + i]; 551 } 552 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 553 PetscFunctionReturn(0); 554 } 555 556 PetscErrorCode MatDestroy_MPIDense(Mat mat) 557 { 558 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 559 PetscErrorCode ierr; 560 561 PetscFunctionBegin; 562 #if defined(PETSC_USE_LOG) 563 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 564 #endif 565 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 566 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 567 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 568 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 569 570 ierr = PetscFree(mat->data);CHKERRQ(ierr); 571 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 572 573 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 574 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 576 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 577 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 578 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 579 #if defined(PETSC_HAVE_ELEMENTAL) 580 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 581 #endif 582 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 583 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 584 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 585 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 586 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 587 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 590 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 591 PetscFunctionReturn(0); 592 } 593 594 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 595 { 596 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 597 PetscErrorCode ierr; 598 PetscViewerFormat format; 599 int fd; 600 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 601 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 602 PetscScalar *work,*v,*vv; 603 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 604 605 PetscFunctionBegin; 606 if (mdn->size == 1) { 607 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 608 } else { 609 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 610 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 611 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 612 613 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 614 if (format == PETSC_VIEWER_NATIVE) { 615 616 if (!rank) { 617 /* store the matrix as a dense matrix */ 618 header[0] = MAT_FILE_CLASSID; 619 header[1] = mat->rmap->N; 620 header[2] = N; 621 header[3] = MATRIX_BINARY_FORMAT_DENSE; 622 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 623 624 /* get largest work array needed for transposing array */ 625 mmax = mat->rmap->n; 626 for (i=1; i<size; i++) { 627 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 628 } 629 ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr); 630 631 /* write out local array, by rows */ 632 m = mat->rmap->n; 633 v = a->v; 634 for (j=0; j<N; j++) { 635 for (i=0; i<m; i++) { 636 work[j + i*N] = *v++; 637 } 638 } 639 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 640 /* get largest work array to receive messages from other processes, excludes process zero */ 641 mmax = 0; 642 for (i=1; i<size; i++) { 643 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 644 } 645 ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr); 646 for (k = 1; k < size; k++) { 647 v = vv; 648 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 649 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 650 651 for (j = 0; j < N; j++) { 652 for (i = 0; i < m; i++) { 653 work[j + i*N] = *v++; 654 } 655 } 656 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 657 } 658 ierr = PetscFree(work);CHKERRQ(ierr); 659 ierr = PetscFree(vv);CHKERRQ(ierr); 660 } else { 661 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 662 } 663 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)"); 664 } 665 PetscFunctionReturn(0); 666 } 667 668 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 669 #include <petscdraw.h> 670 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 671 { 672 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 673 PetscErrorCode ierr; 674 PetscMPIInt rank = mdn->rank; 675 PetscViewerType vtype; 676 PetscBool iascii,isdraw; 677 PetscViewer sviewer; 678 PetscViewerFormat format; 679 680 PetscFunctionBegin; 681 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 682 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 683 if (iascii) { 684 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 685 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 686 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 687 MatInfo info; 688 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 689 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 690 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 691 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 692 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 693 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } else if (format == PETSC_VIEWER_ASCII_INFO) { 696 PetscFunctionReturn(0); 697 } 698 } else if (isdraw) { 699 PetscDraw draw; 700 PetscBool isnull; 701 702 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 703 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 704 if (isnull) PetscFunctionReturn(0); 705 } 706 707 { 708 /* assemble the entire matrix onto first processor. */ 709 Mat A; 710 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 711 PetscInt *cols; 712 PetscScalar *vals; 713 714 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 715 if (!rank) { 716 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 717 } else { 718 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 719 } 720 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 721 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 722 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 723 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 724 725 /* Copy the matrix ... This isn't the most efficient means, 726 but it's quick for now */ 727 A->insertmode = INSERT_VALUES; 728 729 row = mat->rmap->rstart; 730 m = mdn->A->rmap->n; 731 for (i=0; i<m; i++) { 732 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 733 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 734 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 735 row++; 736 } 737 738 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 739 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 740 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 741 if (!rank) { 742 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 743 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 744 } 745 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 746 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 747 ierr = MatDestroy(&A);CHKERRQ(ierr); 748 } 749 PetscFunctionReturn(0); 750 } 751 752 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 753 { 754 PetscErrorCode ierr; 755 PetscBool iascii,isbinary,isdraw,issocket; 756 757 PetscFunctionBegin; 758 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 759 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 760 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 761 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 762 763 if (iascii || issocket || isdraw) { 764 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 765 } else if (isbinary) { 766 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 767 } 768 PetscFunctionReturn(0); 769 } 770 771 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 772 { 773 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 774 Mat mdn = mat->A; 775 PetscErrorCode ierr; 776 PetscReal isend[5],irecv[5]; 777 778 PetscFunctionBegin; 779 info->block_size = 1.0; 780 781 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 782 783 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 784 isend[3] = info->memory; isend[4] = info->mallocs; 785 if (flag == MAT_LOCAL) { 786 info->nz_used = isend[0]; 787 info->nz_allocated = isend[1]; 788 info->nz_unneeded = isend[2]; 789 info->memory = isend[3]; 790 info->mallocs = isend[4]; 791 } else if (flag == MAT_GLOBAL_MAX) { 792 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 793 794 info->nz_used = irecv[0]; 795 info->nz_allocated = irecv[1]; 796 info->nz_unneeded = irecv[2]; 797 info->memory = irecv[3]; 798 info->mallocs = irecv[4]; 799 } else if (flag == MAT_GLOBAL_SUM) { 800 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 801 802 info->nz_used = irecv[0]; 803 info->nz_allocated = irecv[1]; 804 info->nz_unneeded = irecv[2]; 805 info->memory = irecv[3]; 806 info->mallocs = irecv[4]; 807 } 808 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 809 info->fill_ratio_needed = 0; 810 info->factor_mallocs = 0; 811 PetscFunctionReturn(0); 812 } 813 814 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 815 { 816 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 817 PetscErrorCode ierr; 818 819 PetscFunctionBegin; 820 switch (op) { 821 case MAT_NEW_NONZERO_LOCATIONS: 822 case MAT_NEW_NONZERO_LOCATION_ERR: 823 case MAT_NEW_NONZERO_ALLOCATION_ERR: 824 MatCheckPreallocated(A,1); 825 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 826 break; 827 case MAT_ROW_ORIENTED: 828 MatCheckPreallocated(A,1); 829 a->roworiented = flg; 830 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 831 break; 832 case MAT_NEW_DIAGONALS: 833 case MAT_KEEP_NONZERO_PATTERN: 834 case MAT_USE_HASH_TABLE: 835 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 836 break; 837 case MAT_IGNORE_OFF_PROC_ENTRIES: 838 a->donotstash = flg; 839 break; 840 case MAT_SYMMETRIC: 841 case MAT_STRUCTURALLY_SYMMETRIC: 842 case MAT_HERMITIAN: 843 case MAT_SYMMETRY_ETERNAL: 844 case MAT_IGNORE_LOWER_TRIANGULAR: 845 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 846 break; 847 default: 848 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 849 } 850 PetscFunctionReturn(0); 851 } 852 853 854 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 855 { 856 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 857 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 858 const PetscScalar *l,*r; 859 PetscScalar x,*v; 860 PetscErrorCode ierr; 861 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 862 863 PetscFunctionBegin; 864 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 865 if (ll) { 866 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 867 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 868 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 869 for (i=0; i<m; i++) { 870 x = l[i]; 871 v = mat->v + i; 872 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 873 } 874 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 875 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 876 } 877 if (rr) { 878 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 879 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 880 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 882 ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 883 for (i=0; i<n; i++) { 884 x = r[i]; 885 v = mat->v + i*m; 886 for (j=0; j<m; j++) (*v++) *= x; 887 } 888 ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 889 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 890 } 891 PetscFunctionReturn(0); 892 } 893 894 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 895 { 896 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 897 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 898 PetscErrorCode ierr; 899 PetscInt i,j; 900 PetscReal sum = 0.0; 901 PetscScalar *v = mat->v; 902 903 PetscFunctionBegin; 904 if (mdn->size == 1) { 905 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 906 } else { 907 if (type == NORM_FROBENIUS) { 908 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 909 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 910 } 911 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 912 *nrm = PetscSqrtReal(*nrm); 913 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 914 } else if (type == NORM_1) { 915 PetscReal *tmp,*tmp2; 916 ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 917 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 918 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 919 *nrm = 0.0; 920 v = mat->v; 921 for (j=0; j<mdn->A->cmap->n; j++) { 922 for (i=0; i<mdn->A->rmap->n; i++) { 923 tmp[j] += PetscAbsScalar(*v); v++; 924 } 925 } 926 ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 927 for (j=0; j<A->cmap->N; j++) { 928 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 929 } 930 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 931 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 932 } else if (type == NORM_INFINITY) { /* max row norm */ 933 PetscReal ntemp; 934 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 935 ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 936 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 937 } 938 PetscFunctionReturn(0); 939 } 940 941 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 942 { 943 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 944 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 945 Mat B; 946 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 947 PetscErrorCode ierr; 948 PetscInt j,i; 949 PetscScalar *v; 950 951 PetscFunctionBegin; 952 if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place"); 953 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 954 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 955 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 956 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 957 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 958 } else { 959 B = *matout; 960 } 961 962 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 963 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 964 for (i=0; i<m; i++) rwork[i] = rstart + i; 965 for (j=0; j<n; j++) { 966 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 967 v += m; 968 } 969 ierr = PetscFree(rwork);CHKERRQ(ierr); 970 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 971 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 972 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 973 *matout = B; 974 } else { 975 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 976 } 977 PetscFunctionReturn(0); 978 } 979 980 981 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 982 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 983 984 PetscErrorCode MatSetUp_MPIDense(Mat A) 985 { 986 PetscErrorCode ierr; 987 988 PetscFunctionBegin; 989 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } 992 993 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 994 { 995 PetscErrorCode ierr; 996 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 997 998 PetscFunctionBegin; 999 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1000 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1001 PetscFunctionReturn(0); 1002 } 1003 1004 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1005 { 1006 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1007 PetscErrorCode ierr; 1008 1009 PetscFunctionBegin; 1010 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1011 PetscFunctionReturn(0); 1012 } 1013 1014 PetscErrorCode MatRealPart_MPIDense(Mat A) 1015 { 1016 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1017 PetscErrorCode ierr; 1018 1019 PetscFunctionBegin; 1020 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1025 { 1026 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1027 PetscErrorCode ierr; 1028 1029 PetscFunctionBegin; 1030 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1031 PetscFunctionReturn(0); 1032 } 1033 1034 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1035 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1036 { 1037 PetscErrorCode ierr; 1038 PetscInt i,n; 1039 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1040 PetscReal *work; 1041 1042 PetscFunctionBegin; 1043 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1044 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1045 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1046 if (type == NORM_2) { 1047 for (i=0; i<n; i++) work[i] *= work[i]; 1048 } 1049 if (type == NORM_INFINITY) { 1050 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1051 } else { 1052 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1053 } 1054 ierr = PetscFree(work);CHKERRQ(ierr); 1055 if (type == NORM_2) { 1056 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1057 } 1058 PetscFunctionReturn(0); 1059 } 1060 1061 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1062 { 1063 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1064 PetscErrorCode ierr; 1065 PetscScalar *a; 1066 PetscInt m,n,i; 1067 1068 PetscFunctionBegin; 1069 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1070 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1071 for (i=0; i<m*n; i++) { 1072 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1073 } 1074 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } 1077 1078 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1079 1080 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1081 { 1082 PetscFunctionBegin; 1083 *missing = PETSC_FALSE; 1084 PetscFunctionReturn(0); 1085 } 1086 1087 /* -------------------------------------------------------------------*/ 1088 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1089 MatGetRow_MPIDense, 1090 MatRestoreRow_MPIDense, 1091 MatMult_MPIDense, 1092 /* 4*/ MatMultAdd_MPIDense, 1093 MatMultTranspose_MPIDense, 1094 MatMultTransposeAdd_MPIDense, 1095 0, 1096 0, 1097 0, 1098 /* 10*/ 0, 1099 0, 1100 0, 1101 0, 1102 MatTranspose_MPIDense, 1103 /* 15*/ MatGetInfo_MPIDense, 1104 MatEqual_MPIDense, 1105 MatGetDiagonal_MPIDense, 1106 MatDiagonalScale_MPIDense, 1107 MatNorm_MPIDense, 1108 /* 20*/ MatAssemblyBegin_MPIDense, 1109 MatAssemblyEnd_MPIDense, 1110 MatSetOption_MPIDense, 1111 MatZeroEntries_MPIDense, 1112 /* 24*/ MatZeroRows_MPIDense, 1113 0, 1114 0, 1115 0, 1116 0, 1117 /* 29*/ MatSetUp_MPIDense, 1118 0, 1119 0, 1120 MatGetDiagonalBlock_MPIDense, 1121 0, 1122 /* 34*/ MatDuplicate_MPIDense, 1123 0, 1124 0, 1125 0, 1126 0, 1127 /* 39*/ MatAXPY_MPIDense, 1128 MatCreateSubMatrices_MPIDense, 1129 0, 1130 MatGetValues_MPIDense, 1131 0, 1132 /* 44*/ 0, 1133 MatScale_MPIDense, 1134 MatShift_Basic, 1135 0, 1136 0, 1137 /* 49*/ MatSetRandom_MPIDense, 1138 0, 1139 0, 1140 0, 1141 0, 1142 /* 54*/ 0, 1143 0, 1144 0, 1145 0, 1146 0, 1147 /* 59*/ MatCreateSubMatrix_MPIDense, 1148 MatDestroy_MPIDense, 1149 MatView_MPIDense, 1150 0, 1151 0, 1152 /* 64*/ 0, 1153 0, 1154 0, 1155 0, 1156 0, 1157 /* 69*/ 0, 1158 0, 1159 0, 1160 0, 1161 0, 1162 /* 74*/ 0, 1163 0, 1164 0, 1165 0, 1166 0, 1167 /* 79*/ 0, 1168 0, 1169 0, 1170 0, 1171 /* 83*/ MatLoad_MPIDense, 1172 0, 1173 0, 1174 0, 1175 0, 1176 0, 1177 #if defined(PETSC_HAVE_ELEMENTAL) 1178 /* 89*/ MatMatMult_MPIDense_MPIDense, 1179 MatMatMultSymbolic_MPIDense_MPIDense, 1180 #else 1181 /* 89*/ 0, 1182 0, 1183 #endif 1184 MatMatMultNumeric_MPIDense, 1185 0, 1186 0, 1187 /* 94*/ 0, 1188 0, 1189 0, 1190 0, 1191 0, 1192 /* 99*/ 0, 1193 0, 1194 0, 1195 MatConjugate_MPIDense, 1196 0, 1197 /*104*/ 0, 1198 MatRealPart_MPIDense, 1199 MatImaginaryPart_MPIDense, 1200 0, 1201 0, 1202 /*109*/ 0, 1203 0, 1204 0, 1205 0, 1206 MatMissingDiagonal_MPIDense, 1207 /*114*/ 0, 1208 0, 1209 0, 1210 0, 1211 0, 1212 /*119*/ 0, 1213 0, 1214 0, 1215 0, 1216 0, 1217 /*124*/ 0, 1218 MatGetColumnNorms_MPIDense, 1219 0, 1220 0, 1221 0, 1222 /*129*/ 0, 1223 MatTransposeMatMult_MPIDense_MPIDense, 1224 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1225 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1226 0, 1227 /*134*/ 0, 1228 0, 1229 0, 1230 0, 1231 0, 1232 /*139*/ 0, 1233 0, 1234 0, 1235 0, 1236 0, 1237 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense 1238 }; 1239 1240 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1241 { 1242 Mat_MPIDense *a; 1243 PetscErrorCode ierr; 1244 1245 PetscFunctionBegin; 1246 mat->preallocated = PETSC_TRUE; 1247 /* Note: For now, when data is specified above, this assumes the user correctly 1248 allocates the local dense storage space. We should add error checking. */ 1249 1250 a = (Mat_MPIDense*)mat->data; 1251 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1252 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1253 a->nvec = mat->cmap->n; 1254 1255 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1256 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1257 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1258 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1259 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #if defined(PETSC_HAVE_ELEMENTAL) 1264 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1265 { 1266 Mat mat_elemental; 1267 PetscErrorCode ierr; 1268 PetscScalar *v; 1269 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1270 1271 PetscFunctionBegin; 1272 if (reuse == MAT_REUSE_MATRIX) { 1273 mat_elemental = *newmat; 1274 ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1275 } else { 1276 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1277 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1278 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1279 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1280 ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1281 } 1282 1283 ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 1284 for (i=0; i<N; i++) cols[i] = i; 1285 for (i=0; i<m; i++) rows[i] = rstart + i; 1286 1287 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1288 ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 1289 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 1290 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1291 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1292 ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 1293 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1294 1295 if (reuse == MAT_INPLACE_MATRIX) { 1296 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1297 } else { 1298 *newmat = mat_elemental; 1299 } 1300 PetscFunctionReturn(0); 1301 } 1302 #endif 1303 1304 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 1305 { 1306 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1307 PetscErrorCode ierr; 1308 1309 PetscFunctionBegin; 1310 ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 1311 PetscFunctionReturn(0); 1312 } 1313 1314 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1315 { 1316 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1317 PetscErrorCode ierr; 1318 1319 PetscFunctionBegin; 1320 ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1325 { 1326 PetscErrorCode ierr; 1327 Mat_MPIDense *mat; 1328 PetscInt m,nloc,N; 1329 1330 PetscFunctionBegin; 1331 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 1332 ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 1333 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1334 PetscInt sum; 1335 1336 if (n == PETSC_DECIDE) { 1337 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1338 } 1339 /* Check sum(n) = N */ 1340 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1341 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 1342 1343 ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr); 1344 } 1345 1346 /* numeric phase */ 1347 mat = (Mat_MPIDense*)(*outmat)->data; 1348 ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1349 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1350 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1351 PetscFunctionReturn(0); 1352 } 1353 1354 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1355 { 1356 Mat_MPIDense *a; 1357 PetscErrorCode ierr; 1358 1359 PetscFunctionBegin; 1360 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1361 mat->data = (void*)a; 1362 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1363 1364 mat->insertmode = NOT_SET_VALUES; 1365 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1366 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1367 1368 /* build cache for off array entries formed */ 1369 a->donotstash = PETSC_FALSE; 1370 1371 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1372 1373 /* stuff used for matrix vector multiply */ 1374 a->lvec = 0; 1375 a->Mvctx = 0; 1376 a->roworiented = PETSC_TRUE; 1377 1378 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1379 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1380 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr); 1381 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr); 1382 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1383 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 1384 #if defined(PETSC_HAVE_ELEMENTAL) 1385 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 1386 #endif 1387 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1388 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1389 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1390 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1391 1392 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1393 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1394 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1395 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 1396 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr); 1397 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 1401 /*MC 1402 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1403 1404 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1405 and MATMPIDENSE otherwise. 1406 1407 Options Database Keys: 1408 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1409 1410 Level: beginner 1411 1412 1413 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1414 M*/ 1415 1416 /*@C 1417 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1418 1419 Not collective 1420 1421 Input Parameters: 1422 . B - the matrix 1423 - data - optional location of matrix data. Set data=NULL for PETSc 1424 to control all matrix memory allocation. 1425 1426 Notes: 1427 The dense format is fully compatible with standard Fortran 77 1428 storage by columns. 1429 1430 The data input variable is intended primarily for Fortran programmers 1431 who wish to allocate their own matrix memory space. Most users should 1432 set data=NULL. 1433 1434 Level: intermediate 1435 1436 .keywords: matrix,dense, parallel 1437 1438 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1439 @*/ 1440 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1441 { 1442 PetscErrorCode ierr; 1443 1444 PetscFunctionBegin; 1445 ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1446 PetscFunctionReturn(0); 1447 } 1448 1449 /*@ 1450 MatDensePlaceArray - Allows one to replace the array in a dense array with an 1451 array provided by the user. This is useful to avoid copying an array 1452 into a matrix 1453 1454 Not Collective 1455 1456 Input Parameters: 1457 + mat - the matrix 1458 - array - the array in column major order 1459 1460 Notes: 1461 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 1462 freed when the matrix is destroyed. 1463 1464 Level: developer 1465 1466 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 1467 1468 @*/ 1469 PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[]) 1470 { 1471 PetscErrorCode ierr; 1472 PetscFunctionBegin; 1473 ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 1474 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 /*@ 1479 MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 1480 1481 Not Collective 1482 1483 Input Parameters: 1484 . mat - the matrix 1485 1486 Notes: 1487 You can only call this after a call to MatDensePlaceArray() 1488 1489 Level: developer 1490 1491 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 1492 1493 @*/ 1494 PetscErrorCode MatDenseResetArray(Mat mat) 1495 { 1496 PetscErrorCode ierr; 1497 PetscFunctionBegin; 1498 ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr); 1499 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1500 PetscFunctionReturn(0); 1501 } 1502 1503 /*@C 1504 MatCreateDense - Creates a parallel matrix in dense format. 1505 1506 Collective on MPI_Comm 1507 1508 Input Parameters: 1509 + comm - MPI communicator 1510 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1511 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1512 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1513 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1514 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1515 to control all matrix memory allocation. 1516 1517 Output Parameter: 1518 . A - the matrix 1519 1520 Notes: 1521 The dense format is fully compatible with standard Fortran 77 1522 storage by columns. 1523 1524 The data input variable is intended primarily for Fortran programmers 1525 who wish to allocate their own matrix memory space. Most users should 1526 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 1527 1528 The user MUST specify either the local or global matrix dimensions 1529 (possibly both). 1530 1531 Level: intermediate 1532 1533 .keywords: matrix,dense, parallel 1534 1535 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1536 @*/ 1537 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1538 { 1539 PetscErrorCode ierr; 1540 PetscMPIInt size; 1541 1542 PetscFunctionBegin; 1543 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1544 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1545 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1546 if (size > 1) { 1547 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1548 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1549 if (data) { /* user provided data array, so no need to assemble */ 1550 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 1551 (*A)->assembled = PETSC_TRUE; 1552 } 1553 } else { 1554 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1555 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1556 } 1557 PetscFunctionReturn(0); 1558 } 1559 1560 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1561 { 1562 Mat mat; 1563 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1564 PetscErrorCode ierr; 1565 1566 PetscFunctionBegin; 1567 *newmat = 0; 1568 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 1569 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1570 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1571 a = (Mat_MPIDense*)mat->data; 1572 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1573 1574 mat->factortype = A->factortype; 1575 mat->assembled = PETSC_TRUE; 1576 mat->preallocated = PETSC_TRUE; 1577 1578 a->size = oldmat->size; 1579 a->rank = oldmat->rank; 1580 mat->insertmode = NOT_SET_VALUES; 1581 a->nvec = oldmat->nvec; 1582 a->donotstash = oldmat->donotstash; 1583 1584 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1585 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1586 1587 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1588 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1589 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1590 1591 *newmat = mat; 1592 PetscFunctionReturn(0); 1593 } 1594 1595 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat) 1596 { 1597 PetscErrorCode ierr; 1598 PetscMPIInt rank,size; 1599 const PetscInt *rowners; 1600 PetscInt i,m,n,nz,j,mMax; 1601 PetscScalar *array,*vals,*vals_ptr; 1602 Mat_MPIDense *a = (Mat_MPIDense*)newmat->data; 1603 1604 PetscFunctionBegin; 1605 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1606 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1607 1608 /* determine ownership of rows and columns */ 1609 m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n; 1610 n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n; 1611 1612 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1613 if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) { 1614 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1615 } 1616 ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 1617 ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr); 1618 ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr); 1619 ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr); 1620 if (!rank) { 1621 ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr); 1622 1623 /* read in my part of the matrix numerical values */ 1624 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1625 1626 /* insert into matrix-by row (this is why cannot directly read into array */ 1627 vals_ptr = vals; 1628 for (i=0; i<m; i++) { 1629 for (j=0; j<N; j++) { 1630 array[i + j*m] = *vals_ptr++; 1631 } 1632 } 1633 1634 /* read in other processors and ship out */ 1635 for (i=1; i<size; i++) { 1636 nz = (rowners[i+1] - rowners[i])*N; 1637 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1638 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1639 } 1640 } else { 1641 /* receive numeric values */ 1642 ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr); 1643 1644 /* receive message of values*/ 1645 ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1646 1647 /* insert into matrix-by row (this is why cannot directly read into array */ 1648 vals_ptr = vals; 1649 for (i=0; i<m; i++) { 1650 for (j=0; j<N; j++) { 1651 array[i + j*m] = *vals_ptr++; 1652 } 1653 } 1654 } 1655 ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr); 1656 ierr = PetscFree(vals);CHKERRQ(ierr); 1657 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1658 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1659 PetscFunctionReturn(0); 1660 } 1661 1662 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1663 { 1664 Mat_MPIDense *a; 1665 PetscScalar *vals,*svals; 1666 MPI_Comm comm; 1667 MPI_Status status; 1668 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz; 1669 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1670 PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols; 1671 PetscInt i,nz,j,rstart,rend; 1672 int fd; 1673 PetscErrorCode ierr; 1674 1675 PetscFunctionBegin; 1676 /* force binary viewer to load .info file if it has not yet done so */ 1677 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1678 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 1679 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1680 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1681 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1682 if (!rank) { 1683 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 1684 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1685 } 1686 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1687 M = header[1]; N = header[2]; nz = header[3]; 1688 1689 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1690 if (newmat->rmap->N < 0) newmat->rmap->N = M; 1691 if (newmat->cmap->N < 0) newmat->cmap->N = N; 1692 1693 if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N); 1694 if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N); 1695 1696 /* 1697 Handle case where matrix is stored on disk as a dense matrix 1698 */ 1699 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1700 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 1704 /* determine ownership of all rows */ 1705 if (newmat->rmap->n < 0) { 1706 ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr); 1707 } else { 1708 ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr); 1709 } 1710 if (newmat->cmap->n < 0) { 1711 n = PETSC_DECIDE; 1712 } else { 1713 ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr); 1714 } 1715 1716 ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr); 1717 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1718 rowners[0] = 0; 1719 for (i=2; i<=size; i++) { 1720 rowners[i] += rowners[i-1]; 1721 } 1722 rstart = rowners[rank]; 1723 rend = rowners[rank+1]; 1724 1725 /* distribute row lengths to all processors */ 1726 ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr); 1727 if (!rank) { 1728 ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr); 1729 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1730 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 1731 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1732 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1733 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1734 } else { 1735 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1736 } 1737 1738 if (!rank) { 1739 /* calculate the number of nonzeros on each processor */ 1740 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 1741 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1742 for (i=0; i<size; i++) { 1743 for (j=rowners[i]; j< rowners[i+1]; j++) { 1744 procsnz[i] += rowlengths[j]; 1745 } 1746 } 1747 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1748 1749 /* determine max buffer needed and allocate it */ 1750 maxnz = 0; 1751 for (i=0; i<size; i++) { 1752 maxnz = PetscMax(maxnz,procsnz[i]); 1753 } 1754 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 1755 1756 /* read in my part of the matrix column indices */ 1757 nz = procsnz[0]; 1758 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 1759 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1760 1761 /* read in every one elses and ship off */ 1762 for (i=1; i<size; i++) { 1763 nz = procsnz[i]; 1764 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1765 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1766 } 1767 ierr = PetscFree(cols);CHKERRQ(ierr); 1768 } else { 1769 /* determine buffer space needed for message */ 1770 nz = 0; 1771 for (i=0; i<m; i++) { 1772 nz += ourlens[i]; 1773 } 1774 ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr); 1775 1776 /* receive message of column indices*/ 1777 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1778 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1779 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1780 } 1781 1782 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1783 a = (Mat_MPIDense*)newmat->data; 1784 if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) { 1785 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1786 } 1787 1788 if (!rank) { 1789 ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr); 1790 1791 /* read in my part of the matrix numerical values */ 1792 nz = procsnz[0]; 1793 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1794 1795 /* insert into matrix */ 1796 jj = rstart; 1797 smycols = mycols; 1798 svals = vals; 1799 for (i=0; i<m; i++) { 1800 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1801 smycols += ourlens[i]; 1802 svals += ourlens[i]; 1803 jj++; 1804 } 1805 1806 /* read in other processors and ship out */ 1807 for (i=1; i<size; i++) { 1808 nz = procsnz[i]; 1809 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1810 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 1811 } 1812 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1813 } else { 1814 /* receive numeric values */ 1815 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 1816 1817 /* receive message of values*/ 1818 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 1819 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1820 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1821 1822 /* insert into matrix */ 1823 jj = rstart; 1824 smycols = mycols; 1825 svals = vals; 1826 for (i=0; i<m; i++) { 1827 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1828 smycols += ourlens[i]; 1829 svals += ourlens[i]; 1830 jj++; 1831 } 1832 } 1833 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1834 ierr = PetscFree(vals);CHKERRQ(ierr); 1835 ierr = PetscFree(mycols);CHKERRQ(ierr); 1836 ierr = PetscFree(rowners);CHKERRQ(ierr); 1837 1838 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1839 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1840 PetscFunctionReturn(0); 1841 } 1842 1843 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 1844 { 1845 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1846 Mat a,b; 1847 PetscBool flg; 1848 PetscErrorCode ierr; 1849 1850 PetscFunctionBegin; 1851 a = matA->A; 1852 b = matB->A; 1853 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1854 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1855 PetscFunctionReturn(0); 1856 } 1857 1858 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A) 1859 { 1860 PetscErrorCode ierr; 1861 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1862 Mat_TransMatMultDense *atb = a->atbdense; 1863 1864 PetscFunctionBegin; 1865 ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr); 1866 ierr = (atb->destroy)(A);CHKERRQ(ierr); 1867 ierr = PetscFree(atb);CHKERRQ(ierr); 1868 PetscFunctionReturn(0); 1869 } 1870 1871 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1872 { 1873 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 1874 Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 1875 Mat_TransMatMultDense *atb = c->atbdense; 1876 PetscErrorCode ierr; 1877 MPI_Comm comm; 1878 PetscMPIInt rank,size,*recvcounts=atb->recvcounts; 1879 PetscScalar *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf; 1880 PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 1881 PetscScalar _DOne=1.0,_DZero=0.0; 1882 PetscBLASInt am,an,bn,aN; 1883 const PetscInt *ranges; 1884 1885 PetscFunctionBegin; 1886 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1887 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1888 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1889 1890 /* compute atbarray = aseq^T * bseq */ 1891 ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr); 1892 ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr); 1893 ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr); 1894 ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr); 1895 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN)); 1896 1897 ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 1898 for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 1899 1900 /* arrange atbarray into sendbuf */ 1901 k = 0; 1902 for (proc=0; proc<size; proc++) { 1903 for (j=0; j<cN; j++) { 1904 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 1905 } 1906 } 1907 /* sum all atbarray to local values of C */ 1908 ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr); 1909 ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 1910 ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr); 1911 PetscFunctionReturn(0); 1912 } 1913 1914 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1915 { 1916 PetscErrorCode ierr; 1917 Mat Cdense; 1918 MPI_Comm comm; 1919 PetscMPIInt size; 1920 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 1921 Mat_MPIDense *c; 1922 Mat_TransMatMultDense *atb; 1923 1924 PetscFunctionBegin; 1925 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1926 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 1927 SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 1928 } 1929 1930 /* create matrix product Cdense */ 1931 ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr); 1932 ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1933 ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr); 1934 ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr); 1935 ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1936 ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1937 *C = Cdense; 1938 1939 /* create data structure for reuse Cdense */ 1940 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1941 ierr = PetscNew(&atb);CHKERRQ(ierr); 1942 cM = Cdense->rmap->N; 1943 ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr); 1944 1945 c = (Mat_MPIDense*)Cdense->data; 1946 c->atbdense = atb; 1947 atb->destroy = Cdense->ops->destroy; 1948 Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 1949 PetscFunctionReturn(0); 1950 } 1951 1952 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1953 { 1954 PetscErrorCode ierr; 1955 1956 PetscFunctionBegin; 1957 if (scall == MAT_INITIAL_MATRIX) { 1958 ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1959 } 1960 ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1961 PetscFunctionReturn(0); 1962 } 1963 1964 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A) 1965 { 1966 PetscErrorCode ierr; 1967 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1968 Mat_MatMultDense *ab = a->abdense; 1969 1970 PetscFunctionBegin; 1971 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 1972 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 1973 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 1974 1975 ierr = (ab->destroy)(A);CHKERRQ(ierr); 1976 ierr = PetscFree(ab);CHKERRQ(ierr); 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #if defined(PETSC_HAVE_ELEMENTAL) 1981 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1982 { 1983 PetscErrorCode ierr; 1984 Mat_MPIDense *c=(Mat_MPIDense*)C->data; 1985 Mat_MatMultDense *ab=c->abdense; 1986 1987 PetscFunctionBegin; 1988 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 1989 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 1990 ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 1991 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 1992 PetscFunctionReturn(0); 1993 } 1994 1995 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1996 { 1997 PetscErrorCode ierr; 1998 Mat Ae,Be,Ce; 1999 Mat_MPIDense *c; 2000 Mat_MatMultDense *ab; 2001 2002 PetscFunctionBegin; 2003 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2004 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2005 } 2006 2007 /* convert A and B to Elemental matrices Ae and Be */ 2008 ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr); 2009 ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr); 2010 2011 /* Ce = Ae*Be */ 2012 ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr); 2013 ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr); 2014 2015 /* convert Ce to C */ 2016 ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 2017 2018 /* create data structure for reuse Cdense */ 2019 ierr = PetscNew(&ab);CHKERRQ(ierr); 2020 c = (Mat_MPIDense*)(*C)->data; 2021 c->abdense = ab; 2022 2023 ab->Ae = Ae; 2024 ab->Be = Be; 2025 ab->Ce = Ce; 2026 ab->destroy = (*C)->ops->destroy; 2027 (*C)->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2028 (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2029 PetscFunctionReturn(0); 2030 } 2031 2032 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 2033 { 2034 PetscErrorCode ierr; 2035 2036 PetscFunctionBegin; 2037 if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */ 2038 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 2039 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 2040 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 2041 } else { 2042 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 2043 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 2044 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 2045 } 2046 PetscFunctionReturn(0); 2047 } 2048 #endif 2049 2050