1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatDenseGetLocalMatrix" 12 /*@ 13 14 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 15 matrix that represents the operator. For sequential matrices it returns itself. 16 17 Input Parameter: 18 . A - the Seq or MPI dense matrix 19 20 Output Parameter: 21 . B - the inner matrix 22 23 Level: intermediate 24 25 @*/ 26 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 27 { 28 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 29 PetscErrorCode ierr; 30 PetscBool flg; 31 32 PetscFunctionBegin; 33 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 34 if (flg) *B = mat->A; 35 else *B = A; 36 PetscFunctionReturn(0); 37 } 38 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatGetRow_MPIDense" 41 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 42 { 43 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 44 PetscErrorCode ierr; 45 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 46 47 PetscFunctionBegin; 48 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 49 lrow = row - rstart; 50 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 51 PetscFunctionReturn(0); 52 } 53 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatRestoreRow_MPIDense" 56 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 57 { 58 PetscErrorCode ierr; 59 60 PetscFunctionBegin; 61 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 62 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 68 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 69 { 70 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 71 PetscErrorCode ierr; 72 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 73 PetscScalar *array; 74 MPI_Comm comm; 75 Mat B; 76 77 PetscFunctionBegin; 78 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 79 80 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 81 if (!B) { 82 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 83 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 84 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 85 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 86 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 87 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 88 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 89 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 90 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 91 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 92 *a = B; 93 ierr = MatDestroy(&B);CHKERRQ(ierr); 94 } else *a = B; 95 PetscFunctionReturn(0); 96 } 97 98 #undef __FUNCT__ 99 #define __FUNCT__ "MatSetValues_MPIDense" 100 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 101 { 102 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 103 PetscErrorCode ierr; 104 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 105 PetscBool roworiented = A->roworiented; 106 107 PetscFunctionBegin; 108 for (i=0; i<m; i++) { 109 if (idxm[i] < 0) continue; 110 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 111 if (idxm[i] >= rstart && idxm[i] < rend) { 112 row = idxm[i] - rstart; 113 if (roworiented) { 114 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 115 } else { 116 for (j=0; j<n; j++) { 117 if (idxn[j] < 0) continue; 118 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 119 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 120 } 121 } 122 } else if (!A->donotstash) { 123 mat->assembled = PETSC_FALSE; 124 if (roworiented) { 125 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 126 } else { 127 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 128 } 129 } 130 } 131 PetscFunctionReturn(0); 132 } 133 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatGetValues_MPIDense" 136 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 137 { 138 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 141 142 PetscFunctionBegin; 143 for (i=0; i<m; i++) { 144 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 145 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 146 if (idxm[i] >= rstart && idxm[i] < rend) { 147 row = idxm[i] - rstart; 148 for (j=0; j<n; j++) { 149 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 150 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 151 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 152 } 153 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 154 } 155 PetscFunctionReturn(0); 156 } 157 158 #undef __FUNCT__ 159 #define __FUNCT__ "MatDenseGetArray_MPIDense" 160 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 161 { 162 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 163 PetscErrorCode ierr; 164 165 PetscFunctionBegin; 166 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 172 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 173 { 174 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 175 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 176 PetscErrorCode ierr; 177 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 178 const PetscInt *irow,*icol; 179 PetscScalar *av,*bv,*v = lmat->v; 180 Mat newmat; 181 IS iscol_local; 182 183 PetscFunctionBegin; 184 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 185 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 186 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 187 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 188 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 189 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 190 191 /* No parallel redistribution currently supported! Should really check each index set 192 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 193 original matrix! */ 194 195 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 196 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 197 198 /* Check submatrix call */ 199 if (scall == MAT_REUSE_MATRIX) { 200 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 201 /* Really need to test rows and column sizes! */ 202 newmat = *B; 203 } else { 204 /* Create and fill new matrix */ 205 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 206 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 207 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 208 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 209 } 210 211 /* Now extract the data pointers and do the copy, column at a time */ 212 newmatd = (Mat_MPIDense*)newmat->data; 213 bv = ((Mat_SeqDense*)newmatd->A->data)->v; 214 215 for (i=0; i<Ncols; i++) { 216 av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 217 for (j=0; j<nrows; j++) { 218 *bv++ = av[irow[j] - rstart]; 219 } 220 } 221 222 /* Assemble the matrices so that the correct flags are set */ 223 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 224 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 225 226 /* Free work space */ 227 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 228 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 229 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 230 *B = newmat; 231 PetscFunctionReturn(0); 232 } 233 234 #undef __FUNCT__ 235 #define __FUNCT__ "MatDenseRestoreArray_MPIDense" 236 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 237 { 238 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 239 PetscErrorCode ierr; 240 241 PetscFunctionBegin; 242 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 248 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 249 { 250 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 251 MPI_Comm comm; 252 PetscErrorCode ierr; 253 PetscInt nstash,reallocs; 254 InsertMode addv; 255 256 PetscFunctionBegin; 257 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 258 /* make sure all processors are either in INSERTMODE or ADDMODE */ 259 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 260 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 261 mat->insertmode = addv; /* in case this processor had no cache */ 262 263 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 264 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 265 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 266 PetscFunctionReturn(0); 267 } 268 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 271 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 272 { 273 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 274 PetscErrorCode ierr; 275 PetscInt i,*row,*col,flg,j,rstart,ncols; 276 PetscMPIInt n; 277 PetscScalar *val; 278 279 PetscFunctionBegin; 280 /* wait on receives */ 281 while (1) { 282 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 283 if (!flg) break; 284 285 for (i=0; i<n;) { 286 /* Now identify the consecutive vals belonging to the same row */ 287 for (j=i,rstart=row[j]; j<n; j++) { 288 if (row[j] != rstart) break; 289 } 290 if (j < n) ncols = j-i; 291 else ncols = n-i; 292 /* Now assemble all these values with a single function call */ 293 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 294 i = j; 295 } 296 } 297 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 298 299 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 300 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 301 302 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 303 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 304 } 305 PetscFunctionReturn(0); 306 } 307 308 #undef __FUNCT__ 309 #define __FUNCT__ "MatZeroEntries_MPIDense" 310 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 311 { 312 PetscErrorCode ierr; 313 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 314 315 PetscFunctionBegin; 316 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 317 PetscFunctionReturn(0); 318 } 319 320 /* the code does not do the diagonal entries correctly unless the 321 matrix is square and the column and row owerships are identical. 322 This is a BUG. The only way to fix it seems to be to access 323 mdn->A and mdn->B directly and not through the MatZeroRows() 324 routine. 325 */ 326 #undef __FUNCT__ 327 #define __FUNCT__ "MatZeroRows_MPIDense" 328 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 329 { 330 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 331 PetscErrorCode ierr; 332 PetscInt i,*owners = A->rmap->range; 333 PetscInt *sizes,j,idx,nsends; 334 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 335 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 336 PetscInt *lens,*lrows,*values; 337 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 338 MPI_Comm comm; 339 MPI_Request *send_waits,*recv_waits; 340 MPI_Status recv_status,*send_status; 341 PetscBool found; 342 const PetscScalar *xx; 343 PetscScalar *bb; 344 345 PetscFunctionBegin; 346 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 347 if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 348 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 349 /* first count number of contributors to each processor */ 350 ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 351 ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/ 352 for (i=0; i<N; i++) { 353 idx = rows[i]; 354 found = PETSC_FALSE; 355 for (j=0; j<size; j++) { 356 if (idx >= owners[j] && idx < owners[j+1]) { 357 sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 358 } 359 } 360 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 361 } 362 nsends = 0; 363 for (i=0; i<size; i++) nsends += sizes[2*i+1]; 364 365 /* inform other processors of number of messages and max length*/ 366 ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr); 367 368 /* post receives: */ 369 ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr); 370 ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr); 371 for (i=0; i<nrecvs; i++) { 372 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 373 } 374 375 /* do sends: 376 1) starts[i] gives the starting index in svalues for stuff going to 377 the ith processor 378 */ 379 ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr); 380 ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr); 381 ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr); 382 383 starts[0] = 0; 384 for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 385 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 386 387 starts[0] = 0; 388 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 389 count = 0; 390 for (i=0; i<size; i++) { 391 if (sizes[2*i+1]) { 392 ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 393 } 394 } 395 ierr = PetscFree(starts);CHKERRQ(ierr); 396 397 base = owners[rank]; 398 399 /* wait on receives */ 400 ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 401 count = nrecvs; 402 slen = 0; 403 while (count) { 404 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 405 /* unpack receives into our local space */ 406 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 407 408 source[imdex] = recv_status.MPI_SOURCE; 409 lens[imdex] = n; 410 slen += n; 411 count--; 412 } 413 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 414 415 /* move the data into the send scatter */ 416 ierr = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr); 417 count = 0; 418 for (i=0; i<nrecvs; i++) { 419 values = rvalues + i*nmax; 420 for (j=0; j<lens[i]; j++) { 421 lrows[count++] = values[j] - base; 422 } 423 } 424 ierr = PetscFree(rvalues);CHKERRQ(ierr); 425 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 426 ierr = PetscFree(owner);CHKERRQ(ierr); 427 ierr = PetscFree(sizes);CHKERRQ(ierr); 428 429 /* fix right hand side if needed */ 430 if (x && b) { 431 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 432 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 433 for (i=0; i<slen; i++) { 434 bb[lrows[i]] = diag*xx[lrows[i]]; 435 } 436 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 437 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 438 } 439 440 /* actually zap the local rows */ 441 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 442 if (diag != 0.0) { 443 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 444 PetscInt m = ll->lda, i; 445 446 for (i=0; i<slen; i++) { 447 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 448 } 449 } 450 ierr = PetscFree(lrows);CHKERRQ(ierr); 451 452 /* wait on sends */ 453 if (nsends) { 454 ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); 455 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 456 ierr = PetscFree(send_status);CHKERRQ(ierr); 457 } 458 ierr = PetscFree(send_waits);CHKERRQ(ierr); 459 ierr = PetscFree(svalues);CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatMult_MPIDense" 465 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 466 { 467 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 468 PetscErrorCode ierr; 469 470 PetscFunctionBegin; 471 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 473 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 474 PetscFunctionReturn(0); 475 } 476 477 #undef __FUNCT__ 478 #define __FUNCT__ "MatMultAdd_MPIDense" 479 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 480 { 481 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 482 PetscErrorCode ierr; 483 484 PetscFunctionBegin; 485 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 486 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 487 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 #undef __FUNCT__ 492 #define __FUNCT__ "MatMultTranspose_MPIDense" 493 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 494 { 495 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 496 PetscErrorCode ierr; 497 PetscScalar zero = 0.0; 498 499 PetscFunctionBegin; 500 ierr = VecSet(yy,zero);CHKERRQ(ierr); 501 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 502 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 503 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 504 PetscFunctionReturn(0); 505 } 506 507 #undef __FUNCT__ 508 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 509 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 510 { 511 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 512 PetscErrorCode ierr; 513 514 PetscFunctionBegin; 515 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 516 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 517 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 518 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522 #undef __FUNCT__ 523 #define __FUNCT__ "MatGetDiagonal_MPIDense" 524 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 525 { 526 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 527 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 528 PetscErrorCode ierr; 529 PetscInt len,i,n,m = A->rmap->n,radd; 530 PetscScalar *x,zero = 0.0; 531 532 PetscFunctionBegin; 533 ierr = VecSet(v,zero);CHKERRQ(ierr); 534 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 535 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 536 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 537 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 538 radd = A->rmap->rstart*m; 539 for (i=0; i<len; i++) { 540 x[i] = aloc->v[radd + i*m + i]; 541 } 542 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatDestroy_MPIDense" 548 PetscErrorCode MatDestroy_MPIDense(Mat mat) 549 { 550 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 551 PetscErrorCode ierr; 552 553 PetscFunctionBegin; 554 #if defined(PETSC_USE_LOG) 555 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 556 #endif 557 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 558 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 559 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 560 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 561 562 ierr = PetscFree(mat->data);CHKERRQ(ierr); 563 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 564 565 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 566 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 567 #if defined(PETSC_HAVE_ELEMENTAL) 568 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 569 #endif 570 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 571 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 572 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 573 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 574 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 576 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 577 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 578 PetscFunctionReturn(0); 579 } 580 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatView_MPIDense_Binary" 583 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 584 { 585 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 586 PetscErrorCode ierr; 587 PetscViewerFormat format; 588 int fd; 589 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 590 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 591 PetscScalar *work,*v,*vv; 592 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 593 594 PetscFunctionBegin; 595 if (mdn->size == 1) { 596 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 597 } else { 598 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 599 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 600 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 601 602 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 603 if (format == PETSC_VIEWER_NATIVE) { 604 605 if (!rank) { 606 /* store the matrix as a dense matrix */ 607 header[0] = MAT_FILE_CLASSID; 608 header[1] = mat->rmap->N; 609 header[2] = N; 610 header[3] = MATRIX_BINARY_FORMAT_DENSE; 611 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 612 613 /* get largest work array needed for transposing array */ 614 mmax = mat->rmap->n; 615 for (i=1; i<size; i++) { 616 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 617 } 618 ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr); 619 620 /* write out local array, by rows */ 621 m = mat->rmap->n; 622 v = a->v; 623 for (j=0; j<N; j++) { 624 for (i=0; i<m; i++) { 625 work[j + i*N] = *v++; 626 } 627 } 628 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 629 /* get largest work array to receive messages from other processes, excludes process zero */ 630 mmax = 0; 631 for (i=1; i<size; i++) { 632 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 633 } 634 ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr); 635 for (k = 1; k < size; k++) { 636 v = vv; 637 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 638 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 639 640 for (j = 0; j < N; j++) { 641 for (i = 0; i < m; i++) { 642 work[j + i*N] = *v++; 643 } 644 } 645 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 646 } 647 ierr = PetscFree(work);CHKERRQ(ierr); 648 ierr = PetscFree(vv);CHKERRQ(ierr); 649 } else { 650 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 651 } 652 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 653 } 654 PetscFunctionReturn(0); 655 } 656 657 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 658 #include <petscdraw.h> 659 #undef __FUNCT__ 660 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 661 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 662 { 663 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 664 PetscErrorCode ierr; 665 PetscMPIInt rank = mdn->rank; 666 PetscViewerType vtype; 667 PetscBool iascii,isdraw; 668 PetscViewer sviewer; 669 PetscViewerFormat format; 670 671 PetscFunctionBegin; 672 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 673 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 674 if (iascii) { 675 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 676 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 677 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 678 MatInfo info; 679 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 680 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 681 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 682 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 683 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 684 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 685 PetscFunctionReturn(0); 686 } else if (format == PETSC_VIEWER_ASCII_INFO) { 687 PetscFunctionReturn(0); 688 } 689 } else if (isdraw) { 690 PetscDraw draw; 691 PetscBool isnull; 692 693 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 694 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 695 if (isnull) PetscFunctionReturn(0); 696 } 697 698 { 699 /* assemble the entire matrix onto first processor. */ 700 Mat A; 701 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 702 PetscInt *cols; 703 PetscScalar *vals; 704 705 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 706 if (!rank) { 707 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 708 } else { 709 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 710 } 711 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 712 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 713 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 714 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 715 716 /* Copy the matrix ... This isn't the most efficient means, 717 but it's quick for now */ 718 A->insertmode = INSERT_VALUES; 719 720 row = mat->rmap->rstart; 721 m = mdn->A->rmap->n; 722 for (i=0; i<m; i++) { 723 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 724 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 725 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 726 row++; 727 } 728 729 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 730 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 731 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 732 if (!rank) { 733 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 734 } 735 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 736 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 737 ierr = MatDestroy(&A);CHKERRQ(ierr); 738 } 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatView_MPIDense" 744 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 745 { 746 PetscErrorCode ierr; 747 PetscBool iascii,isbinary,isdraw,issocket; 748 749 PetscFunctionBegin; 750 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 751 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 752 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 753 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 754 755 if (iascii || issocket || isdraw) { 756 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 757 } else if (isbinary) { 758 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 759 } 760 PetscFunctionReturn(0); 761 } 762 763 #undef __FUNCT__ 764 #define __FUNCT__ "MatGetInfo_MPIDense" 765 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 766 { 767 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 768 Mat mdn = mat->A; 769 PetscErrorCode ierr; 770 PetscReal isend[5],irecv[5]; 771 772 PetscFunctionBegin; 773 info->block_size = 1.0; 774 775 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 776 777 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 778 isend[3] = info->memory; isend[4] = info->mallocs; 779 if (flag == MAT_LOCAL) { 780 info->nz_used = isend[0]; 781 info->nz_allocated = isend[1]; 782 info->nz_unneeded = isend[2]; 783 info->memory = isend[3]; 784 info->mallocs = isend[4]; 785 } else if (flag == MAT_GLOBAL_MAX) { 786 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 787 788 info->nz_used = irecv[0]; 789 info->nz_allocated = irecv[1]; 790 info->nz_unneeded = irecv[2]; 791 info->memory = irecv[3]; 792 info->mallocs = irecv[4]; 793 } else if (flag == MAT_GLOBAL_SUM) { 794 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 795 796 info->nz_used = irecv[0]; 797 info->nz_allocated = irecv[1]; 798 info->nz_unneeded = irecv[2]; 799 info->memory = irecv[3]; 800 info->mallocs = irecv[4]; 801 } 802 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 803 info->fill_ratio_needed = 0; 804 info->factor_mallocs = 0; 805 PetscFunctionReturn(0); 806 } 807 808 #undef __FUNCT__ 809 #define __FUNCT__ "MatSetOption_MPIDense" 810 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 811 { 812 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 813 PetscErrorCode ierr; 814 815 PetscFunctionBegin; 816 switch (op) { 817 case MAT_NEW_NONZERO_LOCATIONS: 818 case MAT_NEW_NONZERO_LOCATION_ERR: 819 case MAT_NEW_NONZERO_ALLOCATION_ERR: 820 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 821 break; 822 case MAT_ROW_ORIENTED: 823 a->roworiented = flg; 824 825 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 826 break; 827 case MAT_NEW_DIAGONALS: 828 case MAT_KEEP_NONZERO_PATTERN: 829 case MAT_USE_HASH_TABLE: 830 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 831 break; 832 case MAT_IGNORE_OFF_PROC_ENTRIES: 833 a->donotstash = flg; 834 break; 835 case MAT_SYMMETRIC: 836 case MAT_STRUCTURALLY_SYMMETRIC: 837 case MAT_HERMITIAN: 838 case MAT_SYMMETRY_ETERNAL: 839 case MAT_IGNORE_LOWER_TRIANGULAR: 840 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 841 break; 842 default: 843 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 844 } 845 PetscFunctionReturn(0); 846 } 847 848 849 #undef __FUNCT__ 850 #define __FUNCT__ "MatDiagonalScale_MPIDense" 851 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 852 { 853 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 854 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 855 PetscScalar *l,*r,x,*v; 856 PetscErrorCode ierr; 857 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 858 859 PetscFunctionBegin; 860 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 861 if (ll) { 862 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 863 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 864 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 865 for (i=0; i<m; i++) { 866 x = l[i]; 867 v = mat->v + i; 868 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 869 } 870 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 871 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 872 } 873 if (rr) { 874 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 875 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 876 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 877 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 878 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 879 for (i=0; i<n; i++) { 880 x = r[i]; 881 v = mat->v + i*m; 882 for (j=0; j<m; j++) (*v++) *= x; 883 } 884 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 885 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 886 } 887 PetscFunctionReturn(0); 888 } 889 890 #undef __FUNCT__ 891 #define __FUNCT__ "MatNorm_MPIDense" 892 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 893 { 894 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 895 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 896 PetscErrorCode ierr; 897 PetscInt i,j; 898 PetscReal sum = 0.0; 899 PetscScalar *v = mat->v; 900 901 PetscFunctionBegin; 902 if (mdn->size == 1) { 903 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 904 } else { 905 if (type == NORM_FROBENIUS) { 906 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 907 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 908 } 909 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 910 *nrm = PetscSqrtReal(*nrm); 911 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 912 } else if (type == NORM_1) { 913 PetscReal *tmp,*tmp2; 914 ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 915 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 916 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 917 *nrm = 0.0; 918 v = mat->v; 919 for (j=0; j<mdn->A->cmap->n; j++) { 920 for (i=0; i<mdn->A->rmap->n; i++) { 921 tmp[j] += PetscAbsScalar(*v); v++; 922 } 923 } 924 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 925 for (j=0; j<A->cmap->N; j++) { 926 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 927 } 928 ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 929 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 930 } else if (type == NORM_INFINITY) { /* max row norm */ 931 PetscReal ntemp; 932 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 933 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 934 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 935 } 936 PetscFunctionReturn(0); 937 } 938 939 #undef __FUNCT__ 940 #define __FUNCT__ "MatTranspose_MPIDense" 941 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 942 { 943 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 944 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 945 Mat B; 946 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 947 PetscErrorCode ierr; 948 PetscInt j,i; 949 PetscScalar *v; 950 951 PetscFunctionBegin; 952 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place"); 953 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 954 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 955 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 956 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 957 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 958 } else { 959 B = *matout; 960 } 961 962 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 963 ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 964 for (i=0; i<m; i++) rwork[i] = rstart + i; 965 for (j=0; j<n; j++) { 966 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 967 v += m; 968 } 969 ierr = PetscFree(rwork);CHKERRQ(ierr); 970 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 971 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 972 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 973 *matout = B; 974 } else { 975 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 976 } 977 PetscFunctionReturn(0); 978 } 979 980 981 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 982 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatSetUp_MPIDense" 986 PetscErrorCode MatSetUp_MPIDense(Mat A) 987 { 988 PetscErrorCode ierr; 989 990 PetscFunctionBegin; 991 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 992 PetscFunctionReturn(0); 993 } 994 995 #undef __FUNCT__ 996 #define __FUNCT__ "MatAXPY_MPIDense" 997 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 998 { 999 PetscErrorCode ierr; 1000 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1001 1002 PetscFunctionBegin; 1003 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1004 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatConjugate_MPIDense" 1010 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1011 { 1012 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1013 PetscErrorCode ierr; 1014 1015 PetscFunctionBegin; 1016 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1017 PetscFunctionReturn(0); 1018 } 1019 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatRealPart_MPIDense" 1022 PetscErrorCode MatRealPart_MPIDense(Mat A) 1023 { 1024 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1025 PetscErrorCode ierr; 1026 1027 PetscFunctionBegin; 1028 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1029 PetscFunctionReturn(0); 1030 } 1031 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1034 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1035 { 1036 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1037 PetscErrorCode ierr; 1038 1039 PetscFunctionBegin; 1040 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1041 PetscFunctionReturn(0); 1042 } 1043 1044 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "MatGetColumnNorms_MPIDense" 1047 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1048 { 1049 PetscErrorCode ierr; 1050 PetscInt i,n; 1051 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1052 PetscReal *work; 1053 1054 PetscFunctionBegin; 1055 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1056 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1057 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1058 if (type == NORM_2) { 1059 for (i=0; i<n; i++) work[i] *= work[i]; 1060 } 1061 if (type == NORM_INFINITY) { 1062 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1063 } else { 1064 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1065 } 1066 ierr = PetscFree(work);CHKERRQ(ierr); 1067 if (type == NORM_2) { 1068 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1069 } 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "MatSetRandom_MPIDense" 1075 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1076 { 1077 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1078 PetscErrorCode ierr; 1079 PetscScalar *a; 1080 PetscInt m,n,i; 1081 1082 PetscFunctionBegin; 1083 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1084 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1085 for (i=0; i<m*n; i++) { 1086 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1087 } 1088 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1089 PetscFunctionReturn(0); 1090 } 1091 1092 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1093 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "MatMissingDiagonal_MPIDense" 1096 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1097 { 1098 PetscFunctionBegin; 1099 *missing = PETSC_FALSE; 1100 PetscFunctionReturn(0); 1101 } 1102 1103 /* -------------------------------------------------------------------*/ 1104 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1105 MatGetRow_MPIDense, 1106 MatRestoreRow_MPIDense, 1107 MatMult_MPIDense, 1108 /* 4*/ MatMultAdd_MPIDense, 1109 MatMultTranspose_MPIDense, 1110 MatMultTransposeAdd_MPIDense, 1111 0, 1112 0, 1113 0, 1114 /* 10*/ 0, 1115 0, 1116 0, 1117 0, 1118 MatTranspose_MPIDense, 1119 /* 15*/ MatGetInfo_MPIDense, 1120 MatEqual_MPIDense, 1121 MatGetDiagonal_MPIDense, 1122 MatDiagonalScale_MPIDense, 1123 MatNorm_MPIDense, 1124 /* 20*/ MatAssemblyBegin_MPIDense, 1125 MatAssemblyEnd_MPIDense, 1126 MatSetOption_MPIDense, 1127 MatZeroEntries_MPIDense, 1128 /* 24*/ MatZeroRows_MPIDense, 1129 0, 1130 0, 1131 0, 1132 0, 1133 /* 29*/ MatSetUp_MPIDense, 1134 0, 1135 0, 1136 0, 1137 0, 1138 /* 34*/ MatDuplicate_MPIDense, 1139 0, 1140 0, 1141 0, 1142 0, 1143 /* 39*/ MatAXPY_MPIDense, 1144 MatGetSubMatrices_MPIDense, 1145 0, 1146 MatGetValues_MPIDense, 1147 0, 1148 /* 44*/ 0, 1149 MatScale_MPIDense, 1150 MatShift_Basic, 1151 0, 1152 0, 1153 /* 49*/ MatSetRandom_MPIDense, 1154 0, 1155 0, 1156 0, 1157 0, 1158 /* 54*/ 0, 1159 0, 1160 0, 1161 0, 1162 0, 1163 /* 59*/ MatGetSubMatrix_MPIDense, 1164 MatDestroy_MPIDense, 1165 MatView_MPIDense, 1166 0, 1167 0, 1168 /* 64*/ 0, 1169 0, 1170 0, 1171 0, 1172 0, 1173 /* 69*/ 0, 1174 0, 1175 0, 1176 0, 1177 0, 1178 /* 74*/ 0, 1179 0, 1180 0, 1181 0, 1182 0, 1183 /* 79*/ 0, 1184 0, 1185 0, 1186 0, 1187 /* 83*/ MatLoad_MPIDense, 1188 0, 1189 0, 1190 0, 1191 0, 1192 0, 1193 /* 89*/ 1194 0, 1195 0, 1196 MatMatMultNumeric_MPIDense, 1197 0, 1198 0, 1199 /* 94*/ 0, 1200 0, 1201 0, 1202 0, 1203 0, 1204 /* 99*/ 0, 1205 0, 1206 0, 1207 MatConjugate_MPIDense, 1208 0, 1209 /*104*/ 0, 1210 MatRealPart_MPIDense, 1211 MatImaginaryPart_MPIDense, 1212 0, 1213 0, 1214 /*109*/ 0, 1215 0, 1216 0, 1217 0, 1218 MatMissingDiagonal_MPIDense, 1219 /*114*/ 0, 1220 0, 1221 0, 1222 0, 1223 0, 1224 /*119*/ 0, 1225 0, 1226 0, 1227 0, 1228 0, 1229 /*124*/ 0, 1230 MatGetColumnNorms_MPIDense, 1231 0, 1232 0, 1233 0, 1234 /*129*/ 0, 1235 0, 1236 0, 1237 0, 1238 0, 1239 /*134*/ 0, 1240 0, 1241 0, 1242 0, 1243 0, 1244 /*139*/ 0, 1245 0, 1246 0 1247 }; 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1251 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1252 { 1253 Mat_MPIDense *a; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 mat->preallocated = PETSC_TRUE; 1258 /* Note: For now, when data is specified above, this assumes the user correctly 1259 allocates the local dense storage space. We should add error checking. */ 1260 1261 a = (Mat_MPIDense*)mat->data; 1262 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1263 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1264 a->nvec = mat->cmap->n; 1265 1266 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1267 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1268 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1269 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1270 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1271 PetscFunctionReturn(0); 1272 } 1273 1274 #if defined(PETSC_HAVE_ELEMENTAL) 1275 #undef __FUNCT__ 1276 #define __FUNCT__ "MatConvert_MPIDense_Elemental" 1277 PETSC_EXTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1278 { 1279 Mat mat_elemental; 1280 PetscErrorCode ierr; 1281 PetscScalar *array,*v_rowwise; 1282 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,j,k,*rows,*cols; 1283 1284 PetscFunctionBegin; 1285 ierr = PetscMalloc3(m*N,&v_rowwise,m,&rows,N,&cols);CHKERRQ(ierr); 1286 ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr); 1287 /* convert column-wise array into row-wise v_rowwise, see MatSetValues_Elemental() */ 1288 k = 0; 1289 for (j=0; j<N; j++) { 1290 cols[j] = j; 1291 for (i=0; i<m; i++) { 1292 v_rowwise[i*N+j] = array[k++]; 1293 } 1294 } 1295 for (i=0; i<m; i++) { 1296 rows[i] = rstart + i; 1297 } 1298 ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr); 1299 1300 ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1301 ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1302 ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1303 ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 1304 1305 /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1306 ierr = MatSetValues(mat_elemental,m,rows,N,cols,v_rowwise,ADD_VALUES);CHKERRQ(ierr); 1307 ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1308 ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1309 ierr = PetscFree3(v_rowwise,rows,cols);CHKERRQ(ierr); 1310 1311 if (reuse == MAT_REUSE_MATRIX) { 1312 ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 1313 } else { 1314 *newmat = mat_elemental; 1315 } 1316 PetscFunctionReturn(0); 1317 } 1318 #endif 1319 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatCreate_MPIDense" 1322 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1323 { 1324 Mat_MPIDense *a; 1325 PetscErrorCode ierr; 1326 1327 PetscFunctionBegin; 1328 ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1329 mat->data = (void*)a; 1330 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1331 1332 mat->insertmode = NOT_SET_VALUES; 1333 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1334 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1335 1336 /* build cache for off array entries formed */ 1337 a->donotstash = PETSC_FALSE; 1338 1339 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1340 1341 /* stuff used for matrix vector multiply */ 1342 a->lvec = 0; 1343 a->Mvctx = 0; 1344 a->roworiented = PETSC_TRUE; 1345 1346 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1347 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1348 #if defined(PETSC_HAVE_ELEMENTAL) 1349 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 1350 #endif 1351 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1352 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1353 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1354 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1355 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1356 1357 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1358 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1359 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1360 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 /*MC 1365 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1366 1367 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1368 and MATMPIDENSE otherwise. 1369 1370 Options Database Keys: 1371 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1372 1373 Level: beginner 1374 1375 1376 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1377 M*/ 1378 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1381 /*@C 1382 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1383 1384 Not collective 1385 1386 Input Parameters: 1387 . B - the matrix 1388 - data - optional location of matrix data. Set data=NULL for PETSc 1389 to control all matrix memory allocation. 1390 1391 Notes: 1392 The dense format is fully compatible with standard Fortran 77 1393 storage by columns. 1394 1395 The data input variable is intended primarily for Fortran programmers 1396 who wish to allocate their own matrix memory space. Most users should 1397 set data=NULL. 1398 1399 Level: intermediate 1400 1401 .keywords: matrix,dense, parallel 1402 1403 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1404 @*/ 1405 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1406 { 1407 PetscErrorCode ierr; 1408 1409 PetscFunctionBegin; 1410 ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatCreateDense" 1416 /*@C 1417 MatCreateDense - Creates a parallel matrix in dense format. 1418 1419 Collective on MPI_Comm 1420 1421 Input Parameters: 1422 + comm - MPI communicator 1423 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1424 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1425 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1426 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1427 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1428 to control all matrix memory allocation. 1429 1430 Output Parameter: 1431 . A - the matrix 1432 1433 Notes: 1434 The dense format is fully compatible with standard Fortran 77 1435 storage by columns. 1436 1437 The data input variable is intended primarily for Fortran programmers 1438 who wish to allocate their own matrix memory space. Most users should 1439 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 1440 1441 The user MUST specify either the local or global matrix dimensions 1442 (possibly both). 1443 1444 Level: intermediate 1445 1446 .keywords: matrix,dense, parallel 1447 1448 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1449 @*/ 1450 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1451 { 1452 PetscErrorCode ierr; 1453 PetscMPIInt size; 1454 1455 PetscFunctionBegin; 1456 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1457 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1458 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1459 if (size > 1) { 1460 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1461 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1462 if (data) { /* user provided data array, so no need to assemble */ 1463 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 1464 (*A)->assembled = PETSC_TRUE; 1465 } 1466 } else { 1467 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1468 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1469 } 1470 PetscFunctionReturn(0); 1471 } 1472 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "MatDuplicate_MPIDense" 1475 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1476 { 1477 Mat mat; 1478 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1479 PetscErrorCode ierr; 1480 1481 PetscFunctionBegin; 1482 *newmat = 0; 1483 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 1484 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1485 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1486 a = (Mat_MPIDense*)mat->data; 1487 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1488 1489 mat->factortype = A->factortype; 1490 mat->assembled = PETSC_TRUE; 1491 mat->preallocated = PETSC_TRUE; 1492 1493 a->size = oldmat->size; 1494 a->rank = oldmat->rank; 1495 mat->insertmode = NOT_SET_VALUES; 1496 a->nvec = oldmat->nvec; 1497 a->donotstash = oldmat->donotstash; 1498 1499 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1500 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1501 1502 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1503 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1504 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1505 1506 *newmat = mat; 1507 PetscFunctionReturn(0); 1508 } 1509 1510 #undef __FUNCT__ 1511 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1512 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat) 1513 { 1514 PetscErrorCode ierr; 1515 PetscMPIInt rank,size; 1516 const PetscInt *rowners; 1517 PetscInt i,m,n,nz,j,mMax; 1518 PetscScalar *array,*vals,*vals_ptr; 1519 Mat_MPIDense *a = (Mat_MPIDense*)newmat->data; 1520 1521 PetscFunctionBegin; 1522 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1523 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1524 1525 /* determine ownership of rows and columns */ 1526 m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n; 1527 n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n; 1528 1529 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1530 if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) { 1531 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1532 } 1533 ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 1534 ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr); 1535 ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr); 1536 ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr); 1537 if (!rank) { 1538 ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr); 1539 1540 /* read in my part of the matrix numerical values */ 1541 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1542 1543 /* insert into matrix-by row (this is why cannot directly read into array */ 1544 vals_ptr = vals; 1545 for (i=0; i<m; i++) { 1546 for (j=0; j<N; j++) { 1547 array[i + j*m] = *vals_ptr++; 1548 } 1549 } 1550 1551 /* read in other processors and ship out */ 1552 for (i=1; i<size; i++) { 1553 nz = (rowners[i+1] - rowners[i])*N; 1554 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1555 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1556 } 1557 } else { 1558 /* receive numeric values */ 1559 ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr); 1560 1561 /* receive message of values*/ 1562 ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1563 1564 /* insert into matrix-by row (this is why cannot directly read into array */ 1565 vals_ptr = vals; 1566 for (i=0; i<m; i++) { 1567 for (j=0; j<N; j++) { 1568 array[i + j*m] = *vals_ptr++; 1569 } 1570 } 1571 } 1572 ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr); 1573 ierr = PetscFree(vals);CHKERRQ(ierr); 1574 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1575 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1576 PetscFunctionReturn(0); 1577 } 1578 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatLoad_MPIDense" 1581 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1582 { 1583 Mat_MPIDense *a; 1584 PetscScalar *vals,*svals; 1585 MPI_Comm comm; 1586 MPI_Status status; 1587 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz; 1588 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1589 PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols; 1590 PetscInt i,nz,j,rstart,rend; 1591 int fd; 1592 PetscErrorCode ierr; 1593 1594 PetscFunctionBegin; 1595 /* force binary viewer to load .info file if it has not yet done so */ 1596 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1597 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 1598 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1599 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1600 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1601 if (!rank) { 1602 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 1603 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1604 } 1605 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1606 M = header[1]; N = header[2]; nz = header[3]; 1607 1608 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1609 if (newmat->rmap->N < 0) newmat->rmap->N = M; 1610 if (newmat->cmap->N < 0) newmat->cmap->N = N; 1611 1612 if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N); 1613 if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N); 1614 1615 /* 1616 Handle case where matrix is stored on disk as a dense matrix 1617 */ 1618 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1619 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 1620 PetscFunctionReturn(0); 1621 } 1622 1623 /* determine ownership of all rows */ 1624 if (newmat->rmap->n < 0) { 1625 ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr); 1626 } else { 1627 ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr); 1628 } 1629 if (newmat->cmap->n < 0) { 1630 n = PETSC_DECIDE; 1631 } else { 1632 ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr); 1633 } 1634 1635 ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr); 1636 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1637 rowners[0] = 0; 1638 for (i=2; i<=size; i++) { 1639 rowners[i] += rowners[i-1]; 1640 } 1641 rstart = rowners[rank]; 1642 rend = rowners[rank+1]; 1643 1644 /* distribute row lengths to all processors */ 1645 ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr); 1646 if (!rank) { 1647 ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr); 1648 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1649 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 1650 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1651 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1652 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1653 } else { 1654 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1655 } 1656 1657 if (!rank) { 1658 /* calculate the number of nonzeros on each processor */ 1659 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 1660 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1661 for (i=0; i<size; i++) { 1662 for (j=rowners[i]; j< rowners[i+1]; j++) { 1663 procsnz[i] += rowlengths[j]; 1664 } 1665 } 1666 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1667 1668 /* determine max buffer needed and allocate it */ 1669 maxnz = 0; 1670 for (i=0; i<size; i++) { 1671 maxnz = PetscMax(maxnz,procsnz[i]); 1672 } 1673 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 1674 1675 /* read in my part of the matrix column indices */ 1676 nz = procsnz[0]; 1677 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 1678 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1679 1680 /* read in every one elses and ship off */ 1681 for (i=1; i<size; i++) { 1682 nz = procsnz[i]; 1683 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1684 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1685 } 1686 ierr = PetscFree(cols);CHKERRQ(ierr); 1687 } else { 1688 /* determine buffer space needed for message */ 1689 nz = 0; 1690 for (i=0; i<m; i++) { 1691 nz += ourlens[i]; 1692 } 1693 ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr); 1694 1695 /* receive message of column indices*/ 1696 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1697 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1698 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1699 } 1700 1701 ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1702 a = (Mat_MPIDense*)newmat->data; 1703 if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) { 1704 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1705 } 1706 1707 if (!rank) { 1708 ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr); 1709 1710 /* read in my part of the matrix numerical values */ 1711 nz = procsnz[0]; 1712 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1713 1714 /* insert into matrix */ 1715 jj = rstart; 1716 smycols = mycols; 1717 svals = vals; 1718 for (i=0; i<m; i++) { 1719 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1720 smycols += ourlens[i]; 1721 svals += ourlens[i]; 1722 jj++; 1723 } 1724 1725 /* read in other processors and ship out */ 1726 for (i=1; i<size; i++) { 1727 nz = procsnz[i]; 1728 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1729 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 1730 } 1731 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1732 } else { 1733 /* receive numeric values */ 1734 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 1735 1736 /* receive message of values*/ 1737 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 1738 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1739 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1740 1741 /* insert into matrix */ 1742 jj = rstart; 1743 smycols = mycols; 1744 svals = vals; 1745 for (i=0; i<m; i++) { 1746 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1747 smycols += ourlens[i]; 1748 svals += ourlens[i]; 1749 jj++; 1750 } 1751 } 1752 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1753 ierr = PetscFree(vals);CHKERRQ(ierr); 1754 ierr = PetscFree(mycols);CHKERRQ(ierr); 1755 ierr = PetscFree(rowners);CHKERRQ(ierr); 1756 1757 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1758 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1759 PetscFunctionReturn(0); 1760 } 1761 1762 #undef __FUNCT__ 1763 #define __FUNCT__ "MatEqual_MPIDense" 1764 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 1765 { 1766 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1767 Mat a,b; 1768 PetscBool flg; 1769 PetscErrorCode ierr; 1770 1771 PetscFunctionBegin; 1772 a = matA->A; 1773 b = matB->A; 1774 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1775 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1776 PetscFunctionReturn(0); 1777 } 1778 1779