/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>   /*I  "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
   matrix that represents the operator. For sequential matrices it returns itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}
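/*
   Usage sketch (illustrative only; A is assumed to be an already assembled
   MATMPIDENSE or MATSEQDENSE matrix supplied by the caller):

       Mat         Aloc;
       PetscScalar *array;

       ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
       ierr = MatDenseGetArray(Aloc,&array);CHKERRQ(ierr);
       .... operate on the column-major local storage ....
       ierr = MatDenseRestoreArray(Aloc,&array);CHKERRQ(ierr);

   No new reference to the inner matrix is taken, so the caller must not
   destroy Aloc.
*/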
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
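/*
   Usage sketch (illustrative only; A, ncols, cols[] and vals[] come from the
   caller): rows inside the local ownership range are written directly into
   the local block above, while off-process rows are cached in the stash and
   forwarded to their owners during assembly.

       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (i=rstart; i<rend; i++) {
         ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
       }
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/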
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetArray_MPIDense"
PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. Currently supports only a submatrix with the same row
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseRestoreArray_MPIDense"
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
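/*
   Note on the assembly pair above: MatAssemblyBegin_MPIDense() only posts the
   stash communication; MatAssemblyEnd_MPIDense() drains it and re-applies each
   received batch of (row, column, value) entries through
   MatSetValues_MPIDense(), which at that point sees only locally owned rows.
*/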
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscMalloc1(2*size,&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc1((N+1),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1((nrecvs+1),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc1((N+1),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1((nsends+1),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1((size+1),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1((slen+1),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
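/*
   Usage sketch (illustrative only; nrows, rows[], x and b are supplied by the
   caller): the rendezvous above routes any global row index to its owner, so
   rows[] need not be locally owned. A typical Dirichlet-condition call is

       ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);

   which zeros the rows, puts 1.0 on the diagonal, and sets b[row] = x[row]
   for every zeroed row.
*/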
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
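/*
   Note on the four multiply kernels above: each process stores an m x N
   block, so the input vector is first gathered into the local work vector
   lvec through the Mvctx scatter; the product itself is then a purely local
   dense multiply. The transposed variants run the same scatter in reverse
   and accumulate into the result with ADD_VALUES.
*/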
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}
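/*
   Usage sketch (illustrative only; the file name is arbitrary): producing a
   file in the native dense binary format written above.

       PetscViewer viewer;

       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
       ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
       ierr = MatView(A,viewer);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/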
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIDENSE so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m = mdn->A->rmap->n,n = mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
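/*
   Usage sketch (illustrative only; ll and rr are supplied by the caller):
   the routine above computes A = diag(ll) * A * diag(rr); ll must conform to
   the row layout of A and rr to its column layout, and either may be NULL to
   skip that side of the scaling.

       ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
*/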
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIDense"
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIDense"
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIDense"
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIDense"
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIDense"
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIDense"
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatGetSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatGetSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0
};
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
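/*
   Usage sketch (illustrative only; comm and the sizes m, n, M, N come from
   the caller): the usual creation sequence when PETSc manages the storage.

       Mat A;

       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
       ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
*/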
#undef __FUNCT__
#define __FUNCT__ "MatCreateDense"
/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
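/*
   Usage sketch (illustrative only; M and N come from the caller):

       Mat A;

       ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
       .... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() ....
       ierr = MatDestroy(&A);CHKERRQ(ierr);
*/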
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
  else m = newmat->rmap->n;
  ierr = PetscMalloc1((size+2),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
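/*
   Usage sketch for MatLoad_MPIDense() below (illustrative only; the file name
   is arbitrary and the file is one written by MatView() with a binary viewer):

       Mat         A;
       PetscViewer viewer;

       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
       ierr = MatLoad(A,viewer);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/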
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend,sizesset = 1,grows,gcols;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  ierr = PetscMalloc1((size+2),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(rend-rstart,&ourlens,rend-rstart,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1((nz+1),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) ourlens[i] -= offlens[i];

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) ourlens[i] += offlens[i];

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}