1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 7 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatDenseGetLocalMatrix" 11 /*@ 12 13 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the Seq or MPI dense matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 @*/ 25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 26 { 27 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 28 PetscErrorCode ierr; 29 PetscBool flg; 30 31 PetscFunctionBegin; 32 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 33 if (flg) *B = mat->A; 34 else *B = A; 35 PetscFunctionReturn(0); 36 } 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatGetRow_MPIDense" 40 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 41 { 42 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 43 PetscErrorCode ierr; 44 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 45 46 PetscFunctionBegin; 47 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 48 lrow = row - rstart; 49 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 50 PetscFunctionReturn(0); 51 } 52 53 #undef __FUNCT__ 54 #define __FUNCT__ "MatRestoreRow_MPIDense" 55 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 56 { 57 PetscErrorCode ierr; 58 59 PetscFunctionBegin; 60 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 61 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 62 PetscFunctionReturn(0); 63 } 64 65 #undef __FUNCT__ 66 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 67 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 68 { 69 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 70 PetscErrorCode ierr; 71 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 72 PetscScalar *array; 73 MPI_Comm comm; 74 Mat B; 75 76 PetscFunctionBegin; 77 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 78 79 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 80 if (!B) { 81 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 82 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 83 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 84 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 85 ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 86 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 87 ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 88 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 89 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 90 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 91 *a = B; 92 ierr = MatDestroy(&B);CHKERRQ(ierr); 93 } else *a = B; 94 PetscFunctionReturn(0); 95 } 96 97 #undef __FUNCT__ 98 #define __FUNCT__ "MatSetValues_MPIDense" 99 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 100 { 101 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 102 PetscErrorCode ierr; 103 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 104 PetscBool roworiented = A->roworiented; 105 106 PetscFunctionBegin; 107 if (v) PetscValidScalarPointer(v,6); 108 for (i=0; i<m; i++) { 109 if (idxm[i] < 0) continue; 110 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 111 if (idxm[i] >= rstart && idxm[i] < rend) { 112 row = idxm[i] - rstart; 113 if (roworiented) { 114 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 115 } else { 116 for (j=0; j<n; j++) { 117 if (idxn[j] < 0) continue; 118 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 119 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 120 } 121 } 122 } else if (!A->donotstash) { 123 mat->assembled = PETSC_FALSE; 124 if (roworiented) { 125 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 126 } else { 127 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 128 } 129 } 130 } 131 PetscFunctionReturn(0); 132 } 133 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatGetValues_MPIDense" 136 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 137 { 138 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 141 142 PetscFunctionBegin; 143 for (i=0; i<m; i++) { 144 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 145 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 146 if (idxm[i] >= rstart && idxm[i] < rend) { 147 row = idxm[i] - rstart; 148 for (j=0; j<n; j++) { 149 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 150 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 151 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 152 } 153 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 154 } 155 PetscFunctionReturn(0); 156 } 157 158 #undef __FUNCT__ 159 #define __FUNCT__ "MatDenseGetArray_MPIDense" 160 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 161 { 162 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 163 PetscErrorCode ierr; 164 165 PetscFunctionBegin; 166 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 172 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 173 { 174 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 175 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 176 PetscErrorCode ierr; 177 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 178 const PetscInt *irow,*icol; 179 PetscScalar *av,*bv,*v = lmat->v; 180 Mat newmat; 181 IS iscol_local; 182 183 PetscFunctionBegin; 184 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 185 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 186 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 187 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 188 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 189 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 190 191 /* No parallel redistribution currently supported! Should really check each index set 192 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 193 original matrix! */ 194 195 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 196 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 197 198 /* Check submatrix call */ 199 if (scall == MAT_REUSE_MATRIX) { 200 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 201 /* Really need to test rows and column sizes! */ 202 newmat = *B; 203 } else { 204 /* Create and fill new matrix */ 205 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 206 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 207 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 208 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 209 } 210 211 /* Now extract the data pointers and do the copy, column at a time */ 212 newmatd = (Mat_MPIDense*)newmat->data; 213 bv = ((Mat_SeqDense*)newmatd->A->data)->v; 214 215 for (i=0; i<Ncols; i++) { 216 av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 217 for (j=0; j<nrows; j++) { 218 *bv++ = av[irow[j] - rstart]; 219 } 220 } 221 222 /* Assemble the matrices so that the correct flags are set */ 223 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 224 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 225 226 /* Free work space */ 227 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 228 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 229 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 230 *B = newmat; 231 PetscFunctionReturn(0); 232 } 233 234 #undef __FUNCT__ 235 #define __FUNCT__ "MatDenseRestoreArray_MPIDense" 236 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 237 { 238 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 239 PetscErrorCode ierr; 240 241 PetscFunctionBegin; 242 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 248 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 249 { 250 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 251 MPI_Comm comm; 252 PetscErrorCode ierr; 253 PetscInt nstash,reallocs; 254 InsertMode addv; 255 256 PetscFunctionBegin; 257 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 258 /* make sure all processors are either in INSERTMODE or ADDMODE */ 259 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 260 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 261 mat->insertmode = addv; /* in case this processor had no cache */ 262 263 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 264 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 265 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 266 PetscFunctionReturn(0); 267 } 268 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 271 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 272 { 273 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 274 PetscErrorCode ierr; 275 PetscInt i,*row,*col,flg,j,rstart,ncols; 276 PetscMPIInt n; 277 PetscScalar *val; 278 InsertMode addv=mat->insertmode; 279 280 PetscFunctionBegin; 281 /* wait on receives */ 282 while (1) { 283 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 284 if (!flg) break; 285 286 for (i=0; i<n;) { 287 /* Now identify the consecutive vals belonging to the same row */ 288 for (j=i,rstart=row[j]; j<n; j++) { 289 if (row[j] != rstart) break; 290 } 291 if (j < n) ncols = j-i; 292 else ncols = n-i; 293 /* Now assemble all these values with a single function call */ 294 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 295 i = j; 296 } 297 } 298 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 299 300 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 301 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 302 303 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 304 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 305 } 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatZeroEntries_MPIDense" 311 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 312 { 313 PetscErrorCode ierr; 314 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 315 316 PetscFunctionBegin; 317 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 318 PetscFunctionReturn(0); 319 } 320 321 /* the code does not do the diagonal entries correctly unless the 322 matrix is square and the column and row owerships are identical. 323 This is a BUG. The only way to fix it seems to be to access 324 mdn->A and mdn->B directly and not through the MatZeroRows() 325 routine. 326 */ 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatZeroRows_MPIDense" 329 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 330 { 331 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 332 PetscErrorCode ierr; 333 PetscInt i,*owners = A->rmap->range; 334 PetscInt *nprocs,j,idx,nsends; 335 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 336 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 337 PetscInt *lens,*lrows,*values; 338 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 339 MPI_Comm comm; 340 MPI_Request *send_waits,*recv_waits; 341 MPI_Status recv_status,*send_status; 342 PetscBool found; 343 const PetscScalar *xx; 344 PetscScalar *bb; 345 346 PetscFunctionBegin; 347 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 348 if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 349 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 350 /* first count number of contributors to each processor */ 351 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 352 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 353 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 354 for (i=0; i<N; i++) { 355 idx = rows[i]; 356 found = PETSC_FALSE; 357 for (j=0; j<size; j++) { 358 if (idx >= owners[j] && idx < owners[j+1]) { 359 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 360 } 361 } 362 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 363 } 364 nsends = 0; 365 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 366 367 /* inform other processors of number of messages and max length*/ 368 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 369 370 /* post receives: */ 371 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 372 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 373 for (i=0; i<nrecvs; i++) { 374 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 375 } 376 377 /* do sends: 378 1) starts[i] gives the starting index in svalues for stuff going to 379 the ith processor 380 */ 381 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 382 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 383 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 384 385 starts[0] = 0; 386 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 387 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 388 389 starts[0] = 0; 390 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 391 count = 0; 392 for (i=0; i<size; i++) { 393 if (nprocs[2*i+1]) { 394 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 395 } 396 } 397 ierr = PetscFree(starts);CHKERRQ(ierr); 398 399 base = owners[rank]; 400 401 /* wait on receives */ 402 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 403 count = nrecvs; 404 slen = 0; 405 while (count) { 406 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 407 /* unpack receives into our local space */ 408 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 409 410 source[imdex] = recv_status.MPI_SOURCE; 411 lens[imdex] = n; 412 slen += n; 413 count--; 414 } 415 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 416 417 /* move the data into the send scatter */ 418 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 419 count = 0; 420 for (i=0; i<nrecvs; i++) { 421 values = rvalues + i*nmax; 422 for (j=0; j<lens[i]; j++) { 423 lrows[count++] = values[j] - base; 424 } 425 } 426 ierr = PetscFree(rvalues);CHKERRQ(ierr); 427 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 428 ierr = PetscFree(owner);CHKERRQ(ierr); 429 ierr = PetscFree(nprocs);CHKERRQ(ierr); 430 431 /* fix right hand side if needed */ 432 if (x && b) { 433 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 434 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 435 for (i=0; i<slen; i++) { 436 bb[lrows[i]] = diag*xx[lrows[i]]; 437 } 438 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 439 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 440 } 441 442 /* actually zap the local rows */ 443 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 444 if (diag != 0.0) { 445 Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 446 PetscInt m = ll->lda, i; 447 448 for (i=0; i<slen; i++) { 449 ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 450 } 451 } 452 ierr = PetscFree(lrows);CHKERRQ(ierr); 453 454 /* wait on sends */ 455 if (nsends) { 456 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 457 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 458 ierr = PetscFree(send_status);CHKERRQ(ierr); 459 } 460 ierr = PetscFree(send_waits);CHKERRQ(ierr); 461 ierr = PetscFree(svalues);CHKERRQ(ierr); 462 PetscFunctionReturn(0); 463 } 464 465 #undef __FUNCT__ 466 #define __FUNCT__ "MatMult_MPIDense" 467 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 468 { 469 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 470 PetscErrorCode ierr; 471 472 PetscFunctionBegin; 473 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 474 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 475 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatMultAdd_MPIDense" 481 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 482 { 483 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 484 PetscErrorCode ierr; 485 486 PetscFunctionBegin; 487 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 488 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 489 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 490 PetscFunctionReturn(0); 491 } 492 493 #undef __FUNCT__ 494 #define __FUNCT__ "MatMultTranspose_MPIDense" 495 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 496 { 497 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 498 PetscErrorCode ierr; 499 PetscScalar zero = 0.0; 500 501 PetscFunctionBegin; 502 ierr = VecSet(yy,zero);CHKERRQ(ierr); 503 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 504 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 505 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 506 PetscFunctionReturn(0); 507 } 508 509 #undef __FUNCT__ 510 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 511 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 512 { 513 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 514 PetscErrorCode ierr; 515 516 PetscFunctionBegin; 517 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 518 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 519 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 520 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 521 PetscFunctionReturn(0); 522 } 523 524 #undef __FUNCT__ 525 #define __FUNCT__ "MatGetDiagonal_MPIDense" 526 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 527 { 528 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 529 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 530 PetscErrorCode ierr; 531 PetscInt len,i,n,m = A->rmap->n,radd; 532 PetscScalar *x,zero = 0.0; 533 534 PetscFunctionBegin; 535 ierr = VecSet(v,zero);CHKERRQ(ierr); 536 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 537 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 538 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 539 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 540 radd = A->rmap->rstart*m; 541 for (i=0; i<len; i++) { 542 x[i] = aloc->v[radd + i*m + i]; 543 } 544 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 545 PetscFunctionReturn(0); 546 } 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "MatDestroy_MPIDense" 550 PetscErrorCode MatDestroy_MPIDense(Mat mat) 551 { 552 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 553 PetscErrorCode ierr; 554 555 PetscFunctionBegin; 556 #if defined(PETSC_USE_LOG) 557 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 558 #endif 559 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 560 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 561 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 562 ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 563 564 ierr = PetscFree(mat->data);CHKERRQ(ierr); 565 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 566 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 567 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 568 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 569 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 570 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatView_MPIDense_Binary" 576 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 577 { 578 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 579 PetscErrorCode ierr; 580 PetscViewerFormat format; 581 int fd; 582 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 583 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 584 PetscScalar *work,*v,*vv; 585 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 586 587 PetscFunctionBegin; 588 if (mdn->size == 1) { 589 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 590 } else { 591 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 592 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 593 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 594 595 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 596 if (format == PETSC_VIEWER_NATIVE) { 597 598 if (!rank) { 599 /* store the matrix as a dense matrix */ 600 header[0] = MAT_FILE_CLASSID; 601 header[1] = mat->rmap->N; 602 header[2] = N; 603 header[3] = MATRIX_BINARY_FORMAT_DENSE; 604 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 605 606 /* get largest work array needed for transposing array */ 607 mmax = mat->rmap->n; 608 for (i=1; i<size; i++) { 609 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 610 } 611 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 612 613 /* write out local array, by rows */ 614 m = mat->rmap->n; 615 v = a->v; 616 for (j=0; j<N; j++) { 617 for (i=0; i<m; i++) { 618 work[j + i*N] = *v++; 619 } 620 } 621 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 622 /* get largest work array to receive messages from other processes, excludes process zero */ 623 mmax = 0; 624 for (i=1; i<size; i++) { 625 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 626 } 627 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 628 for (k = 1; k < size; k++) { 629 v = vv; 630 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 631 ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 632 633 for (j = 0; j < N; j++) { 634 for (i = 0; i < m; i++) { 635 work[j + i*N] = *v++; 636 } 637 } 638 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 639 } 640 ierr = PetscFree(work);CHKERRQ(ierr); 641 ierr = PetscFree(vv);CHKERRQ(ierr); 642 } else { 643 ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 644 } 645 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 646 } 647 PetscFunctionReturn(0); 648 } 649 650 #include <petscdraw.h> 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 653 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 654 { 655 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 656 PetscErrorCode ierr; 657 PetscMPIInt size = mdn->size,rank = mdn->rank; 658 PetscViewerType vtype; 659 PetscBool iascii,isdraw; 660 PetscViewer sviewer; 661 PetscViewerFormat format; 662 663 PetscFunctionBegin; 664 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 665 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 666 if (iascii) { 667 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 668 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 669 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 670 MatInfo info; 671 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 672 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 673 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 674 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 675 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 676 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } else if (format == PETSC_VIEWER_ASCII_INFO) { 679 PetscFunctionReturn(0); 680 } 681 } else if (isdraw) { 682 PetscDraw draw; 683 PetscBool isnull; 684 685 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 686 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 687 if (isnull) PetscFunctionReturn(0); 688 } 689 690 if (size == 1) { 691 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 692 } else { 693 /* assemble the entire matrix onto first processor. */ 694 Mat A; 695 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 696 PetscInt *cols; 697 PetscScalar *vals; 698 699 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 700 if (!rank) { 701 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 702 } else { 703 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 704 } 705 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 706 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 707 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 708 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 709 710 /* Copy the matrix ... This isn't the most efficient means, 711 but it's quick for now */ 712 A->insertmode = INSERT_VALUES; 713 714 row = mat->rmap->rstart; 715 m = mdn->A->rmap->n; 716 for (i=0; i<m; i++) { 717 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 718 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 719 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 720 row++; 721 } 722 723 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 724 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 725 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 726 if (!rank) { 727 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 728 /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/ 729 PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE); 730 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 731 } 732 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 733 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 734 ierr = MatDestroy(&A);CHKERRQ(ierr); 735 } 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatView_MPIDense" 741 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 742 { 743 PetscErrorCode ierr; 744 PetscBool iascii,isbinary,isdraw,issocket; 745 746 PetscFunctionBegin; 747 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 748 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 749 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 750 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 751 752 if (iascii || issocket || isdraw) { 753 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 754 } else if (isbinary) { 755 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 756 } 757 PetscFunctionReturn(0); 758 } 759 760 #undef __FUNCT__ 761 #define __FUNCT__ "MatGetInfo_MPIDense" 762 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 763 { 764 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 765 Mat mdn = mat->A; 766 PetscErrorCode ierr; 767 PetscReal isend[5],irecv[5]; 768 769 PetscFunctionBegin; 770 info->block_size = 1.0; 771 772 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 773 774 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 775 isend[3] = info->memory; isend[4] = info->mallocs; 776 if (flag == MAT_LOCAL) { 777 info->nz_used = isend[0]; 778 info->nz_allocated = isend[1]; 779 info->nz_unneeded = isend[2]; 780 info->memory = isend[3]; 781 info->mallocs = isend[4]; 782 } else if (flag == MAT_GLOBAL_MAX) { 783 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 784 785 info->nz_used = irecv[0]; 786 info->nz_allocated = irecv[1]; 787 info->nz_unneeded = irecv[2]; 788 info->memory = irecv[3]; 789 info->mallocs = irecv[4]; 790 } else if (flag == MAT_GLOBAL_SUM) { 791 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 792 793 info->nz_used = irecv[0]; 794 info->nz_allocated = irecv[1]; 795 info->nz_unneeded = irecv[2]; 796 info->memory = irecv[3]; 797 info->mallocs = irecv[4]; 798 } 799 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 800 info->fill_ratio_needed = 0; 801 info->factor_mallocs = 0; 802 PetscFunctionReturn(0); 803 } 804 805 #undef __FUNCT__ 806 #define __FUNCT__ "MatSetOption_MPIDense" 807 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 808 { 809 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 810 PetscErrorCode ierr; 811 812 PetscFunctionBegin; 813 switch (op) { 814 case MAT_NEW_NONZERO_LOCATIONS: 815 case MAT_NEW_NONZERO_LOCATION_ERR: 816 case MAT_NEW_NONZERO_ALLOCATION_ERR: 817 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 818 break; 819 case MAT_ROW_ORIENTED: 820 a->roworiented = flg; 821 822 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 823 break; 824 case MAT_NEW_DIAGONALS: 825 case MAT_KEEP_NONZERO_PATTERN: 826 case MAT_USE_HASH_TABLE: 827 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 828 break; 829 case MAT_IGNORE_OFF_PROC_ENTRIES: 830 a->donotstash = flg; 831 break; 832 case MAT_SYMMETRIC: 833 case MAT_STRUCTURALLY_SYMMETRIC: 834 case MAT_HERMITIAN: 835 case MAT_SYMMETRY_ETERNAL: 836 case MAT_IGNORE_LOWER_TRIANGULAR: 837 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 838 break; 839 default: 840 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 841 } 842 PetscFunctionReturn(0); 843 } 844 845 846 #undef __FUNCT__ 847 #define __FUNCT__ "MatDiagonalScale_MPIDense" 848 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 849 { 850 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 851 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 852 PetscScalar *l,*r,x,*v; 853 PetscErrorCode ierr; 854 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 855 856 PetscFunctionBegin; 857 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 858 if (ll) { 859 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 860 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 861 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 862 for (i=0; i<m; i++) { 863 x = l[i]; 864 v = mat->v + i; 865 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 866 } 867 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 868 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 869 } 870 if (rr) { 871 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 872 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 873 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 874 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 875 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 876 for (i=0; i<n; i++) { 877 x = r[i]; 878 v = mat->v + i*m; 879 for (j=0; j<m; j++) (*v++) *= x; 880 } 881 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 882 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 883 } 884 PetscFunctionReturn(0); 885 } 886 887 #undef __FUNCT__ 888 #define __FUNCT__ "MatNorm_MPIDense" 889 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 890 { 891 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 892 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 893 PetscErrorCode ierr; 894 PetscInt i,j; 895 PetscReal sum = 0.0; 896 PetscScalar *v = mat->v; 897 898 PetscFunctionBegin; 899 if (mdn->size == 1) { 900 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 901 } else { 902 if (type == NORM_FROBENIUS) { 903 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 904 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 905 } 906 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 907 *nrm = PetscSqrtReal(*nrm); 908 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 909 } else if (type == NORM_1) { 910 PetscReal *tmp,*tmp2; 911 ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 912 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 913 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 914 *nrm = 0.0; 915 v = mat->v; 916 for (j=0; j<mdn->A->cmap->n; j++) { 917 for (i=0; i<mdn->A->rmap->n; i++) { 918 tmp[j] += PetscAbsScalar(*v); v++; 919 } 920 } 921 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 922 for (j=0; j<A->cmap->N; j++) { 923 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 924 } 925 ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr); 926 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 927 } else if (type == NORM_INFINITY) { /* max row norm */ 928 PetscReal ntemp; 929 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 930 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 931 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 932 } 933 PetscFunctionReturn(0); 934 } 935 936 #undef __FUNCT__ 937 #define __FUNCT__ "MatTranspose_MPIDense" 938 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 939 { 940 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 941 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 942 Mat B; 943 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 944 PetscErrorCode ierr; 945 PetscInt j,i; 946 PetscScalar *v; 947 948 PetscFunctionBegin; 949 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place"); 950 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 951 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 952 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 953 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 954 ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 955 } else { 956 B = *matout; 957 } 958 959 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 960 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 961 for (i=0; i<m; i++) rwork[i] = rstart + i; 962 for (j=0; j<n; j++) { 963 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 964 v += m; 965 } 966 ierr = PetscFree(rwork);CHKERRQ(ierr); 967 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 968 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 969 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 970 *matout = B; 971 } else { 972 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 973 } 974 PetscFunctionReturn(0); 975 } 976 977 978 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 979 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatSetUp_MPIDense" 983 PetscErrorCode MatSetUp_MPIDense(Mat A) 984 { 985 PetscErrorCode ierr; 986 987 PetscFunctionBegin; 988 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatAXPY_MPIDense" 994 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 995 { 996 PetscErrorCode ierr; 997 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 998 999 PetscFunctionBegin; 1000 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1001 PetscFunctionReturn(0); 1002 } 1003 1004 #undef __FUNCT__ 1005 #define __FUNCT__ "MatConjugate_MPIDense" 1006 PetscErrorCode MatConjugate_MPIDense(Mat mat) 1007 { 1008 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1009 PetscErrorCode ierr; 1010 1011 PetscFunctionBegin; 1012 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1013 PetscFunctionReturn(0); 1014 } 1015 1016 #undef __FUNCT__ 1017 #define __FUNCT__ "MatRealPart_MPIDense" 1018 PetscErrorCode MatRealPart_MPIDense(Mat A) 1019 { 1020 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1021 PetscErrorCode ierr; 1022 1023 PetscFunctionBegin; 1024 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1025 PetscFunctionReturn(0); 1026 } 1027 1028 #undef __FUNCT__ 1029 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1030 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1031 { 1032 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1033 PetscErrorCode ierr; 1034 1035 PetscFunctionBegin; 1036 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1037 PetscFunctionReturn(0); 1038 } 1039 1040 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 1041 #undef __FUNCT__ 1042 #define __FUNCT__ "MatGetColumnNorms_MPIDense" 1043 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 1044 { 1045 PetscErrorCode ierr; 1046 PetscInt i,n; 1047 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1048 PetscReal *work; 1049 1050 PetscFunctionBegin; 1051 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1052 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 1053 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 1054 if (type == NORM_2) { 1055 for (i=0; i<n; i++) work[i] *= work[i]; 1056 } 1057 if (type == NORM_INFINITY) { 1058 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 1059 } else { 1060 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 1061 } 1062 ierr = PetscFree(work);CHKERRQ(ierr); 1063 if (type == NORM_2) { 1064 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 1065 } 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatSetRandom_MPIDense" 1071 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1072 { 1073 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1074 PetscErrorCode ierr; 1075 PetscScalar *a; 1076 PetscInt m,n,i; 1077 1078 PetscFunctionBegin; 1079 ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 1080 ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 1081 for (i=0; i<m*n; i++) { 1082 ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 1083 } 1084 ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 /* -------------------------------------------------------------------*/ 1089 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1090 MatGetRow_MPIDense, 1091 MatRestoreRow_MPIDense, 1092 MatMult_MPIDense, 1093 /* 4*/ MatMultAdd_MPIDense, 1094 MatMultTranspose_MPIDense, 1095 MatMultTransposeAdd_MPIDense, 1096 0, 1097 0, 1098 0, 1099 /* 10*/ 0, 1100 0, 1101 0, 1102 0, 1103 MatTranspose_MPIDense, 1104 /* 15*/ MatGetInfo_MPIDense, 1105 MatEqual_MPIDense, 1106 MatGetDiagonal_MPIDense, 1107 MatDiagonalScale_MPIDense, 1108 MatNorm_MPIDense, 1109 /* 20*/ MatAssemblyBegin_MPIDense, 1110 MatAssemblyEnd_MPIDense, 1111 MatSetOption_MPIDense, 1112 MatZeroEntries_MPIDense, 1113 /* 24*/ MatZeroRows_MPIDense, 1114 0, 1115 0, 1116 0, 1117 0, 1118 /* 29*/ MatSetUp_MPIDense, 1119 0, 1120 0, 1121 0, 1122 0, 1123 /* 34*/ MatDuplicate_MPIDense, 1124 0, 1125 0, 1126 0, 1127 0, 1128 /* 39*/ MatAXPY_MPIDense, 1129 MatGetSubMatrices_MPIDense, 1130 0, 1131 MatGetValues_MPIDense, 1132 0, 1133 /* 44*/ 0, 1134 MatScale_MPIDense, 1135 0, 1136 0, 1137 0, 1138 /* 49*/ MatSetRandom_MPIDense, 1139 0, 1140 0, 1141 0, 1142 0, 1143 /* 54*/ 0, 1144 0, 1145 0, 1146 0, 1147 0, 1148 /* 59*/ MatGetSubMatrix_MPIDense, 1149 MatDestroy_MPIDense, 1150 MatView_MPIDense, 1151 0, 1152 0, 1153 /* 64*/ 0, 1154 0, 1155 0, 1156 0, 1157 0, 1158 /* 69*/ 0, 1159 0, 1160 0, 1161 0, 1162 0, 1163 /* 74*/ 0, 1164 0, 1165 0, 1166 0, 1167 0, 1168 /* 79*/ 0, 1169 0, 1170 0, 1171 0, 1172 /* 83*/ MatLoad_MPIDense, 1173 0, 1174 0, 1175 0, 1176 0, 1177 0, 1178 /* 89*/ 1179 0, 1180 0, 1181 0, 1182 0, 1183 0, 1184 /* 94*/ 0, 1185 0, 1186 0, 1187 0, 1188 0, 1189 /* 99*/ 0, 1190 0, 1191 0, 1192 MatConjugate_MPIDense, 1193 0, 1194 /*104*/ 0, 1195 MatRealPart_MPIDense, 1196 MatImaginaryPart_MPIDense, 1197 0, 1198 0, 1199 /*109*/ 0, 1200 0, 1201 0, 1202 0, 1203 0, 1204 /*114*/ 0, 1205 0, 1206 0, 1207 0, 1208 0, 1209 /*119*/ 0, 1210 0, 1211 0, 1212 0, 1213 0, 1214 /*124*/ 0, 1215 MatGetColumnNorms_MPIDense, 1216 0, 1217 0, 1218 0, 1219 /*129*/ 0, 1220 0, 1221 0, 1222 0, 1223 0, 1224 /*134*/ 0, 1225 0, 1226 0, 1227 0, 1228 0, 1229 /*139*/ 0, 1230 0 1231 }; 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1235 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1236 { 1237 Mat_MPIDense *a; 1238 PetscErrorCode ierr; 1239 1240 PetscFunctionBegin; 1241 mat->preallocated = PETSC_TRUE; 1242 /* Note: For now, when data is specified above, this assumes the user correctly 1243 allocates the local dense storage space. We should add error checking. */ 1244 1245 a = (Mat_MPIDense*)mat->data; 1246 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1247 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1248 a->nvec = mat->cmap->n; 1249 1250 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1251 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1252 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1253 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1254 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatCreate_MPIDense" 1260 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1261 { 1262 Mat_MPIDense *a; 1263 PetscErrorCode ierr; 1264 1265 PetscFunctionBegin; 1266 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1267 mat->data = (void*)a; 1268 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1269 1270 mat->insertmode = NOT_SET_VALUES; 1271 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1272 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1273 1274 /* build cache for off array entries formed */ 1275 a->donotstash = PETSC_FALSE; 1276 1277 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1278 1279 /* stuff used for matrix vector multiply */ 1280 a->lvec = 0; 1281 a->Mvctx = 0; 1282 a->roworiented = PETSC_TRUE; 1283 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 1286 1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1292 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1293 PetscFunctionReturn(0); 1294 } 1295 1296 /*MC 1297 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1298 1299 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1300 and MATMPIDENSE otherwise. 1301 1302 Options Database Keys: 1303 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1304 1305 Level: beginner 1306 1307 1308 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1309 M*/ 1310 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1313 /*@C 1314 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1315 1316 Not collective 1317 1318 Input Parameters: 1319 . A - the matrix 1320 - data - optional location of matrix data. Set data=NULL for PETSc 1321 to control all matrix memory allocation. 1322 1323 Notes: 1324 The dense format is fully compatible with standard Fortran 77 1325 storage by columns. 1326 1327 The data input variable is intended primarily for Fortran programmers 1328 who wish to allocate their own matrix memory space. Most users should 1329 set data=NULL. 1330 1331 Level: intermediate 1332 1333 .keywords: matrix,dense, parallel 1334 1335 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1336 @*/ 1337 PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1338 { 1339 PetscErrorCode ierr; 1340 1341 PetscFunctionBegin; 1342 ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));CHKERRQ(ierr); 1343 PetscFunctionReturn(0); 1344 } 1345 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "MatCreateDense" 1348 /*@C 1349 MatCreateDense - Creates a parallel matrix in dense format. 1350 1351 Collective on MPI_Comm 1352 1353 Input Parameters: 1354 + comm - MPI communicator 1355 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1356 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1357 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1358 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1359 - data - optional location of matrix data. Set data=NULL (NULL_SCALAR for Fortran users) for PETSc 1360 to control all matrix memory allocation. 1361 1362 Output Parameter: 1363 . A - the matrix 1364 1365 Notes: 1366 The dense format is fully compatible with standard Fortran 77 1367 storage by columns. 1368 1369 The data input variable is intended primarily for Fortran programmers 1370 who wish to allocate their own matrix memory space. Most users should 1371 set data=NULL (NULL_SCALAR for Fortran users). 1372 1373 The user MUST specify either the local or global matrix dimensions 1374 (possibly both). 1375 1376 Level: intermediate 1377 1378 .keywords: matrix,dense, parallel 1379 1380 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1381 @*/ 1382 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1383 { 1384 PetscErrorCode ierr; 1385 PetscMPIInt size; 1386 1387 PetscFunctionBegin; 1388 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1389 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1390 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1391 if (size > 1) { 1392 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1393 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1394 } else { 1395 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1396 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1397 } 1398 PetscFunctionReturn(0); 1399 } 1400 1401 #undef __FUNCT__ 1402 #define __FUNCT__ "MatDuplicate_MPIDense" 1403 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1404 { 1405 Mat mat; 1406 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1407 PetscErrorCode ierr; 1408 1409 PetscFunctionBegin; 1410 *newmat = 0; 1411 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 1412 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1413 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1414 a = (Mat_MPIDense*)mat->data; 1415 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1416 1417 mat->factortype = A->factortype; 1418 mat->assembled = PETSC_TRUE; 1419 mat->preallocated = PETSC_TRUE; 1420 1421 a->size = oldmat->size; 1422 a->rank = oldmat->rank; 1423 mat->insertmode = NOT_SET_VALUES; 1424 a->nvec = oldmat->nvec; 1425 a->donotstash = oldmat->donotstash; 1426 1427 ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 1428 ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 1429 1430 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1431 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1432 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1433 1434 *newmat = mat; 1435 PetscFunctionReturn(0); 1436 } 1437 1438 #undef __FUNCT__ 1439 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1440 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset) 1441 { 1442 PetscErrorCode ierr; 1443 PetscMPIInt rank,size; 1444 PetscInt *rowners,i,m,nz,j; 1445 PetscScalar *array,*vals,*vals_ptr; 1446 1447 PetscFunctionBegin; 1448 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1449 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1450 1451 /* determine ownership of all rows */ 1452 if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank); 1453 else m = newmat->rmap->n; 1454 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1455 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1456 rowners[0] = 0; 1457 for (i=2; i<=size; i++) { 1458 rowners[i] += rowners[i-1]; 1459 } 1460 1461 if (!sizesset) { 1462 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1463 } 1464 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1465 ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 1466 1467 if (!rank) { 1468 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1469 1470 /* read in my part of the matrix numerical values */ 1471 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1472 1473 /* insert into matrix-by row (this is why cannot directly read into array */ 1474 vals_ptr = vals; 1475 for (i=0; i<m; i++) { 1476 for (j=0; j<N; j++) { 1477 array[i + j*m] = *vals_ptr++; 1478 } 1479 } 1480 1481 /* read in other processors and ship out */ 1482 for (i=1; i<size; i++) { 1483 nz = (rowners[i+1] - rowners[i])*N; 1484 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1485 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1486 } 1487 } else { 1488 /* receive numeric values */ 1489 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1490 1491 /* receive message of values*/ 1492 ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 1493 1494 /* insert into matrix-by row (this is why cannot directly read into array */ 1495 vals_ptr = vals; 1496 for (i=0; i<m; i++) { 1497 for (j=0; j<N; j++) { 1498 array[i + j*m] = *vals_ptr++; 1499 } 1500 } 1501 } 1502 ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr); 1503 ierr = PetscFree(rowners);CHKERRQ(ierr); 1504 ierr = PetscFree(vals);CHKERRQ(ierr); 1505 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1506 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1507 PetscFunctionReturn(0); 1508 } 1509 1510 #undef __FUNCT__ 1511 #define __FUNCT__ "MatLoad_MPIDense" 1512 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 1513 { 1514 PetscScalar *vals,*svals; 1515 MPI_Comm comm; 1516 MPI_Status status; 1517 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1518 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1519 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1520 PetscInt i,nz,j,rstart,rend,sizesset=1,grows,gcols; 1521 int fd; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 1526 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1527 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1528 if (!rank) { 1529 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1530 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 1531 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1532 } 1533 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 1534 1535 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1536 M = header[1]; N = header[2]; nz = header[3]; 1537 1538 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 1539 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 1540 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 1541 1542 /* If global sizes are set, check if they are consistent with that given in the file */ 1543 if (sizesset) { 1544 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 1545 } 1546 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 1547 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 1548 1549 /* 1550 Handle case where matrix is stored on disk as a dense matrix 1551 */ 1552 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1553 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr); 1554 PetscFunctionReturn(0); 1555 } 1556 1557 /* determine ownership of all rows */ 1558 if (newmat->rmap->n < 0) { 1559 ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr); 1560 } else { 1561 ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr); 1562 } 1563 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1564 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1565 rowners[0] = 0; 1566 for (i=2; i<=size; i++) { 1567 rowners[i] += rowners[i-1]; 1568 } 1569 rstart = rowners[rank]; 1570 rend = rowners[rank+1]; 1571 1572 /* distribute row lengths to all processors */ 1573 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 1574 if (!rank) { 1575 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1576 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1577 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1578 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1579 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1580 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1581 } else { 1582 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1583 } 1584 1585 if (!rank) { 1586 /* calculate the number of nonzeros on each processor */ 1587 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1588 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1589 for (i=0; i<size; i++) { 1590 for (j=rowners[i]; j< rowners[i+1]; j++) { 1591 procsnz[i] += rowlengths[j]; 1592 } 1593 } 1594 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1595 1596 /* determine max buffer needed and allocate it */ 1597 maxnz = 0; 1598 for (i=0; i<size; i++) { 1599 maxnz = PetscMax(maxnz,procsnz[i]); 1600 } 1601 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1602 1603 /* read in my part of the matrix column indices */ 1604 nz = procsnz[0]; 1605 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1606 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1607 1608 /* read in every one elses and ship off */ 1609 for (i=1; i<size; i++) { 1610 nz = procsnz[i]; 1611 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1612 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1613 } 1614 ierr = PetscFree(cols);CHKERRQ(ierr); 1615 } else { 1616 /* determine buffer space needed for message */ 1617 nz = 0; 1618 for (i=0; i<m; i++) { 1619 nz += ourlens[i]; 1620 } 1621 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1622 1623 /* receive message of column indices*/ 1624 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1625 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1626 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1627 } 1628 1629 /* loop over local rows, determining number of off diagonal entries */ 1630 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1631 jj = 0; 1632 for (i=0; i<m; i++) { 1633 for (j=0; j<ourlens[i]; j++) { 1634 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1635 jj++; 1636 } 1637 } 1638 1639 /* create our matrix */ 1640 for (i=0; i<m; i++) ourlens[i] -= offlens[i]; 1641 1642 if (!sizesset) { 1643 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1644 } 1645 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1646 for (i=0; i<m; i++) ourlens[i] += offlens[i]; 1647 1648 if (!rank) { 1649 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1650 1651 /* read in my part of the matrix numerical values */ 1652 nz = procsnz[0]; 1653 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1654 1655 /* insert into matrix */ 1656 jj = rstart; 1657 smycols = mycols; 1658 svals = vals; 1659 for (i=0; i<m; i++) { 1660 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1661 smycols += ourlens[i]; 1662 svals += ourlens[i]; 1663 jj++; 1664 } 1665 1666 /* read in other processors and ship out */ 1667 for (i=1; i<size; i++) { 1668 nz = procsnz[i]; 1669 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1670 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 1671 } 1672 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1673 } else { 1674 /* receive numeric values */ 1675 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1676 1677 /* receive message of values*/ 1678 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 1679 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1680 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1681 1682 /* insert into matrix */ 1683 jj = rstart; 1684 smycols = mycols; 1685 svals = vals; 1686 for (i=0; i<m; i++) { 1687 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1688 smycols += ourlens[i]; 1689 svals += ourlens[i]; 1690 jj++; 1691 } 1692 } 1693 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 1694 ierr = PetscFree(vals);CHKERRQ(ierr); 1695 ierr = PetscFree(mycols);CHKERRQ(ierr); 1696 ierr = PetscFree(rowners);CHKERRQ(ierr); 1697 1698 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1699 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1700 PetscFunctionReturn(0); 1701 } 1702 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "MatEqual_MPIDense" 1705 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 1706 { 1707 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1708 Mat a,b; 1709 PetscBool flg; 1710 PetscErrorCode ierr; 1711 1712 PetscFunctionBegin; 1713 a = matA->A; 1714 b = matB->A; 1715 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1716 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720