1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 #include "src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatDenseGetLocalMatrix" 11 /*@ 12 13 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 14 matrix that represents the operator. For sequential matrices it returns itself. 15 16 Input Parameter: 17 . A - the Seq or MPI dense matrix 18 19 Output Parameter: 20 . B - the inner matrix 21 22 Level: intermediate 23 24 @*/ 25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 26 { 27 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 28 PetscErrorCode ierr; 29 PetscTruth flg; 30 31 PetscFunctionBegin; 32 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 33 if (flg) { 34 *B = mat->A; 35 } else { 36 *B = A; 37 } 38 PetscFunctionReturn(0); 39 } 40 41 #undef __FUNCT__ 42 #define __FUNCT__ "MatGetRow_MPIDense" 43 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 44 { 45 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 46 PetscErrorCode ierr; 47 PetscInt lrow,rstart = A->rmap.rstart,rend = A->rmap.rend; 48 49 PetscFunctionBegin; 50 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 51 lrow = row - rstart; 52 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 53 PetscFunctionReturn(0); 54 } 55 56 #undef __FUNCT__ 57 #define __FUNCT__ "MatRestoreRow_MPIDense" 58 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 59 { 60 PetscErrorCode ierr; 61 62 PetscFunctionBegin; 63 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 64 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 65 PetscFunctionReturn(0); 66 } 67 68 EXTERN_C_BEGIN 69 #undef __FUNCT__ 70 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 71 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 72 { 73 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 74 PetscErrorCode ierr; 75 PetscInt m = A->rmap.n,rstart = A->rmap.rstart; 76 PetscScalar *array; 77 MPI_Comm comm; 78 79 PetscFunctionBegin; 80 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 81 82 /* The reuse aspect is not implemented efficiently */ 83 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 84 85 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 86 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 87 ierr = MatCreate(comm,B);CHKERRQ(ierr); 88 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 89 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 90 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 91 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 92 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 93 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 94 95 *iscopy = PETSC_TRUE; 96 PetscFunctionReturn(0); 97 } 98 EXTERN_C_END 99 100 #undef __FUNCT__ 101 #define __FUNCT__ "MatSetValues_MPIDense" 102 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 103 { 104 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 105 PetscErrorCode ierr; 106 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row; 107 PetscTruth roworiented = A->roworiented; 108 109 PetscFunctionBegin; 110 for (i=0; i<m; i++) { 111 if (idxm[i] < 0) continue; 112 if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 113 if (idxm[i] >= rstart && idxm[i] < rend) { 114 row = idxm[i] - rstart; 115 if (roworiented) { 116 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 117 } else { 118 for (j=0; j<n; j++) { 119 if (idxn[j] < 0) continue; 120 if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 121 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 122 } 123 } 124 } else { 125 if (!A->donotstash) { 126 if (roworiented) { 127 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr); 128 } else { 129 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr); 130 } 131 } 132 } 133 } 134 PetscFunctionReturn(0); 135 } 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatGetValues_MPIDense" 139 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 140 { 141 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 142 PetscErrorCode ierr; 143 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row; 144 145 PetscFunctionBegin; 146 for (i=0; i<m; i++) { 147 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 148 if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 149 if (idxm[i] >= rstart && idxm[i] < rend) { 150 row = idxm[i] - rstart; 151 for (j=0; j<n; j++) { 152 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 153 if (idxn[j] >= mat->cmap.N) { 154 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 155 } 156 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 157 } 158 } else { 159 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 160 } 161 } 162 PetscFunctionReturn(0); 163 } 164 165 #undef __FUNCT__ 166 #define __FUNCT__ "MatGetArray_MPIDense" 167 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 168 { 169 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 170 PetscErrorCode ierr; 171 172 PetscFunctionBegin; 173 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 174 PetscFunctionReturn(0); 175 } 176 177 #undef __FUNCT__ 178 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 179 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B) 180 { 181 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 182 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 183 PetscErrorCode ierr; 184 PetscInt i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols; 185 PetscScalar *av,*bv,*v = lmat->v; 186 Mat newmat; 187 188 PetscFunctionBegin; 189 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 190 ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr); 191 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 192 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 193 194 /* No parallel redistribution currently supported! Should really check each index set 195 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 196 original matrix! */ 197 198 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 199 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 200 201 /* Check submatrix call */ 202 if (scall == MAT_REUSE_MATRIX) { 203 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 204 /* Really need to test rows and column sizes! */ 205 newmat = *B; 206 } else { 207 /* Create and fill new matrix */ 208 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 209 ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr); 210 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 211 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 212 } 213 214 /* Now extract the data pointers and do the copy, column at a time */ 215 newmatd = (Mat_MPIDense*)newmat->data; 216 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 217 218 for (i=0; i<ncols; i++) { 219 av = v + nlrows*icol[i]; 220 for (j=0; j<nrows; j++) { 221 *bv++ = av[irow[j] - rstart]; 222 } 223 } 224 225 /* Assemble the matrices so that the correct flags are set */ 226 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 227 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 228 229 /* Free work space */ 230 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 231 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 232 *B = newmat; 233 PetscFunctionReturn(0); 234 } 235 236 #undef __FUNCT__ 237 #define __FUNCT__ "MatRestoreArray_MPIDense" 238 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 239 { 240 PetscFunctionBegin; 241 PetscFunctionReturn(0); 242 } 243 244 #undef __FUNCT__ 245 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 246 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 247 { 248 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 249 MPI_Comm comm = ((PetscObject)mat)->comm; 250 PetscErrorCode ierr; 251 PetscInt nstash,reallocs; 252 InsertMode addv; 253 254 PetscFunctionBegin; 255 /* make sure all processors are either in INSERTMODE or ADDMODE */ 256 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 257 if (addv == (ADD_VALUES|INSERT_VALUES)) { 258 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 259 } 260 mat->insertmode = addv; /* in case this processor had no cache */ 261 262 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr); 263 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 264 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 270 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 271 { 272 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 273 PetscErrorCode ierr; 274 PetscInt i,*row,*col,flg,j,rstart,ncols; 275 PetscMPIInt n; 276 PetscScalar *val; 277 InsertMode addv=mat->insertmode; 278 279 PetscFunctionBegin; 280 /* wait on receives */ 281 while (1) { 282 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 283 if (!flg) break; 284 285 for (i=0; i<n;) { 286 /* Now identify the consecutive vals belonging to the same row */ 287 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 288 if (j < n) ncols = j-i; 289 else ncols = n-i; 290 /* Now assemble all these values with a single function call */ 291 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 292 i = j; 293 } 294 } 295 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 296 297 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 298 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 299 300 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 301 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 302 } 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "MatZeroEntries_MPIDense" 308 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 309 { 310 PetscErrorCode ierr; 311 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 312 313 PetscFunctionBegin; 314 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 315 PetscFunctionReturn(0); 316 } 317 318 /* the code does not do the diagonal entries correctly unless the 319 matrix is square and the column and row owerships are identical. 320 This is a BUG. The only way to fix it seems to be to access 321 mdn->A and mdn->B directly and not through the MatZeroRows() 322 routine. 323 */ 324 #undef __FUNCT__ 325 #define __FUNCT__ "MatZeroRows_MPIDense" 326 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 327 { 328 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 329 PetscErrorCode ierr; 330 PetscInt i,*owners = A->rmap.range; 331 PetscInt *nprocs,j,idx,nsends; 332 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 333 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 334 PetscInt *lens,*lrows,*values; 335 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 336 MPI_Comm comm = ((PetscObject)A)->comm; 337 MPI_Request *send_waits,*recv_waits; 338 MPI_Status recv_status,*send_status; 339 PetscTruth found; 340 341 PetscFunctionBegin; 342 /* first count number of contributors to each processor */ 343 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 344 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 345 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 346 for (i=0; i<N; i++) { 347 idx = rows[i]; 348 found = PETSC_FALSE; 349 for (j=0; j<size; j++) { 350 if (idx >= owners[j] && idx < owners[j+1]) { 351 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 352 } 353 } 354 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 355 } 356 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 357 358 /* inform other processors of number of messages and max length*/ 359 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 360 361 /* post receives: */ 362 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 363 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 364 for (i=0; i<nrecvs; i++) { 365 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 366 } 367 368 /* do sends: 369 1) starts[i] gives the starting index in svalues for stuff going to 370 the ith processor 371 */ 372 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 373 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 374 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 375 starts[0] = 0; 376 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 377 for (i=0; i<N; i++) { 378 svalues[starts[owner[i]]++] = rows[i]; 379 } 380 381 starts[0] = 0; 382 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 383 count = 0; 384 for (i=0; i<size; i++) { 385 if (nprocs[2*i+1]) { 386 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 387 } 388 } 389 ierr = PetscFree(starts);CHKERRQ(ierr); 390 391 base = owners[rank]; 392 393 /* wait on receives */ 394 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 395 source = lens + nrecvs; 396 count = nrecvs; slen = 0; 397 while (count) { 398 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 399 /* unpack receives into our local space */ 400 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 401 source[imdex] = recv_status.MPI_SOURCE; 402 lens[imdex] = n; 403 slen += n; 404 count--; 405 } 406 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 407 408 /* move the data into the send scatter */ 409 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 410 count = 0; 411 for (i=0; i<nrecvs; i++) { 412 values = rvalues + i*nmax; 413 for (j=0; j<lens[i]; j++) { 414 lrows[count++] = values[j] - base; 415 } 416 } 417 ierr = PetscFree(rvalues);CHKERRQ(ierr); 418 ierr = PetscFree(lens);CHKERRQ(ierr); 419 ierr = PetscFree(owner);CHKERRQ(ierr); 420 ierr = PetscFree(nprocs);CHKERRQ(ierr); 421 422 /* actually zap the local rows */ 423 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 424 ierr = PetscFree(lrows);CHKERRQ(ierr); 425 426 /* wait on sends */ 427 if (nsends) { 428 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 429 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 430 ierr = PetscFree(send_status);CHKERRQ(ierr); 431 } 432 ierr = PetscFree(send_waits);CHKERRQ(ierr); 433 ierr = PetscFree(svalues);CHKERRQ(ierr); 434 435 PetscFunctionReturn(0); 436 } 437 438 #undef __FUNCT__ 439 #define __FUNCT__ "MatMult_MPIDense" 440 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 441 { 442 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 443 PetscErrorCode ierr; 444 445 PetscFunctionBegin; 446 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 447 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 448 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatMultAdd_MPIDense" 454 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 455 { 456 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 457 PetscErrorCode ierr; 458 459 PetscFunctionBegin; 460 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 461 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 462 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "MatMultTranspose_MPIDense" 468 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 469 { 470 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 471 PetscErrorCode ierr; 472 PetscScalar zero = 0.0; 473 474 PetscFunctionBegin; 475 ierr = VecSet(yy,zero);CHKERRQ(ierr); 476 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 477 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 478 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 484 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 485 { 486 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 487 PetscErrorCode ierr; 488 489 PetscFunctionBegin; 490 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 491 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 492 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 493 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 494 PetscFunctionReturn(0); 495 } 496 497 #undef __FUNCT__ 498 #define __FUNCT__ "MatGetDiagonal_MPIDense" 499 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 500 { 501 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 502 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 503 PetscErrorCode ierr; 504 PetscInt len,i,n,m = A->rmap.n,radd; 505 PetscScalar *x,zero = 0.0; 506 507 PetscFunctionBegin; 508 ierr = VecSet(v,zero);CHKERRQ(ierr); 509 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 510 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 511 if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 512 len = PetscMin(a->A->rmap.n,a->A->cmap.n); 513 radd = A->rmap.rstart*m; 514 for (i=0; i<len; i++) { 515 x[i] = aloc->v[radd + i*m + i]; 516 } 517 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 518 PetscFunctionReturn(0); 519 } 520 521 #undef __FUNCT__ 522 #define __FUNCT__ "MatDestroy_MPIDense" 523 PetscErrorCode MatDestroy_MPIDense(Mat mat) 524 { 525 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 526 PetscErrorCode ierr; 527 #if defined(PETSC_HAVE_PLAPACK) 528 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 529 #endif 530 531 PetscFunctionBegin; 532 533 #if defined(PETSC_USE_LOG) 534 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 535 #endif 536 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 537 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 538 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 539 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 540 #if defined(PETSC_HAVE_PLAPACK) 541 if (lu->CleanUpPlapack) { 542 PLA_Obj_free(&lu->A); 543 PLA_Obj_free (&lu->pivots); 544 PLA_Temp_free(&lu->templ); 545 PLA_Finalize(); 546 547 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 548 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 549 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 550 } 551 #endif 552 553 ierr = PetscFree(mdn);CHKERRQ(ierr); 554 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 555 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 556 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 557 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 558 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 559 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 560 PetscFunctionReturn(0); 561 } 562 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatView_MPIDense_Binary" 565 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 566 { 567 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 568 PetscErrorCode ierr; 569 PetscViewerFormat format; 570 int fd; 571 PetscInt header[4],mmax,N = mat->cmap.N,i,j,m,k; 572 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 573 PetscScalar *work,*v,*vv; 574 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 575 MPI_Status status; 576 577 PetscFunctionBegin; 578 if (mdn->size == 1) { 579 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 580 } else { 581 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 582 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 583 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 584 585 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 586 if (format == PETSC_VIEWER_BINARY_NATIVE) { 587 588 if (!rank) { 589 /* store the matrix as a dense matrix */ 590 header[0] = MAT_FILE_COOKIE; 591 header[1] = mat->rmap.N; 592 header[2] = N; 593 header[3] = MATRIX_BINARY_FORMAT_DENSE; 594 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 595 596 /* get largest work array needed for transposing array */ 597 mmax = mat->rmap.n; 598 for (i=1; i<size; i++) { 599 mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]); 600 } 601 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 602 603 /* write out local array, by rows */ 604 m = mat->rmap.n; 605 v = a->v; 606 for (j=0; j<N; j++) { 607 for (i=0; i<m; i++) { 608 work[j + i*N] = *v++; 609 } 610 } 611 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 612 /* get largest work array to receive messages from other processes, excludes process zero */ 613 mmax = 0; 614 for (i=1; i<size; i++) { 615 mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]); 616 } 617 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 618 v = vv; 619 for (k=1; k<size; k++) { 620 m = mat->rmap.range[k+1] - mat->rmap.range[k]; 621 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 622 623 for (j=0; j<N; j++) { 624 for (i=0; i<m; i++) { 625 work[j + i*N] = *v++; 626 } 627 } 628 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 629 } 630 ierr = PetscFree(work);CHKERRQ(ierr); 631 ierr = PetscFree(vv);CHKERRQ(ierr); 632 } else { 633 ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 634 } 635 } 636 } 637 PetscFunctionReturn(0); 638 } 639 640 #undef __FUNCT__ 641 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 642 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 643 { 644 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 645 PetscErrorCode ierr; 646 PetscMPIInt size = mdn->size,rank = mdn->rank; 647 PetscViewerType vtype; 648 PetscTruth iascii,isdraw; 649 PetscViewer sviewer; 650 PetscViewerFormat format; 651 #if defined(PETSC_HAVE_PLAPACK) 652 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 653 #endif 654 655 PetscFunctionBegin; 656 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 657 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 658 if (iascii) { 659 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 660 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 661 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 662 MatInfo info; 663 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 664 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n, 665 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 666 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 667 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 668 #if defined(PETSC_HAVE_PLAPACK) 669 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 670 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",lu->nprows, lu->npcols);CHKERRQ(ierr); 671 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 672 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",lu->ierror);CHKERRQ(ierr); 673 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",lu->nb_alg);CHKERRQ(ierr); 674 #endif 675 PetscFunctionReturn(0); 676 } else if (format == PETSC_VIEWER_ASCII_INFO) { 677 PetscFunctionReturn(0); 678 } 679 } else if (isdraw) { 680 PetscDraw draw; 681 PetscTruth isnull; 682 683 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 684 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 685 if (isnull) PetscFunctionReturn(0); 686 } 687 688 if (size == 1) { 689 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 690 } else { 691 /* assemble the entire matrix onto first processor. */ 692 Mat A; 693 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz; 694 PetscInt *cols; 695 PetscScalar *vals; 696 697 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 698 if (!rank) { 699 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 700 } else { 701 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 702 } 703 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 704 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 705 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 706 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 707 708 /* Copy the matrix ... This isn't the most efficient means, 709 but it's quick for now */ 710 A->insertmode = INSERT_VALUES; 711 row = mat->rmap.rstart; m = mdn->A->rmap.n; 712 for (i=0; i<m; i++) { 713 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 714 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 715 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 716 row++; 717 } 718 719 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 720 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 721 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 722 if (!rank) { 723 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 724 } 725 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 726 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 727 ierr = MatDestroy(A);CHKERRQ(ierr); 728 } 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatView_MPIDense" 734 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 735 { 736 PetscErrorCode ierr; 737 PetscTruth iascii,isbinary,isdraw,issocket; 738 739 PetscFunctionBegin; 740 741 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 742 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 743 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 744 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 745 746 if (iascii || issocket || isdraw) { 747 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 748 } else if (isbinary) { 749 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 750 } else { 751 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 752 } 753 PetscFunctionReturn(0); 754 } 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatGetInfo_MPIDense" 758 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 759 { 760 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 761 Mat mdn = mat->A; 762 PetscErrorCode ierr; 763 PetscReal isend[5],irecv[5]; 764 765 PetscFunctionBegin; 766 info->rows_global = (double)A->rmap.N; 767 info->columns_global = (double)A->cmap.N; 768 info->rows_local = (double)A->rmap.n; 769 info->columns_local = (double)A->cmap.N; 770 info->block_size = 1.0; 771 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 772 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 773 isend[3] = info->memory; isend[4] = info->mallocs; 774 if (flag == MAT_LOCAL) { 775 info->nz_used = isend[0]; 776 info->nz_allocated = isend[1]; 777 info->nz_unneeded = isend[2]; 778 info->memory = isend[3]; 779 info->mallocs = isend[4]; 780 } else if (flag == MAT_GLOBAL_MAX) { 781 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 782 info->nz_used = irecv[0]; 783 info->nz_allocated = irecv[1]; 784 info->nz_unneeded = irecv[2]; 785 info->memory = irecv[3]; 786 info->mallocs = irecv[4]; 787 } else if (flag == MAT_GLOBAL_SUM) { 788 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 789 info->nz_used = irecv[0]; 790 info->nz_allocated = irecv[1]; 791 info->nz_unneeded = irecv[2]; 792 info->memory = irecv[3]; 793 info->mallocs = irecv[4]; 794 } 795 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 796 info->fill_ratio_needed = 0; 797 info->factor_mallocs = 0; 798 PetscFunctionReturn(0); 799 } 800 801 #undef __FUNCT__ 802 #define __FUNCT__ "MatSetOption_MPIDense" 803 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 804 { 805 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 806 PetscErrorCode ierr; 807 808 PetscFunctionBegin; 809 switch (op) { 810 case MAT_NEW_NONZERO_LOCATIONS: 811 case MAT_NEW_NONZERO_LOCATION_ERR: 812 case MAT_NEW_NONZERO_ALLOCATION_ERR: 813 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 814 break; 815 case MAT_ROW_ORIENTED: 816 a->roworiented = flg; 817 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 818 break; 819 case MAT_NEW_DIAGONALS: 820 case MAT_USE_HASH_TABLE: 821 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 822 break; 823 case MAT_IGNORE_OFF_PROC_ENTRIES: 824 a->donotstash = flg; 825 break; 826 case MAT_SYMMETRIC: 827 case MAT_STRUCTURALLY_SYMMETRIC: 828 case MAT_HERMITIAN: 829 case MAT_SYMMETRY_ETERNAL: 830 case MAT_IGNORE_LOWER_TRIANGULAR: 831 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 832 break; 833 default: 834 SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 835 } 836 PetscFunctionReturn(0); 837 } 838 839 840 #undef __FUNCT__ 841 #define __FUNCT__ "MatDiagonalScale_MPIDense" 842 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 843 { 844 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 845 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 846 PetscScalar *l,*r,x,*v; 847 PetscErrorCode ierr; 848 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n; 849 850 PetscFunctionBegin; 851 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 852 if (ll) { 853 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 854 if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 855 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 856 for (i=0; i<m; i++) { 857 x = l[i]; 858 v = mat->v + i; 859 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 860 } 861 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 862 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 863 } 864 if (rr) { 865 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 866 if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 867 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 870 for (i=0; i<n; i++) { 871 x = r[i]; 872 v = mat->v + i*m; 873 for (j=0; j<m; j++) { (*v++) *= x;} 874 } 875 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 876 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 877 } 878 PetscFunctionReturn(0); 879 } 880 881 #undef __FUNCT__ 882 #define __FUNCT__ "MatNorm_MPIDense" 883 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 884 { 885 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 886 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 887 PetscErrorCode ierr; 888 PetscInt i,j; 889 PetscReal sum = 0.0; 890 PetscScalar *v = mat->v; 891 892 PetscFunctionBegin; 893 if (mdn->size == 1) { 894 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 895 } else { 896 if (type == NORM_FROBENIUS) { 897 for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) { 898 #if defined(PETSC_USE_COMPLEX) 899 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 900 #else 901 sum += (*v)*(*v); v++; 902 #endif 903 } 904 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 905 *nrm = sqrt(*nrm); 906 ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr); 907 } else if (type == NORM_1) { 908 PetscReal *tmp,*tmp2; 909 ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 910 tmp2 = tmp + A->cmap.N; 911 ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 912 *nrm = 0.0; 913 v = mat->v; 914 for (j=0; j<mdn->A->cmap.n; j++) { 915 for (i=0; i<mdn->A->rmap.n; i++) { 916 tmp[j] += PetscAbsScalar(*v); v++; 917 } 918 } 919 ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 920 for (j=0; j<A->cmap.N; j++) { 921 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 922 } 923 ierr = PetscFree(tmp);CHKERRQ(ierr); 924 ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr); 925 } else if (type == NORM_INFINITY) { /* max row norm */ 926 PetscReal ntemp; 927 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 928 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 929 } else { 930 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 931 } 932 } 933 PetscFunctionReturn(0); 934 } 935 936 #undef __FUNCT__ 937 #define __FUNCT__ "MatTranspose_MPIDense" 938 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 939 { 940 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 941 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 942 Mat B; 943 PetscInt M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart; 944 PetscErrorCode ierr; 945 PetscInt j,i; 946 PetscScalar *v; 947 948 PetscFunctionBegin; 949 if (A == *matout && M != N) { 950 SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 951 } 952 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 953 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 954 ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr); 955 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 956 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 957 } else { 958 B = *matout; 959 } 960 961 m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v; 962 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 963 for (i=0; i<m; i++) rwork[i] = rstart + i; 964 for (j=0; j<n; j++) { 965 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 966 v += m; 967 } 968 ierr = PetscFree(rwork);CHKERRQ(ierr); 969 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 970 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 971 if (*matout != A) { 972 *matout = B; 973 } else { 974 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 975 } 976 PetscFunctionReturn(0); 977 } 978 979 #include "petscblaslapack.h" 980 #undef __FUNCT__ 981 #define __FUNCT__ "MatScale_MPIDense" 982 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha) 983 { 984 Mat_MPIDense *A = (Mat_MPIDense*)inA->data; 985 Mat_SeqDense *a = (Mat_SeqDense*)A->A->data; 986 PetscScalar oalpha = alpha; 987 PetscErrorCode ierr; 988 PetscBLASInt one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N); 989 990 PetscFunctionBegin; 991 BLASscal_(&nz,&oalpha,a->v,&one); 992 ierr = PetscLogFlops(nz);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 997 998 #undef __FUNCT__ 999 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1000 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1001 { 1002 PetscErrorCode ierr; 1003 1004 PetscFunctionBegin; 1005 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1011 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1012 { 1013 PetscErrorCode ierr; 1014 PetscInt m=A->rmap.n,n=B->cmap.n; 1015 Mat Cmat; 1016 1017 PetscFunctionBegin; 1018 if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n); 1019 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1020 ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr); 1021 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1022 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 1023 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1024 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1025 *C = Cmat; 1026 PetscFunctionReturn(0); 1027 } 1028 1029 #if defined(PETSC_HAVE_PLAPACK) 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatSolve_MPIDense" 1032 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1033 { 1034 MPI_Comm comm = ((PetscObject)A)->comm; 1035 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1036 PetscErrorCode ierr; 1037 PetscInt M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1038 PetscScalar *array; 1039 PetscReal one = 1.0; 1040 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1041 PLA_Obj v_pla = NULL; 1042 PetscScalar *loc_buf; 1043 Vec loc_x; 1044 1045 PetscFunctionBegin; 1046 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1047 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1048 1049 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1050 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1051 PLA_Obj_set_to_zero(v_pla); 1052 1053 /* Copy b into rhs_pla */ 1054 PLA_API_begin(); 1055 PLA_Obj_API_open(v_pla); 1056 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1057 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1058 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1059 PLA_Obj_API_close(v_pla); 1060 PLA_API_end(); 1061 1062 if (A->factor == FACTOR_LU){ 1063 /* Apply the permutations to the right hand sides */ 1064 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1065 1066 /* Solve L y = b, overwriting b with y */ 1067 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1068 1069 /* Solve U x = y (=b), overwriting b with x */ 1070 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1071 } else { /* FACTOR_CHOLESKY */ 1072 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1073 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1074 PLA_NONUNIT_DIAG,lu->A,v_pla); 1075 } 1076 1077 /* Copy PLAPACK x into Petsc vector x */ 1078 PLA_Obj_local_length(v_pla, &loc_m); 1079 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1080 PLA_Obj_local_stride(v_pla, &loc_stride); 1081 /* 1082 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1083 */ 1084 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1085 if (!lu->pla_solved){ 1086 1087 PLA_Temp_comm_row_info(lu->templ,&lu->comm_2d,&r_rank,&r_nproc); 1088 PLA_Temp_comm_col_info(lu->templ,&lu->comm_2d,&c_rank,&c_nproc); 1089 /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */ 1090 1091 /* Create IS and cts for VecScatterring */ 1092 PLA_Obj_local_length(v_pla, &loc_m); 1093 PLA_Obj_local_stride(v_pla, &loc_stride); 1094 ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr); 1095 idx_petsc = idx_pla + loc_m; 1096 1097 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1098 for (i=0; i<loc_m; i+=lu->nb){ 1099 j = 0; 1100 while (j < lu->nb && i+j < loc_m){ 1101 idx_petsc[i+j] = rstart + j; j++; 1102 } 1103 rstart += size*lu->nb; 1104 } 1105 1106 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1107 1108 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1109 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1110 ierr = PetscFree(idx_pla);CHKERRQ(ierr); 1111 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1112 } 1113 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1115 1116 /* Free data */ 1117 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1118 PLA_Obj_free(&v_pla); 1119 1120 lu->pla_solved = PETSC_TRUE; 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #undef __FUNCT__ 1125 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1126 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F) 1127 { 1128 Mat_Plapack *lu = (Mat_Plapack*)(*F)->spptr; 1129 PetscErrorCode ierr; 1130 PetscInt M=A->rmap.N,m=A->rmap.n,rstart,rend; 1131 PetscInt info_pla=0; 1132 PetscScalar *array,one = 1.0; 1133 1134 PetscFunctionBegin; 1135 PLA_Obj_set_to_zero(lu->A); 1136 1137 /* Copy A into lu->A */ 1138 PLA_API_begin(); 1139 PLA_Obj_API_open(lu->A); 1140 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1141 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1142 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1143 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1144 PLA_Obj_API_close(lu->A); 1145 PLA_API_end(); 1146 1147 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1148 info_pla = PLA_LU(lu->A,lu->pivots); 1149 if (info_pla != 0) 1150 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1151 1152 lu->CleanUpPlapack = PETSC_TRUE; 1153 lu->rstart = rstart; 1154 1155 (*F)->assembled = PETSC_TRUE; /* required by -ksp_view */ 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #undef __FUNCT__ 1160 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1161 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F) 1162 { 1163 Mat_Plapack *lu = (Mat_Plapack*)(*F)->spptr; 1164 PetscErrorCode ierr; 1165 PetscInt M=A->rmap.N,m=A->rmap.n,rstart,rend; 1166 PetscInt info_pla=0; 1167 PetscScalar *array,one = 1.0; 1168 1169 PetscFunctionBegin; 1170 1171 PLA_Obj_set_to_zero(lu->A); 1172 1173 /* Copy A into lu->A */ 1174 PLA_API_begin(); 1175 PLA_Obj_API_open(lu->A); 1176 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1177 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1178 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1179 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1180 PLA_Obj_API_close(lu->A); 1181 PLA_API_end(); 1182 1183 /* Factor P A -> Chol */ 1184 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1185 if (info_pla != 0) 1186 SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1187 1188 lu->CleanUpPlapack = PETSC_TRUE; 1189 lu->rstart = rstart; 1190 1191 (*F)->assembled = PETSC_TRUE; /* required by -ksp_view */ 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatFactorSymbolic_MPIDense_Private" 1197 PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F) 1198 { 1199 Mat B; 1200 Mat_Plapack *lu; 1201 PetscErrorCode ierr; 1202 PetscInt M=A->rmap.N,N=A->cmap.N; 1203 MPI_Comm comm=((PetscObject)A)->comm,comm_2d; 1204 PetscMPIInt size; 1205 PetscInt ierror; 1206 1207 PetscFunctionBegin; 1208 /* Create the factorization matrix */ 1209 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 1210 ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr); 1211 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1212 1213 lu = (Mat_Plapack*)(B->spptr); 1214 1215 /* Set default Plapack parameters */ 1216 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1217 lu->nprows = 1; lu->npcols = size; 1218 ierror = 0; 1219 lu->nb = M/size; 1220 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1221 1222 /* Set runtime options */ 1223 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1224 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",lu->nprows,&lu->nprows,PETSC_NULL);CHKERRQ(ierr); 1225 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",lu->npcols,&lu->npcols,PETSC_NULL);CHKERRQ(ierr); 1226 1227 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1228 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr); 1229 if (ierror){ 1230 PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE ); 1231 } else { 1232 PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE ); 1233 } 1234 lu->ierror = ierror; 1235 1236 lu->nb_alg = 0; 1237 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",lu->nb_alg,&lu->nb_alg,PETSC_NULL);CHKERRQ(ierr); 1238 if (lu->nb_alg){ 1239 pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,lu->nb_alg); 1240 } 1241 PetscOptionsEnd(); 1242 1243 1244 /* Create a 2D communicator */ 1245 PLA_Comm_1D_to_2D(comm,lu->nprows,lu->npcols,&comm_2d); 1246 lu->comm_2d = comm_2d; 1247 1248 /* Initialize PLAPACK */ 1249 PLA_Init(comm_2d); 1250 1251 /* Create object distribution template */ 1252 lu->templ = NULL; 1253 PLA_Temp_create(lu->nb, 0, &lu->templ); 1254 1255 /* Use suggested nb_alg if it is not provided by user */ 1256 if (lu->nb_alg == 0){ 1257 PLA_Environ_nb_alg(PLA_OP_PAN_PAN,lu->templ,&lu->nb_alg); 1258 pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg); 1259 } 1260 1261 /* Set the datatype */ 1262 #if defined(PETSC_USE_COMPLEX) 1263 lu->datatype = MPI_DOUBLE_COMPLEX; 1264 #else 1265 lu->datatype = MPI_DOUBLE; 1266 #endif 1267 1268 ierr = PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1269 1270 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1271 lu->CleanUpPlapack = PETSC_TRUE; 1272 *F = B; 1273 PetscFunctionReturn(0); 1274 } 1275 1276 /* Note the Petsc r and c permutations are ignored */ 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1279 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 1280 { 1281 PetscErrorCode ierr; 1282 PetscInt M = A->rmap.N; 1283 Mat_Plapack *lu; 1284 1285 PetscFunctionBegin; 1286 ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr); 1287 lu = (Mat_Plapack*)(*F)->spptr; 1288 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1289 (*F)->factor = FACTOR_LU; 1290 PetscFunctionReturn(0); 1291 } 1292 1293 /* Note the Petsc perm permutation is ignored */ 1294 #undef __FUNCT__ 1295 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1296 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F) 1297 { 1298 PetscErrorCode ierr; 1299 PetscTruth issymmetric,set; 1300 1301 PetscFunctionBegin; 1302 ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr); 1303 if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1304 ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr); 1305 (*F)->factor = FACTOR_CHOLESKY; 1306 PetscFunctionReturn(0); 1307 } 1308 #endif 1309 1310 /* -------------------------------------------------------------------*/ 1311 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1312 MatGetRow_MPIDense, 1313 MatRestoreRow_MPIDense, 1314 MatMult_MPIDense, 1315 /* 4*/ MatMultAdd_MPIDense, 1316 MatMultTranspose_MPIDense, 1317 MatMultTransposeAdd_MPIDense, 1318 #if defined(PETSC_HAVE_PLAPACK) 1319 MatSolve_MPIDense, 1320 #else 1321 0, 1322 #endif 1323 0, 1324 0, 1325 /*10*/ 0, 1326 0, 1327 0, 1328 0, 1329 MatTranspose_MPIDense, 1330 /*15*/ MatGetInfo_MPIDense, 1331 MatEqual_MPIDense, 1332 MatGetDiagonal_MPIDense, 1333 MatDiagonalScale_MPIDense, 1334 MatNorm_MPIDense, 1335 /*20*/ MatAssemblyBegin_MPIDense, 1336 MatAssemblyEnd_MPIDense, 1337 0, 1338 MatSetOption_MPIDense, 1339 MatZeroEntries_MPIDense, 1340 /*25*/ MatZeroRows_MPIDense, 1341 #if defined(PETSC_HAVE_PLAPACK) 1342 MatLUFactorSymbolic_MPIDense, 1343 MatLUFactorNumeric_MPIDense, 1344 MatCholeskyFactorSymbolic_MPIDense, 1345 MatCholeskyFactorNumeric_MPIDense, 1346 #else 1347 0, 1348 0, 1349 0, 1350 0, 1351 #endif 1352 /*30*/ MatSetUpPreallocation_MPIDense, 1353 0, 1354 0, 1355 MatGetArray_MPIDense, 1356 MatRestoreArray_MPIDense, 1357 /*35*/ MatDuplicate_MPIDense, 1358 0, 1359 0, 1360 0, 1361 0, 1362 /*40*/ 0, 1363 MatGetSubMatrices_MPIDense, 1364 0, 1365 MatGetValues_MPIDense, 1366 0, 1367 /*45*/ 0, 1368 MatScale_MPIDense, 1369 0, 1370 0, 1371 0, 1372 /*50*/ 0, 1373 0, 1374 0, 1375 0, 1376 0, 1377 /*55*/ 0, 1378 0, 1379 0, 1380 0, 1381 0, 1382 /*60*/ MatGetSubMatrix_MPIDense, 1383 MatDestroy_MPIDense, 1384 MatView_MPIDense, 1385 0, 1386 0, 1387 /*65*/ 0, 1388 0, 1389 0, 1390 0, 1391 0, 1392 /*70*/ 0, 1393 0, 1394 0, 1395 0, 1396 0, 1397 /*75*/ 0, 1398 0, 1399 0, 1400 0, 1401 0, 1402 /*80*/ 0, 1403 0, 1404 0, 1405 0, 1406 /*84*/ MatLoad_MPIDense, 1407 0, 1408 0, 1409 0, 1410 0, 1411 0, 1412 /*90*/ 0, 1413 MatMatMultSymbolic_MPIDense_MPIDense, 1414 0, 1415 0, 1416 0, 1417 /*95*/ 0, 1418 0, 1419 0, 1420 0}; 1421 1422 EXTERN_C_BEGIN 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1425 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1426 { 1427 Mat_MPIDense *a; 1428 PetscErrorCode ierr; 1429 1430 PetscFunctionBegin; 1431 mat->preallocated = PETSC_TRUE; 1432 /* Note: For now, when data is specified above, this assumes the user correctly 1433 allocates the local dense storage space. We should add error checking. */ 1434 1435 a = (Mat_MPIDense*)mat->data; 1436 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1437 ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr); 1438 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1439 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1440 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1441 PetscFunctionReturn(0); 1442 } 1443 EXTERN_C_END 1444 1445 EXTERN_C_BEGIN 1446 #if defined(PETSC_HAVE_PLAPACK) 1447 #undef __FUNCT__ 1448 #define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK" 1449 PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type) 1450 { 1451 PetscFunctionBegin; 1452 /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack"); silently work (since PLAPACK is on by default) */ 1453 PetscFunctionReturn(0); 1454 } 1455 #endif 1456 EXTERN_C_END 1457 1458 /*MC 1459 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1460 1461 Options Database Keys: 1462 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1463 1464 Level: beginner 1465 1466 MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR) 1467 for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed 1468 (run config/configure.py with the option --download-plapack) 1469 1470 1471 Options Database Keys: 1472 . -mat_plapack_nprows <n> - number of rows in processor partition 1473 . -mat_plapack_npcols <n> - number of columns in processor partition 1474 . -mat_plapack_nb <n> - block size of template vector 1475 . -mat_plapack_nb_alg <n> - algorithmic block size 1476 - -mat_plapack_ckerror <n> - error checking flag 1477 1478 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE 1479 M*/ 1480 1481 EXTERN_C_BEGIN 1482 #undef __FUNCT__ 1483 #define __FUNCT__ "MatCreate_MPIDense" 1484 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1485 { 1486 Mat_MPIDense *a; 1487 PetscErrorCode ierr; 1488 #if defined(PETSC_HAVE_PLAPACK) 1489 Mat_Plapack *lu; 1490 #endif 1491 1492 PetscFunctionBegin; 1493 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1494 mat->data = (void*)a; 1495 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1496 mat->factor = 0; 1497 mat->mapping = 0; 1498 1499 mat->insertmode = NOT_SET_VALUES; 1500 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1501 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1502 1503 mat->rmap.bs = mat->cmap.bs = 1; 1504 ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr); 1505 ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr); 1506 a->nvec = mat->cmap.n; 1507 1508 /* build cache for off array entries formed */ 1509 a->donotstash = PETSC_FALSE; 1510 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1511 1512 /* stuff used for matrix vector multiply */ 1513 a->lvec = 0; 1514 a->Mvctx = 0; 1515 a->roworiented = PETSC_TRUE; 1516 1517 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1518 "MatGetDiagonalBlock_MPIDense", 1519 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1520 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1521 "MatMPIDenseSetPreallocation_MPIDense", 1522 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1523 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1524 "MatMatMult_MPIAIJ_MPIDense", 1525 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1526 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1527 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1528 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1529 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1530 "MatMatMultNumeric_MPIAIJ_MPIDense", 1531 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1532 #if defined(PETSC_HAVE_PLAPACK) 1533 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C", 1534 "MatSetSolverType_MPIDense_PLAPACK", 1535 MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr); 1536 #endif 1537 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1538 1539 #if defined(PETSC_HAVE_PLAPACK) 1540 ierr = PetscNewLog(mat,Mat_Plapack,&lu);CHKERRQ(ierr); 1541 lu->CleanUpPlapack = PETSC_FALSE; 1542 mat->spptr = (void*)lu; 1543 #endif 1544 PetscFunctionReturn(0); 1545 } 1546 EXTERN_C_END 1547 1548 /*MC 1549 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1550 1551 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1552 and MATMPIDENSE otherwise. 1553 1554 Options Database Keys: 1555 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1556 1557 Level: beginner 1558 1559 1560 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1561 M*/ 1562 1563 EXTERN_C_BEGIN 1564 #undef __FUNCT__ 1565 #define __FUNCT__ "MatCreate_Dense" 1566 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1567 { 1568 PetscErrorCode ierr; 1569 PetscMPIInt size; 1570 1571 PetscFunctionBegin; 1572 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1573 if (size == 1) { 1574 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1575 } else { 1576 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1577 } 1578 PetscFunctionReturn(0); 1579 } 1580 EXTERN_C_END 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1584 /*@C 1585 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1586 1587 Not collective 1588 1589 Input Parameters: 1590 . A - the matrix 1591 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1592 to control all matrix memory allocation. 1593 1594 Notes: 1595 The dense format is fully compatible with standard Fortran 77 1596 storage by columns. 1597 1598 The data input variable is intended primarily for Fortran programmers 1599 who wish to allocate their own matrix memory space. Most users should 1600 set data=PETSC_NULL. 1601 1602 Level: intermediate 1603 1604 .keywords: matrix,dense, parallel 1605 1606 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1607 @*/ 1608 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1609 { 1610 PetscErrorCode ierr,(*f)(Mat,PetscScalar *); 1611 1612 PetscFunctionBegin; 1613 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1614 if (f) { 1615 ierr = (*f)(mat,data);CHKERRQ(ierr); 1616 } 1617 PetscFunctionReturn(0); 1618 } 1619 1620 #undef __FUNCT__ 1621 #define __FUNCT__ "MatCreateMPIDense" 1622 /*@C 1623 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1624 1625 Collective on MPI_Comm 1626 1627 Input Parameters: 1628 + comm - MPI communicator 1629 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1630 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1631 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1632 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1633 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1634 to control all matrix memory allocation. 1635 1636 Output Parameter: 1637 . A - the matrix 1638 1639 Notes: 1640 The dense format is fully compatible with standard Fortran 77 1641 storage by columns. 1642 1643 The data input variable is intended primarily for Fortran programmers 1644 who wish to allocate their own matrix memory space. Most users should 1645 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1646 1647 The user MUST specify either the local or global matrix dimensions 1648 (possibly both). 1649 1650 Level: intermediate 1651 1652 .keywords: matrix,dense, parallel 1653 1654 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1655 @*/ 1656 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1657 { 1658 PetscErrorCode ierr; 1659 PetscMPIInt size; 1660 1661 PetscFunctionBegin; 1662 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1663 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1664 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1665 if (size > 1) { 1666 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1667 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1668 } else { 1669 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1670 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1671 } 1672 PetscFunctionReturn(0); 1673 } 1674 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "MatDuplicate_MPIDense" 1677 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1678 { 1679 Mat mat; 1680 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1681 PetscErrorCode ierr; 1682 1683 PetscFunctionBegin; 1684 *newmat = 0; 1685 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1686 ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr); 1687 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1688 a = (Mat_MPIDense*)mat->data; 1689 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1690 mat->factor = A->factor; 1691 mat->assembled = PETSC_TRUE; 1692 mat->preallocated = PETSC_TRUE; 1693 1694 mat->rmap.rstart = A->rmap.rstart; 1695 mat->rmap.rend = A->rmap.rend; 1696 a->size = oldmat->size; 1697 a->rank = oldmat->rank; 1698 mat->insertmode = NOT_SET_VALUES; 1699 a->nvec = oldmat->nvec; 1700 a->donotstash = oldmat->donotstash; 1701 1702 ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1703 ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1704 ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr); 1705 1706 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1707 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1708 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1709 1710 #if defined(PETSC_HAVE_PLAPACK) 1711 ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr); 1712 #endif 1713 *newmat = mat; 1714 PetscFunctionReturn(0); 1715 } 1716 1717 #include "petscsys.h" 1718 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1721 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat) 1722 { 1723 PetscErrorCode ierr; 1724 PetscMPIInt rank,size; 1725 PetscInt *rowners,i,m,nz,j; 1726 PetscScalar *array,*vals,*vals_ptr; 1727 MPI_Status status; 1728 1729 PetscFunctionBegin; 1730 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1731 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1732 1733 /* determine ownership of all rows */ 1734 m = M/size + ((M % size) > rank); 1735 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1736 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1737 rowners[0] = 0; 1738 for (i=2; i<=size; i++) { 1739 rowners[i] += rowners[i-1]; 1740 } 1741 1742 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 1743 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1744 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 1745 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 1746 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1747 1748 if (!rank) { 1749 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1750 1751 /* read in my part of the matrix numerical values */ 1752 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1753 1754 /* insert into matrix-by row (this is why cannot directly read into array */ 1755 vals_ptr = vals; 1756 for (i=0; i<m; i++) { 1757 for (j=0; j<N; j++) { 1758 array[i + j*m] = *vals_ptr++; 1759 } 1760 } 1761 1762 /* read in other processors and ship out */ 1763 for (i=1; i<size; i++) { 1764 nz = (rowners[i+1] - rowners[i])*N; 1765 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1766 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr); 1767 } 1768 } else { 1769 /* receive numeric values */ 1770 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1771 1772 /* receive message of values*/ 1773 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr); 1774 1775 /* insert into matrix-by row (this is why cannot directly read into array */ 1776 vals_ptr = vals; 1777 for (i=0; i<m; i++) { 1778 for (j=0; j<N; j++) { 1779 array[i + j*m] = *vals_ptr++; 1780 } 1781 } 1782 } 1783 ierr = PetscFree(rowners);CHKERRQ(ierr); 1784 ierr = PetscFree(vals);CHKERRQ(ierr); 1785 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1786 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1787 PetscFunctionReturn(0); 1788 } 1789 1790 #undef __FUNCT__ 1791 #define __FUNCT__ "MatLoad_MPIDense" 1792 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat) 1793 { 1794 Mat A; 1795 PetscScalar *vals,*svals; 1796 MPI_Comm comm = ((PetscObject)viewer)->comm; 1797 MPI_Status status; 1798 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1799 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1800 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1801 PetscInt i,nz,j,rstart,rend; 1802 int fd; 1803 PetscErrorCode ierr; 1804 1805 PetscFunctionBegin; 1806 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1807 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1808 if (!rank) { 1809 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1810 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1811 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1812 } 1813 1814 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1815 M = header[1]; N = header[2]; nz = header[3]; 1816 1817 /* 1818 Handle case where matrix is stored on disk as a dense matrix 1819 */ 1820 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1821 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr); 1822 PetscFunctionReturn(0); 1823 } 1824 1825 /* determine ownership of all rows */ 1826 m = PetscMPIIntCast(M/size + ((M % size) > rank)); 1827 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1828 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1829 rowners[0] = 0; 1830 for (i=2; i<=size; i++) { 1831 rowners[i] += rowners[i-1]; 1832 } 1833 rstart = rowners[rank]; 1834 rend = rowners[rank+1]; 1835 1836 /* distribute row lengths to all processors */ 1837 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr); 1838 offlens = ourlens + (rend-rstart); 1839 if (!rank) { 1840 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1841 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1842 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1843 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1844 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1845 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1846 } else { 1847 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1848 } 1849 1850 if (!rank) { 1851 /* calculate the number of nonzeros on each processor */ 1852 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1853 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1854 for (i=0; i<size; i++) { 1855 for (j=rowners[i]; j< rowners[i+1]; j++) { 1856 procsnz[i] += rowlengths[j]; 1857 } 1858 } 1859 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1860 1861 /* determine max buffer needed and allocate it */ 1862 maxnz = 0; 1863 for (i=0; i<size; i++) { 1864 maxnz = PetscMax(maxnz,procsnz[i]); 1865 } 1866 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1867 1868 /* read in my part of the matrix column indices */ 1869 nz = procsnz[0]; 1870 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1871 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1872 1873 /* read in every one elses and ship off */ 1874 for (i=1; i<size; i++) { 1875 nz = procsnz[i]; 1876 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1877 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1878 } 1879 ierr = PetscFree(cols);CHKERRQ(ierr); 1880 } else { 1881 /* determine buffer space needed for message */ 1882 nz = 0; 1883 for (i=0; i<m; i++) { 1884 nz += ourlens[i]; 1885 } 1886 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1887 1888 /* receive message of column indices*/ 1889 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1890 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1891 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1892 } 1893 1894 /* loop over local rows, determining number of off diagonal entries */ 1895 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1896 jj = 0; 1897 for (i=0; i<m; i++) { 1898 for (j=0; j<ourlens[i]; j++) { 1899 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1900 jj++; 1901 } 1902 } 1903 1904 /* create our matrix */ 1905 for (i=0; i<m; i++) { 1906 ourlens[i] -= offlens[i]; 1907 } 1908 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 1909 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1910 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 1911 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 1912 A = *newmat; 1913 for (i=0; i<m; i++) { 1914 ourlens[i] += offlens[i]; 1915 } 1916 1917 if (!rank) { 1918 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1919 1920 /* read in my part of the matrix numerical values */ 1921 nz = procsnz[0]; 1922 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1923 1924 /* insert into matrix */ 1925 jj = rstart; 1926 smycols = mycols; 1927 svals = vals; 1928 for (i=0; i<m; i++) { 1929 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1930 smycols += ourlens[i]; 1931 svals += ourlens[i]; 1932 jj++; 1933 } 1934 1935 /* read in other processors and ship out */ 1936 for (i=1; i<size; i++) { 1937 nz = procsnz[i]; 1938 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1939 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 1940 } 1941 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1942 } else { 1943 /* receive numeric values */ 1944 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1945 1946 /* receive message of values*/ 1947 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 1948 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1949 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1950 1951 /* insert into matrix */ 1952 jj = rstart; 1953 smycols = mycols; 1954 svals = vals; 1955 for (i=0; i<m; i++) { 1956 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1957 smycols += ourlens[i]; 1958 svals += ourlens[i]; 1959 jj++; 1960 } 1961 } 1962 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1963 ierr = PetscFree(vals);CHKERRQ(ierr); 1964 ierr = PetscFree(mycols);CHKERRQ(ierr); 1965 ierr = PetscFree(rowners);CHKERRQ(ierr); 1966 1967 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1968 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatEqual_MPIDense" 1974 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag) 1975 { 1976 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 1977 Mat a,b; 1978 PetscTruth flg; 1979 PetscErrorCode ierr; 1980 1981 PetscFunctionBegin; 1982 a = matA->A; 1983 b = matB->A; 1984 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1985 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 1986 PetscFunctionReturn(0); 1987 } 1988 1989 1990 1991