#define PETSCMAT_DLL

/*
   Basic functions for basic parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"   /*I "petscmat.h" I*/

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

    MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
      matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.    A - the Seq or MPI dense matrix

    Output Parameter:
.    B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
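    /* rows owned by this process are inserted directly into the local MATSEQDENSE block;
       off-process rows are stashed and communicated during MatAssemblyBegin/End()
       (unless MAT_IGNORE_OFF_PROC_ENTRIES was set) */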
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap.N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

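  /* on the first final assembly build the scatter context (the work vector lvec and the
     VecScatter Mvctx) that the matrix-vector product routines below rely on */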
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
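    /* as each receive completes, record its length (number of row indices) and source rank */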
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap.n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
  radd = A->rmap.rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack    *lu=(Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_PLAPACK)
  if (lu->CleanUpPlapack) {
    PLA_Obj_free(&lu->A);
    PLA_Obj_free(&lu->pivots);
    PLA_Temp_free(&lu->templ);
    PLA_Finalize();

    ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
    ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
    ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
  }
#endif

  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
  MPI_Status        status;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_BINARY_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_COOKIE;
        header[1] = mat->rmap.N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap.n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap.n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        v    = vv;
        for (k=1; k<size; k++) {
          m    = mat->rmap.range[k+1] - mat->rmap.range[k];
          ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);

          for (j=0; j<N; j++) {
            for (i=0; i<m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack       *lu=(Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
                                                (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
      ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",lu->nprows,lu->npcols);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",lu->ierror);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",lu->nb_alg);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mat->rmap.rstart; m = mdn->A->rmap.n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->rmap.N;
  info->columns_global = (double)A->cmap.N;
  info->rows_local     = (double)A->rmap.n;
  info->columns_local  = (double)A->cmap.N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->cmap.N;
      ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap.n; j++) {
        for (i=0; i<mdn->A->rmap.n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (A == *matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  }
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
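    /* column j of the local block of A is inserted as (part of) global row j of the transpose B */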
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (*matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscErrorCode ierr;
  PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap.n,n=B->cmap.n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
  ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)
#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIDense"
PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
{
  MPI_Comm       comm = ((PetscObject)A)->comm;
  Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
  PetscScalar    *array;
  PetscReal      one = 1.0;
  PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
  PLA_Obj        v_pla = NULL;
  PetscScalar    *loc_buf;
  Vec            loc_x;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* Create PLAPACK vector objects, then copy b into PLAPACK b */
  PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
  PLA_Obj_set_to_zero(v_pla);

  /* Copy b into rhs_pla */
  PLA_API_begin();
  PLA_Obj_API_open(v_pla);
  ierr = VecGetArray(b,&array);CHKERRQ(ierr);
  PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
  ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(v_pla);
  PLA_API_end();

  if (A->factor == FACTOR_LU){
    /* Apply the permutations to the right hand sides */
    PLA_Apply_pivots_to_rows(v_pla,lu->pivots);

    /* Solve L y = b, overwriting b with y */
    PLA_Trsv(PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla);

    /* Solve U x = y (=b), overwriting b with x */
    PLA_Trsv(PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
  } else { /* FACTOR_CHOLESKY */
    PLA_Trsv(PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
    PLA_Trsv(PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
             PLA_NONUNIT_DIAG,lu->A,v_pla);
  }

  /* Copy PLAPACK x into Petsc vector x */
  PLA_Obj_local_length(v_pla,&loc_m);
  PLA_Obj_local_buffer(v_pla,(void**)&loc_buf);
  PLA_Obj_local_stride(v_pla,&loc_stride);
  /*
    PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
  */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
  if (!lu->pla_solved){

    PLA_Temp_comm_row_info(lu->templ,&lu->comm_2d,&r_rank,&r_nproc);
    PLA_Temp_comm_col_info(lu->templ,&lu->comm_2d,&c_rank,&c_nproc);
    /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */

    /* Create IS and ctx for the VecScatter */
    PLA_Obj_local_length(v_pla,&loc_m);
    PLA_Obj_local_stride(v_pla,&loc_stride);
    ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
    idx_petsc = idx_pla + loc_m;

    rstart = (r_rank*c_nproc+c_rank)*lu->nb;
    for (i=0; i<loc_m; i+=lu->nb){
      j = 0;
      while (j < lu->nb && i+j < loc_m){
        idx_petsc[i+j] = rstart + j; j++;
      }
      rstart += size*lu->nb;
    }

    for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;

    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
    ierr = PetscFree(idx_pla);CHKERRQ(ierr);
    ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Free data */
  ierr = VecDestroy(loc_x);CHKERRQ(ierr);
  PLA_Obj_free(&v_pla);

  lu->pla_solved = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIDense"
PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  /* Create PLAPACK matrix object */
  if (!lu->A) {
    PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
    PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
  }
  PLA_Obj_set_to_zero(lu->A);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M,&one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> L U, overwriting the lower triangular portion of A with L and the upper with U */
  info_pla = PLA_LU(lu->A,lu->pivots);
  if (info_pla != 0)
    SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);

  lu->CleanUpPlapack = PETSC_TRUE;
  lu->rstart         = rstart;

  (*F)->assembled = PETSC_TRUE;  /* required by -ksp_view */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;

  /* Create PLAPACK matrix object */
  if (!lu->A) {
    PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
  }
  PLA_Obj_set_to_zero(lu->A);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M,&one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> Chol */
  info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
  if (info_pla != 0)
    SETERRQ1(PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);

  lu->CleanUpPlapack = PETSC_TRUE;
  lu->rstart         = rstart;

  (*F)->assembled = PETSC_TRUE;  /* required by -ksp_view */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFactorSymbolic_MPIDense_Private"
PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat            B;
  Mat_Plapack    *lu;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap.N,N=A->cmap.N;
  MPI_Comm       comm=((PetscObject)A)->comm,comm_2d;
  PetscMPIInt    size;
  PetscInt       ierror;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);

  lu = (Mat_Plapack*)(B->spptr);

  /* Set default Plapack parameters */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  lu->nprows = 1; lu->npcols = size;
  ierror = 0;
  lu->nb = M/size;
  if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */

  /* Set runtime options */
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",lu->nprows,&lu->nprows,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",lu->npcols,&lu->npcols,PETSC_NULL);CHKERRQ(ierr);

  ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr);
  if (ierror){
    PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);
  } else {
    PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);
  }
  lu->ierror = ierror;

  lu->nb_alg = 0;
  ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",lu->nb_alg,&lu->nb_alg,PETSC_NULL);CHKERRQ(ierr);
  if (lu->nb_alg){
    pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg);
  }
  PetscOptionsEnd();


  /* Create a 2D communicator */
  PLA_Comm_1D_to_2D(comm,lu->nprows,lu->npcols,&comm_2d);
  lu->comm_2d = comm_2d;

  /* Initialize PLAPACK */
  PLA_Init(comm_2d);

  /* Create object distribution template */
  lu->templ = NULL;
  PLA_Temp_create(lu->nb,0,&lu->templ);

  /* Use suggested nb_alg if it is not provided by user */
  if (lu->nb_alg == 0){
    PLA_Environ_nb_alg(PLA_OP_PAN_PAN,lu->templ,&lu->nb_alg);
    pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg);
  }

  /* Set the datatype */
#if defined(PETSC_USE_COMPLEX)
  lu->datatype = MPI_DOUBLE_COMPLEX;
#else
  lu->datatype = MPI_DOUBLE;
#endif

  lu->pla_solved     = PETSC_FALSE; /* MatSolve_Plapack() has not been called yet */
  lu->CleanUpPlapack = PETSC_TRUE;
  *F = B;
  PetscFunctionReturn(0);
}

/* Note the Petsc r and c permutations are ignored */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
  (*F)->factor = FACTOR_LU;
  PetscFunctionReturn(0);
}

/* Note the Petsc perm permutation is ignored */
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F)
{
  PetscErrorCode ierr;
  PetscTruth     issymmetric,set;

  PetscFunctionBegin;
  ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
  if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
  ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
  (*F)->factor = FACTOR_CHOLESKY;
  PetscFunctionReturn(0);
}
#endif

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
#if defined(PETSC_HAVE_PLAPACK)
       MatSolve_MPIDense,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
#if defined(PETSC_HAVE_PLAPACK)
       MatLUFactorSymbolic_MPIDense,
       MatLUFactorNumeric_MPIDense,
       MatCholeskyFactorSymbolic_MPIDense,
       MatCholeskyFactorNumeric_MPIDense,
#else
       0,
       0,
       0,
       0,
#endif
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       MatMatMultSymbolic_MPIDense_MPIDense,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_PLAPACK)
#undef __FUNCT__
#define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK"
PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type)
{
  PetscFunctionBegin;
  /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack"); silently work (since PLAPACK is on by default) */
  PetscFunctionReturn(0);
}
#endif
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

   MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR)
   for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed
   (run config/configure.py with the option --download-plapack)


   Options Database Keys:
+ -mat_plapack_nprows <n> - number of rows in processor partition
. -mat_plapack_npcols <n> - number of columns in processor partition
. -mat_plapack_nb <n> - block size of template vector
. -mat_plapack_nb_alg <n> - algorithmic block size
- -mat_plapack_ckerror <n> - error checking flag

.seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack    *lu;
#endif

  PetscFunctionBegin;
  ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor  = 0;
  mat->mapping = 0;

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  mat->rmap.bs = mat->cmap.bs = 1;
  ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap.n;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                           "MatMatMult_MPIAIJ_MPIDense",
                                           MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                           "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                           MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                           "MatMatMultNumeric_MPIAIJ_MPIDense",
                                           MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C",
                                           "MatSetSolverType_MPIDense_PLAPACK",
                                           MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);

#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscNewLog(mat,Mat_Plapack,&lu);CHKERRQ(ierr);
  lu->CleanUpPlapack = PETSC_FALSE;
  mat->spptr         = (void*)lu;
#endif
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
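
   Example usage (a minimal sketch added for illustration, not part of the original man page;
   the size 100 is arbitrary and error codes should be checked with CHKERRQ() in real code):
.vb
      Mat         A;
      PetscInt    i,rstart,rend;
      PetscScalar one = 1.0;

      MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);
      MatGetOwnershipRange(A,&rstart,&rend);
      for (i=rstart; i<rend; i++) {             /* set the locally owned part of the diagonal */
        MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);
      }
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      ...
      MatDestroy(A);
.ve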
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
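#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDenseSketch"
/*
    Illustrative sketch (added for exposition; not part of the original source and never
    called): the one-call convenience routine defined above.  Only the global size is
    given, PETSc decides the row distribution, and data=PETSC_NULL lets PETSc allocate
    the storage.  The function name and the 100x50 size are arbitrary.
*/
static PetscErrorCode MatCreateMPIDenseSketch(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       i,rstart,rend;
  PetscScalar    one = 1.0;

  PetscFunctionBegin;
  ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,100,50,PETSC_NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend && i<50; i++) {   /* unit "diagonal" of the rectangular matrix */
    ierr = MatSetValues(*A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}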
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap.rstart = A->rmap.rstart;
  mat->rmap.rend   = A->rmap.rend;
  a->size          = oldmat->size;
  a->rank          = oldmat->rank;
  mat->insertmode  = NOT_SET_VALUES;
  a->nvec          = oldmat->nvec;
  a->donotstash    = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
#endif
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert by row into the column-major array (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* allocate receive buffer for the numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert by row into the column-major array (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
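/*
    Illustrative sketch (added for exposition; not part of the original source and never
    called): the index transformation used in MatLoad_MPIDense_DenseInFile() above.  The
    file holds each local row contiguously (row-major over all N columns), while the dense
    matrix stores its local block column-major with leading dimension m, so entry (i,j)
    moves from rowmajor[i*N + j] to colmajor[i + j*m].  The helper name is arbitrary and
    not part of the PETSc API.
*/
static PetscErrorCode DenseCopyRowMajorToColMajor(PetscInt m,PetscInt N,const PetscScalar *rowmajor,PetscScalar *colmajor)
{
  PetscInt i,j;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    for (j=0; j<N; j++) {
      colmajor[i + j*m] = rowmajor[i*N + j];  /* (local row i, global column j) */
    }
  }
  PetscFunctionReturn(0);
}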
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
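#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseLoadAndCompareSketch"
/*
    Illustrative sketch (added for exposition; not part of the original source and never
    called): exercises the loader, duplication, and equality routines defined in this
    file.  A matrix is read from a PETSc binary file, duplicated, and the copy is checked
    against the original.  The function name and the file-name argument are arbitrary.
*/
static PetscErrorCode MatMPIDenseLoadAndCompareSketch(MPI_Comm comm,const char filename[])
{
  Mat            A,B;
  PetscViewer    viewer;
  PetscTruth     equal;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,filename,FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);      /* dispatches to MatLoad_MPIDense() */
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

  ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);  /* dispatches to MatDuplicate_MPIDense() */
  ierr = MatEqual(A,B,&equal);CHKERRQ(ierr);                /* collective; reduces the per-process result */
  if (!equal) SETERRQ(PETSC_ERR_PLIB,"duplicate does not equal original");

  ierr = MatDestroy(B);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}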