1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "../src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 #if defined(PETSC_HAVE_PLAPACK) 10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 11 static MPI_Comm Plapack_comm_2d; 12 EXTERN_C_BEGIN 13 #include "PLA.h" 14 EXTERN_C_END 15 16 typedef struct { 17 PLA_Obj A,pivots; 18 PLA_Template templ; 19 MPI_Datatype datatype; 20 PetscInt nb,rstart; 21 VecScatter ctx; 22 IS is_pla,is_petsc; 23 PetscTruth pla_solved; 24 MatStructure mstruct; 25 } Mat_Plapack; 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatDenseGetLocalMatrix" 30 /*@ 31 32 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 33 matrix that represents the operator. For sequential matrices it returns itself. 34 35 Input Parameter: 36 . A - the Seq or MPI dense matrix 37 38 Output Parameter: 39 . B - the inner matrix 40 41 Level: intermediate 42 43 @*/ 44 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 45 { 46 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 47 PetscErrorCode ierr; 48 PetscTruth flg; 49 50 PetscFunctionBegin; 51 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 52 if (flg) { 53 *B = mat->A; 54 } else { 55 *B = A; 56 } 57 PetscFunctionReturn(0); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatGetRow_MPIDense" 62 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 63 { 64 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 65 PetscErrorCode ierr; 66 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 67 68 PetscFunctionBegin; 69 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 70 lrow = row - rstart; 71 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 72 PetscFunctionReturn(0); 73 } 74 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatRestoreRow_MPIDense" 77 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 78 { 79 PetscErrorCode ierr; 80 81 PetscFunctionBegin; 82 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 83 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 84 PetscFunctionReturn(0); 85 } 86 87 EXTERN_C_BEGIN 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 90 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 91 { 92 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 93 PetscErrorCode ierr; 94 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 95 PetscScalar *array; 96 MPI_Comm comm; 97 98 PetscFunctionBegin; 99 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 100 101 /* The reuse aspect is not implemented efficiently */ 102 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 103 104 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 105 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 106 ierr = MatCreate(comm,B);CHKERRQ(ierr); 107 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 108 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 109 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 110 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 111 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 112 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 113 114 *iscopy = PETSC_TRUE; 115 PetscFunctionReturn(0); 116 } 117 EXTERN_C_END 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "MatSetValues_MPIDense" 121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 122 { 123 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 124 PetscErrorCode ierr; 125 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 126 PetscTruth roworiented = A->roworiented; 127 128 PetscFunctionBegin; 129 if (v) PetscValidScalarPointer(v,6); 130 for (i=0; i<m; i++) { 131 if (idxm[i] < 0) continue; 132 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 133 if (idxm[i] >= rstart && idxm[i] < rend) { 134 row = idxm[i] - rstart; 135 if (roworiented) { 136 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 137 } else { 138 for (j=0; j<n; j++) { 139 if (idxn[j] < 0) continue; 140 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 141 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 142 } 143 } 144 } else { 145 if (!A->donotstash) { 146 if (roworiented) { 147 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 148 } else { 149 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 150 } 151 } 152 } 153 } 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatGetValues_MPIDense" 159 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 160 { 161 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 164 165 PetscFunctionBegin; 166 for (i=0; i<m; i++) { 167 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 168 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 169 if (idxm[i] >= rstart && idxm[i] < rend) { 170 row = idxm[i] - rstart; 171 for (j=0; j<n; j++) { 172 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 173 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 174 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 175 } 176 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 177 } 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatGetArray_MPIDense" 183 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 184 { 185 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 186 PetscErrorCode ierr; 187 188 PetscFunctionBegin; 189 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 190 PetscFunctionReturn(0); 191 } 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 195 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 196 { 197 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 198 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 199 PetscErrorCode ierr; 200 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 201 const PetscInt *irow,*icol; 202 PetscScalar *av,*bv,*v = lmat->v; 203 Mat newmat; 204 IS iscol_local; 205 206 PetscFunctionBegin; 207 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 208 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 209 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 210 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 211 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 212 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 213 214 /* No parallel redistribution currently supported! Should really check each index set 215 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 216 original matrix! */ 217 218 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 219 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 220 221 /* Check submatrix call */ 222 if (scall == MAT_REUSE_MATRIX) { 223 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 224 /* Really need to test rows and column sizes! */ 225 newmat = *B; 226 } else { 227 /* Create and fill new matrix */ 228 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 229 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 230 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 231 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 232 } 233 234 /* Now extract the data pointers and do the copy, column at a time */ 235 newmatd = (Mat_MPIDense*)newmat->data; 236 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 237 238 for (i=0; i<Ncols; i++) { 239 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 240 for (j=0; j<nrows; j++) { 241 *bv++ = av[irow[j] - rstart]; 242 } 243 } 244 245 /* Assemble the matrices so that the correct flags are set */ 246 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 247 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 248 249 /* Free work space */ 250 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 251 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 252 *B = newmat; 253 PetscFunctionReturn(0); 254 } 255 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatRestoreArray_MPIDense" 258 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 259 { 260 PetscFunctionBegin; 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 266 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 267 { 268 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 269 MPI_Comm comm = ((PetscObject)mat)->comm; 270 PetscErrorCode ierr; 271 PetscInt nstash,reallocs; 272 InsertMode addv; 273 274 PetscFunctionBegin; 275 /* make sure all processors are either in INSERTMODE or ADDMODE */ 276 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 277 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 278 mat->insertmode = addv; /* in case this processor had no cache */ 279 280 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 281 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 282 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 288 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 289 { 290 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 291 PetscErrorCode ierr; 292 PetscInt i,*row,*col,flg,j,rstart,ncols; 293 PetscMPIInt n; 294 PetscScalar *val; 295 InsertMode addv=mat->insertmode; 296 297 PetscFunctionBegin; 298 /* wait on receives */ 299 while (1) { 300 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 301 if (!flg) break; 302 303 for (i=0; i<n;) { 304 /* Now identify the consecutive vals belonging to the same row */ 305 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 306 if (j < n) ncols = j-i; 307 else ncols = n-i; 308 /* Now assemble all these values with a single function call */ 309 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 310 i = j; 311 } 312 } 313 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 314 315 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 316 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 317 318 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 319 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "MatZeroEntries_MPIDense" 326 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 327 { 328 PetscErrorCode ierr; 329 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 330 331 PetscFunctionBegin; 332 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 333 PetscFunctionReturn(0); 334 } 335 336 /* the code does not do the diagonal entries correctly unless the 337 matrix is square and the column and row owerships are identical. 338 This is a BUG. The only way to fix it seems to be to access 339 mdn->A and mdn->B directly and not through the MatZeroRows() 340 routine. 341 */ 342 #undef __FUNCT__ 343 #define __FUNCT__ "MatZeroRows_MPIDense" 344 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 345 { 346 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 347 PetscErrorCode ierr; 348 PetscInt i,*owners = A->rmap->range; 349 PetscInt *nprocs,j,idx,nsends; 350 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 351 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 352 PetscInt *lens,*lrows,*values; 353 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 354 MPI_Comm comm = ((PetscObject)A)->comm; 355 MPI_Request *send_waits,*recv_waits; 356 MPI_Status recv_status,*send_status; 357 PetscTruth found; 358 359 PetscFunctionBegin; 360 /* first count number of contributors to each processor */ 361 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 362 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 363 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 364 for (i=0; i<N; i++) { 365 idx = rows[i]; 366 found = PETSC_FALSE; 367 for (j=0; j<size; j++) { 368 if (idx >= owners[j] && idx < owners[j+1]) { 369 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 370 } 371 } 372 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 373 } 374 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 375 376 /* inform other processors of number of messages and max length*/ 377 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 378 379 /* post receives: */ 380 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 381 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 382 for (i=0; i<nrecvs; i++) { 383 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 384 } 385 386 /* do sends: 387 1) starts[i] gives the starting index in svalues for stuff going to 388 the ith processor 389 */ 390 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 391 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 392 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 393 starts[0] = 0; 394 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 395 for (i=0; i<N; i++) { 396 svalues[starts[owner[i]]++] = rows[i]; 397 } 398 399 starts[0] = 0; 400 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 401 count = 0; 402 for (i=0; i<size; i++) { 403 if (nprocs[2*i+1]) { 404 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 405 } 406 } 407 ierr = PetscFree(starts);CHKERRQ(ierr); 408 409 base = owners[rank]; 410 411 /* wait on receives */ 412 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 413 count = nrecvs; 414 slen = 0; 415 while (count) { 416 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 417 /* unpack receives into our local space */ 418 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 419 source[imdex] = recv_status.MPI_SOURCE; 420 lens[imdex] = n; 421 slen += n; 422 count--; 423 } 424 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 425 426 /* move the data into the send scatter */ 427 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 428 count = 0; 429 for (i=0; i<nrecvs; i++) { 430 values = rvalues + i*nmax; 431 for (j=0; j<lens[i]; j++) { 432 lrows[count++] = values[j] - base; 433 } 434 } 435 ierr = PetscFree(rvalues);CHKERRQ(ierr); 436 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 437 ierr = PetscFree(owner);CHKERRQ(ierr); 438 ierr = PetscFree(nprocs);CHKERRQ(ierr); 439 440 /* actually zap the local rows */ 441 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 442 ierr = PetscFree(lrows);CHKERRQ(ierr); 443 444 /* wait on sends */ 445 if (nsends) { 446 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 447 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 448 ierr = PetscFree(send_status);CHKERRQ(ierr); 449 } 450 ierr = PetscFree(send_waits);CHKERRQ(ierr); 451 ierr = PetscFree(svalues);CHKERRQ(ierr); 452 453 PetscFunctionReturn(0); 454 } 455 456 #undef __FUNCT__ 457 #define __FUNCT__ "MatMult_MPIDense" 458 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 459 { 460 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 465 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 466 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 467 PetscFunctionReturn(0); 468 } 469 470 #undef __FUNCT__ 471 #define __FUNCT__ "MatMultAdd_MPIDense" 472 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 475 PetscErrorCode ierr; 476 477 PetscFunctionBegin; 478 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 480 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "MatMultTranspose_MPIDense" 486 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 487 { 488 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 489 PetscErrorCode ierr; 490 PetscScalar zero = 0.0; 491 492 PetscFunctionBegin; 493 ierr = VecSet(yy,zero);CHKERRQ(ierr); 494 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 495 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 496 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 502 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 503 { 504 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 505 PetscErrorCode ierr; 506 507 PetscFunctionBegin; 508 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 509 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 510 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 511 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 512 PetscFunctionReturn(0); 513 } 514 515 #undef __FUNCT__ 516 #define __FUNCT__ "MatGetDiagonal_MPIDense" 517 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 518 { 519 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 520 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 521 PetscErrorCode ierr; 522 PetscInt len,i,n,m = A->rmap->n,radd; 523 PetscScalar *x,zero = 0.0; 524 525 PetscFunctionBegin; 526 ierr = VecSet(v,zero);CHKERRQ(ierr); 527 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 528 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 529 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 530 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 531 radd = A->rmap->rstart*m; 532 for (i=0; i<len; i++) { 533 x[i] = aloc->v[radd + i*m + i]; 534 } 535 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatDestroy_MPIDense" 541 PetscErrorCode MatDestroy_MPIDense(Mat mat) 542 { 543 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 544 PetscErrorCode ierr; 545 #if defined(PETSC_HAVE_PLAPACK) 546 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 547 #endif 548 549 PetscFunctionBegin; 550 551 #if defined(PETSC_USE_LOG) 552 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 553 #endif 554 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 555 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 556 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 557 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 558 #if defined(PETSC_HAVE_PLAPACK) 559 if (lu) { 560 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 561 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 562 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 563 564 if (lu->is_pla) { 565 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 566 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 567 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 568 } 569 } 570 #endif 571 572 ierr = PetscFree(mdn);CHKERRQ(ierr); 573 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 574 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 576 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 577 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 578 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatView_MPIDense_Binary" 584 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 585 { 586 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 587 PetscErrorCode ierr; 588 PetscViewerFormat format; 589 int fd; 590 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 591 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 592 PetscScalar *work,*v,*vv; 593 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 594 MPI_Status status; 595 596 PetscFunctionBegin; 597 if (mdn->size == 1) { 598 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 599 } else { 600 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 601 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 602 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 603 604 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 605 if (format == PETSC_VIEWER_NATIVE) { 606 607 if (!rank) { 608 /* store the matrix as a dense matrix */ 609 header[0] = MAT_FILE_CLASSID; 610 header[1] = mat->rmap->N; 611 header[2] = N; 612 header[3] = MATRIX_BINARY_FORMAT_DENSE; 613 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 614 615 /* get largest work array needed for transposing array */ 616 mmax = mat->rmap->n; 617 for (i=1; i<size; i++) { 618 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 619 } 620 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 621 622 /* write out local array, by rows */ 623 m = mat->rmap->n; 624 v = a->v; 625 for (j=0; j<N; j++) { 626 for (i=0; i<m; i++) { 627 work[j + i*N] = *v++; 628 } 629 } 630 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 631 /* get largest work array to receive messages from other processes, excludes process zero */ 632 mmax = 0; 633 for (i=1; i<size; i++) { 634 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 635 } 636 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 637 for(k = 1; k < size; k++) { 638 v = vv; 639 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 640 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 641 642 for(j = 0; j < N; j++) { 643 for(i = 0; i < m; i++) { 644 work[j + i*N] = *v++; 645 } 646 } 647 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 648 } 649 ierr = PetscFree(work);CHKERRQ(ierr); 650 ierr = PetscFree(vv);CHKERRQ(ierr); 651 } else { 652 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 653 } 654 } else { 655 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 656 } 657 } 658 PetscFunctionReturn(0); 659 } 660 661 #undef __FUNCT__ 662 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 663 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 664 { 665 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 666 PetscErrorCode ierr; 667 PetscMPIInt size = mdn->size,rank = mdn->rank; 668 const PetscViewerType vtype; 669 PetscTruth iascii,isdraw; 670 PetscViewer sviewer; 671 PetscViewerFormat format; 672 #if defined(PETSC_HAVE_PLAPACK) 673 Mat_Plapack *lu; 674 #endif 675 676 PetscFunctionBegin; 677 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 678 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 679 if (iascii) { 680 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 681 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 682 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 683 MatInfo info; 684 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 685 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n, 686 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 687 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 688 #if defined(PETSC_HAVE_PLAPACK) 689 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 690 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 691 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 692 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 693 if (mat->factortype){ 694 lu=(Mat_Plapack*)(mat->spptr); 695 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 696 } 697 #else 698 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 699 #endif 700 PetscFunctionReturn(0); 701 } else if (format == PETSC_VIEWER_ASCII_INFO) { 702 PetscFunctionReturn(0); 703 } 704 } else if (isdraw) { 705 PetscDraw draw; 706 PetscTruth isnull; 707 708 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 709 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 710 if (isnull) PetscFunctionReturn(0); 711 } 712 713 if (size == 1) { 714 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 715 } else { 716 /* assemble the entire matrix onto first processor. */ 717 Mat A; 718 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 719 PetscInt *cols; 720 PetscScalar *vals; 721 722 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 723 if (!rank) { 724 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 725 } else { 726 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 727 } 728 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 729 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 730 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 731 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 732 733 /* Copy the matrix ... This isn't the most efficient means, 734 but it's quick for now */ 735 A->insertmode = INSERT_VALUES; 736 row = mat->rmap->rstart; m = mdn->A->rmap->n; 737 for (i=0; i<m; i++) { 738 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 739 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 740 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 741 row++; 742 } 743 744 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 745 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 746 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 747 if (!rank) { 748 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 749 } 750 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 751 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 752 ierr = MatDestroy(A);CHKERRQ(ierr); 753 } 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatView_MPIDense" 759 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 760 { 761 PetscErrorCode ierr; 762 PetscTruth iascii,isbinary,isdraw,issocket; 763 764 PetscFunctionBegin; 765 766 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 767 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 768 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 769 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 770 771 if (iascii || issocket || isdraw) { 772 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 773 } else if (isbinary) { 774 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 775 } else { 776 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 777 } 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatGetInfo_MPIDense" 783 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 784 { 785 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 786 Mat mdn = mat->A; 787 PetscErrorCode ierr; 788 PetscReal isend[5],irecv[5]; 789 790 PetscFunctionBegin; 791 info->block_size = 1.0; 792 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 793 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 794 isend[3] = info->memory; isend[4] = info->mallocs; 795 if (flag == MAT_LOCAL) { 796 info->nz_used = isend[0]; 797 info->nz_allocated = isend[1]; 798 info->nz_unneeded = isend[2]; 799 info->memory = isend[3]; 800 info->mallocs = isend[4]; 801 } else if (flag == MAT_GLOBAL_MAX) { 802 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 803 info->nz_used = irecv[0]; 804 info->nz_allocated = irecv[1]; 805 info->nz_unneeded = irecv[2]; 806 info->memory = irecv[3]; 807 info->mallocs = irecv[4]; 808 } else if (flag == MAT_GLOBAL_SUM) { 809 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 810 info->nz_used = irecv[0]; 811 info->nz_allocated = irecv[1]; 812 info->nz_unneeded = irecv[2]; 813 info->memory = irecv[3]; 814 info->mallocs = irecv[4]; 815 } 816 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 817 info->fill_ratio_needed = 0; 818 info->factor_mallocs = 0; 819 PetscFunctionReturn(0); 820 } 821 822 #undef __FUNCT__ 823 #define __FUNCT__ "MatSetOption_MPIDense" 824 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 825 { 826 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 827 PetscErrorCode ierr; 828 829 PetscFunctionBegin; 830 switch (op) { 831 case MAT_NEW_NONZERO_LOCATIONS: 832 case MAT_NEW_NONZERO_LOCATION_ERR: 833 case MAT_NEW_NONZERO_ALLOCATION_ERR: 834 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 835 break; 836 case MAT_ROW_ORIENTED: 837 a->roworiented = flg; 838 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 839 break; 840 case MAT_NEW_DIAGONALS: 841 case MAT_USE_HASH_TABLE: 842 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 843 break; 844 case MAT_IGNORE_OFF_PROC_ENTRIES: 845 a->donotstash = flg; 846 break; 847 case MAT_SYMMETRIC: 848 case MAT_STRUCTURALLY_SYMMETRIC: 849 case MAT_HERMITIAN: 850 case MAT_SYMMETRY_ETERNAL: 851 case MAT_IGNORE_LOWER_TRIANGULAR: 852 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 853 break; 854 default: 855 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 856 } 857 PetscFunctionReturn(0); 858 } 859 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatDiagonalScale_MPIDense" 863 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 864 { 865 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 866 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 867 PetscScalar *l,*r,x,*v; 868 PetscErrorCode ierr; 869 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 870 871 PetscFunctionBegin; 872 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 873 if (ll) { 874 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 875 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 876 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 877 for (i=0; i<m; i++) { 878 x = l[i]; 879 v = mat->v + i; 880 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 881 } 882 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 883 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 884 } 885 if (rr) { 886 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 887 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 888 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 891 for (i=0; i<n; i++) { 892 x = r[i]; 893 v = mat->v + i*m; 894 for (j=0; j<m; j++) { (*v++) *= x;} 895 } 896 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 897 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 898 } 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatNorm_MPIDense" 904 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 905 { 906 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 907 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 908 PetscErrorCode ierr; 909 PetscInt i,j; 910 PetscReal sum = 0.0; 911 PetscScalar *v = mat->v; 912 913 PetscFunctionBegin; 914 if (mdn->size == 1) { 915 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 916 } else { 917 if (type == NORM_FROBENIUS) { 918 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 919 #if defined(PETSC_USE_COMPLEX) 920 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 921 #else 922 sum += (*v)*(*v); v++; 923 #endif 924 } 925 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 926 *nrm = sqrt(*nrm); 927 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 928 } else if (type == NORM_1) { 929 PetscReal *tmp,*tmp2; 930 ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 931 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 932 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 933 *nrm = 0.0; 934 v = mat->v; 935 for (j=0; j<mdn->A->cmap->n; j++) { 936 for (i=0; i<mdn->A->rmap->n; i++) { 937 tmp[j] += PetscAbsScalar(*v); v++; 938 } 939 } 940 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 941 for (j=0; j<A->cmap->N; j++) { 942 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 943 } 944 ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr); 945 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 946 } else if (type == NORM_INFINITY) { /* max row norm */ 947 PetscReal ntemp; 948 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 949 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 950 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm"); 951 } 952 PetscFunctionReturn(0); 953 } 954 955 #undef __FUNCT__ 956 #define __FUNCT__ "MatTranspose_MPIDense" 957 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 958 { 959 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 960 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 961 Mat B; 962 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 963 PetscErrorCode ierr; 964 PetscInt j,i; 965 PetscScalar *v; 966 967 PetscFunctionBegin; 968 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place"); 969 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 970 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 971 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 972 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 973 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 974 } else { 975 B = *matout; 976 } 977 978 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 979 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 980 for (i=0; i<m; i++) rwork[i] = rstart + i; 981 for (j=0; j<n; j++) { 982 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 983 v += m; 984 } 985 ierr = PetscFree(rwork);CHKERRQ(ierr); 986 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 987 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 988 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 989 *matout = B; 990 } else { 991 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 992 } 993 PetscFunctionReturn(0); 994 } 995 996 997 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 998 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 999 1000 #undef __FUNCT__ 1001 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1002 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1003 { 1004 PetscErrorCode ierr; 1005 1006 PetscFunctionBegin; 1007 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 #if defined(PETSC_HAVE_PLAPACK) 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1015 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1016 { 1017 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1018 PetscErrorCode ierr; 1019 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1020 PetscScalar *array; 1021 PetscReal one = 1.0; 1022 1023 PetscFunctionBegin; 1024 /* Copy A into F->lu->A */ 1025 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1026 ierr = PLA_API_begin();CHKERRQ(ierr); 1027 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1028 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1029 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1030 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1031 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1032 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1033 ierr = PLA_API_end();CHKERRQ(ierr); 1034 lu->rstart = rstart; 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1040 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1041 { 1042 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1043 PetscErrorCode ierr; 1044 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1045 PetscScalar *array; 1046 PetscReal one = 1.0; 1047 1048 PetscFunctionBegin; 1049 /* Copy F into A->lu->A */ 1050 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1051 ierr = PLA_API_begin();CHKERRQ(ierr); 1052 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1053 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1054 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1055 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1056 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1057 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1058 ierr = PLA_API_end();CHKERRQ(ierr); 1059 lu->rstart = rstart; 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1065 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1066 { 1067 PetscErrorCode ierr; 1068 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1069 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1070 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1071 PLA_Obj alpha = NULL,beta = NULL; 1072 1073 PetscFunctionBegin; 1074 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1075 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1076 1077 /* 1078 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1079 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1080 */ 1081 1082 /* do the multiply in PLA */ 1083 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1084 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1085 CHKMEMQ; 1086 1087 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1088 CHKMEMQ; 1089 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1090 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1091 1092 /* 1093 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1094 */ 1095 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1101 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1102 { 1103 PetscErrorCode ierr; 1104 PetscInt m=A->rmap->n,n=B->cmap->n; 1105 Mat Cmat; 1106 1107 PetscFunctionBegin; 1108 if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1109 SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported"); 1110 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1111 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1112 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1113 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1114 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1115 1116 *C = Cmat; 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1122 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1123 { 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 if (scall == MAT_INITIAL_MATRIX){ 1128 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1129 } 1130 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatSolve_MPIDense" 1136 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1137 { 1138 MPI_Comm comm = ((PetscObject)A)->comm; 1139 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1140 PetscErrorCode ierr; 1141 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1142 PetscScalar *array; 1143 PetscReal one = 1.0; 1144 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1145 PLA_Obj v_pla = NULL; 1146 PetscScalar *loc_buf; 1147 Vec loc_x; 1148 1149 PetscFunctionBegin; 1150 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1151 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1152 1153 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1154 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1155 PLA_Obj_set_to_zero(v_pla); 1156 1157 /* Copy b into rhs_pla */ 1158 PLA_API_begin(); 1159 PLA_Obj_API_open(v_pla); 1160 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1161 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1162 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1163 PLA_Obj_API_close(v_pla); 1164 PLA_API_end(); 1165 1166 if (A->factortype == MAT_FACTOR_LU){ 1167 /* Apply the permutations to the right hand sides */ 1168 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1169 1170 /* Solve L y = b, overwriting b with y */ 1171 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1172 1173 /* Solve U x = y (=b), overwriting b with x */ 1174 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1175 } else { /* MAT_FACTOR_CHOLESKY */ 1176 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1177 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1178 PLA_NONUNIT_DIAG,lu->A,v_pla); 1179 } 1180 1181 /* Copy PLAPACK x into Petsc vector x */ 1182 PLA_Obj_local_length(v_pla, &loc_m); 1183 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1184 PLA_Obj_local_stride(v_pla, &loc_stride); 1185 /* 1186 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1187 */ 1188 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1189 if (!lu->pla_solved){ 1190 1191 PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc); 1192 PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc); 1193 1194 /* Create IS and cts for VecScatterring */ 1195 PLA_Obj_local_length(v_pla, &loc_m); 1196 PLA_Obj_local_stride(v_pla, &loc_stride); 1197 ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr); 1198 1199 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1200 for (i=0; i<loc_m; i+=lu->nb){ 1201 j = 0; 1202 while (j < lu->nb && i+j < loc_m){ 1203 idx_petsc[i+j] = rstart + j; j++; 1204 } 1205 rstart += size*lu->nb; 1206 } 1207 1208 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1209 1210 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1211 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1212 ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr); 1213 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1214 } 1215 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1216 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1217 1218 /* Free data */ 1219 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1220 PLA_Obj_free(&v_pla); 1221 1222 lu->pla_solved = PETSC_TRUE; 1223 PetscFunctionReturn(0); 1224 } 1225 1226 #undef __FUNCT__ 1227 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1228 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1229 { 1230 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1231 PetscErrorCode ierr; 1232 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1233 PetscInt info_pla=0; 1234 PetscScalar *array,one = 1.0; 1235 1236 PetscFunctionBegin; 1237 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1238 PLA_Obj_free(&lu->A); 1239 PLA_Obj_free (&lu->pivots); 1240 } 1241 /* Create PLAPACK matrix object */ 1242 lu->A = NULL; lu->pivots = NULL; 1243 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1244 PLA_Obj_set_to_zero(lu->A); 1245 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1246 1247 /* Copy A into lu->A */ 1248 PLA_API_begin(); 1249 PLA_Obj_API_open(lu->A); 1250 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1251 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1252 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1253 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1254 PLA_Obj_API_close(lu->A); 1255 PLA_API_end(); 1256 1257 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1258 info_pla = PLA_LU(lu->A,lu->pivots); 1259 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1260 1261 lu->rstart = rstart; 1262 lu->mstruct = SAME_NONZERO_PATTERN; 1263 F->ops->solve = MatSolve_MPIDense; 1264 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1270 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1271 { 1272 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1273 PetscErrorCode ierr; 1274 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1275 PetscInt info_pla=0; 1276 PetscScalar *array,one = 1.0; 1277 1278 PetscFunctionBegin; 1279 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1280 PLA_Obj_free(&lu->A); 1281 } 1282 /* Create PLAPACK matrix object */ 1283 lu->A = NULL; 1284 lu->pivots = NULL; 1285 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1286 1287 /* Copy A into lu->A */ 1288 PLA_API_begin(); 1289 PLA_Obj_API_open(lu->A); 1290 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1291 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1292 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1293 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1294 PLA_Obj_API_close(lu->A); 1295 PLA_API_end(); 1296 1297 /* Factor P A -> Chol */ 1298 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1299 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1300 1301 lu->rstart = rstart; 1302 lu->mstruct = SAME_NONZERO_PATTERN; 1303 F->ops->solve = MatSolve_MPIDense; 1304 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1305 PetscFunctionReturn(0); 1306 } 1307 1308 /* Note the Petsc perm permutation is ignored */ 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1311 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1312 { 1313 PetscErrorCode ierr; 1314 PetscTruth issymmetric,set; 1315 1316 PetscFunctionBegin; 1317 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1318 if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1319 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1320 PetscFunctionReturn(0); 1321 } 1322 1323 /* Note the Petsc r and c permutations are ignored */ 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1326 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1327 { 1328 PetscErrorCode ierr; 1329 PetscInt M = A->rmap->N; 1330 Mat_Plapack *lu; 1331 1332 PetscFunctionBegin; 1333 lu = (Mat_Plapack*)F->spptr; 1334 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1335 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1336 PetscFunctionReturn(0); 1337 } 1338 1339 EXTERN_C_BEGIN 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1342 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1343 { 1344 PetscFunctionBegin; 1345 *type = MATSOLVERPLAPACK; 1346 PetscFunctionReturn(0); 1347 } 1348 EXTERN_C_END 1349 1350 EXTERN_C_BEGIN 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1353 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1354 { 1355 PetscErrorCode ierr; 1356 Mat_Plapack *lu; 1357 PetscMPIInt size; 1358 PetscInt M=A->rmap->N; 1359 1360 PetscFunctionBegin; 1361 /* Create the factorization matrix */ 1362 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1363 ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1364 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1365 ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr); 1366 (*F)->spptr = (void*)lu; 1367 1368 /* Set default Plapack parameters */ 1369 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1370 lu->nb = M/size; 1371 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1372 1373 /* Set runtime options */ 1374 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1375 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1376 PetscOptionsEnd(); 1377 1378 /* Create object distribution template */ 1379 lu->templ = NULL; 1380 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1381 1382 /* Set the datatype */ 1383 #if defined(PETSC_USE_COMPLEX) 1384 lu->datatype = MPI_DOUBLE_COMPLEX; 1385 #else 1386 lu->datatype = MPI_DOUBLE; 1387 #endif 1388 1389 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1390 1391 1392 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1393 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1394 1395 if (ftype == MAT_FACTOR_LU) { 1396 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1397 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1398 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1399 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1400 (*F)->factortype = ftype; 1401 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr); 1402 PetscFunctionReturn(0); 1403 } 1404 EXTERN_C_END 1405 #endif 1406 1407 #undef __FUNCT__ 1408 #define __FUNCT__ "MatGetFactor_mpidense_petsc" 1409 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F) 1410 { 1411 #if defined(PETSC_HAVE_PLAPACK) 1412 PetscErrorCode ierr; 1413 #endif 1414 1415 PetscFunctionBegin; 1416 #if defined(PETSC_HAVE_PLAPACK) 1417 ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr); 1418 #else 1419 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name); 1420 #endif 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatAXPY_MPIDense" 1426 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1427 { 1428 PetscErrorCode ierr; 1429 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1430 1431 PetscFunctionBegin; 1432 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatConjugate_MPIDense" 1438 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIDense(Mat mat) 1439 { 1440 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1441 PetscErrorCode ierr; 1442 1443 PetscFunctionBegin; 1444 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 #undef __FUNCT__ 1449 #define __FUNCT__ "MatRealPart_MPIDense" 1450 PetscErrorCode MatRealPart_MPIDense(Mat A) 1451 { 1452 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1453 PetscErrorCode ierr; 1454 1455 PetscFunctionBegin; 1456 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1457 PetscFunctionReturn(0); 1458 } 1459 1460 #undef __FUNCT__ 1461 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1462 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1463 { 1464 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1465 PetscErrorCode ierr; 1466 1467 PetscFunctionBegin; 1468 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } 1471 1472 /* -------------------------------------------------------------------*/ 1473 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1474 MatGetRow_MPIDense, 1475 MatRestoreRow_MPIDense, 1476 MatMult_MPIDense, 1477 /* 4*/ MatMultAdd_MPIDense, 1478 MatMultTranspose_MPIDense, 1479 MatMultTransposeAdd_MPIDense, 1480 0, 1481 0, 1482 0, 1483 /*10*/ 0, 1484 0, 1485 0, 1486 0, 1487 MatTranspose_MPIDense, 1488 /*15*/ MatGetInfo_MPIDense, 1489 MatEqual_MPIDense, 1490 MatGetDiagonal_MPIDense, 1491 MatDiagonalScale_MPIDense, 1492 MatNorm_MPIDense, 1493 /*20*/ MatAssemblyBegin_MPIDense, 1494 MatAssemblyEnd_MPIDense, 1495 MatSetOption_MPIDense, 1496 MatZeroEntries_MPIDense, 1497 /*24*/ MatZeroRows_MPIDense, 1498 0, 1499 0, 1500 0, 1501 0, 1502 /*29*/ MatSetUpPreallocation_MPIDense, 1503 0, 1504 0, 1505 MatGetArray_MPIDense, 1506 MatRestoreArray_MPIDense, 1507 /*34*/ MatDuplicate_MPIDense, 1508 0, 1509 0, 1510 0, 1511 0, 1512 /*39*/ MatAXPY_MPIDense, 1513 MatGetSubMatrices_MPIDense, 1514 0, 1515 MatGetValues_MPIDense, 1516 0, 1517 /*44*/ 0, 1518 MatScale_MPIDense, 1519 0, 1520 0, 1521 0, 1522 /*49*/ 0, 1523 0, 1524 0, 1525 0, 1526 0, 1527 /*54*/ 0, 1528 0, 1529 0, 1530 0, 1531 0, 1532 /*59*/ MatGetSubMatrix_MPIDense, 1533 MatDestroy_MPIDense, 1534 MatView_MPIDense, 1535 0, 1536 0, 1537 /*64*/ 0, 1538 0, 1539 0, 1540 0, 1541 0, 1542 /*69*/ 0, 1543 0, 1544 0, 1545 0, 1546 0, 1547 /*74*/ 0, 1548 0, 1549 0, 1550 0, 1551 0, 1552 /*79*/ 0, 1553 0, 1554 0, 1555 0, 1556 /*83*/ 0, 1557 0, 1558 0, 1559 0, 1560 0, 1561 0, 1562 /*89*/ 1563 #if defined(PETSC_HAVE_PLAPACK) 1564 MatMatMult_MPIDense_MPIDense, 1565 MatMatMultSymbolic_MPIDense_MPIDense, 1566 MatMatMultNumeric_MPIDense_MPIDense, 1567 #else 1568 0, 1569 0, 1570 0, 1571 #endif 1572 0, 1573 0, 1574 /*94*/ 0, 1575 0, 1576 0, 1577 0, 1578 0, 1579 /*99*/ 0, 1580 0, 1581 0, 1582 MatConjugate_MPIDense, 1583 0, 1584 /*104*/0, 1585 MatRealPart_MPIDense, 1586 MatImaginaryPart_MPIDense, 1587 0, 1588 0, 1589 /*109*/0, 1590 0, 1591 0, 1592 0, 1593 0, 1594 /*114*/0, 1595 0, 1596 0, 1597 0, 1598 0, 1599 /*119*/0, 1600 0, 1601 0, 1602 0, 1603 MatLoadnew_MPIDense 1604 }; 1605 1606 EXTERN_C_BEGIN 1607 #undef __FUNCT__ 1608 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1609 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1610 { 1611 Mat_MPIDense *a; 1612 PetscErrorCode ierr; 1613 1614 PetscFunctionBegin; 1615 mat->preallocated = PETSC_TRUE; 1616 /* Note: For now, when data is specified above, this assumes the user correctly 1617 allocates the local dense storage space. We should add error checking. */ 1618 1619 a = (Mat_MPIDense*)mat->data; 1620 ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr); 1621 ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr); 1622 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1623 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1624 a->nvec = mat->cmap->n; 1625 1626 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1627 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1628 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1629 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1630 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1631 PetscFunctionReturn(0); 1632 } 1633 EXTERN_C_END 1634 1635 /*MC 1636 MATSOLVERPLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices 1637 1638 run ./configure with the option --download-plapack 1639 1640 1641 Options Database Keys: 1642 . -mat_plapack_nprows <n> - number of rows in processor partition 1643 . -mat_plapack_npcols <n> - number of columns in processor partition 1644 . -mat_plapack_nb <n> - block size of template vector 1645 . -mat_plapack_nb_alg <n> - algorithmic block size 1646 - -mat_plapack_ckerror <n> - error checking flag 1647 1648 Level: intermediate 1649 1650 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage 1651 1652 M*/ 1653 1654 EXTERN_C_BEGIN 1655 #undef __FUNCT__ 1656 #define __FUNCT__ "MatCreate_MPIDense" 1657 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1658 { 1659 Mat_MPIDense *a; 1660 PetscErrorCode ierr; 1661 1662 PetscFunctionBegin; 1663 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1664 mat->data = (void*)a; 1665 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1666 mat->mapping = 0; 1667 1668 mat->insertmode = NOT_SET_VALUES; 1669 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1670 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1671 1672 /* build cache for off array entries formed */ 1673 a->donotstash = PETSC_FALSE; 1674 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1675 1676 /* stuff used for matrix vector multiply */ 1677 a->lvec = 0; 1678 a->Mvctx = 0; 1679 a->roworiented = PETSC_TRUE; 1680 1681 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1682 "MatGetDiagonalBlock_MPIDense", 1683 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1684 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1685 "MatMPIDenseSetPreallocation_MPIDense", 1686 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1687 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1688 "MatMatMult_MPIAIJ_MPIDense", 1689 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1690 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1691 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1692 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1693 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1694 "MatMatMultNumeric_MPIAIJ_MPIDense", 1695 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1696 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C", 1697 "MatGetFactor_mpidense_petsc", 1698 MatGetFactor_mpidense_petsc);CHKERRQ(ierr); 1699 #if defined(PETSC_HAVE_PLAPACK) 1700 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C", 1701 "MatGetFactor_mpidense_plapack", 1702 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1703 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1704 #endif 1705 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1706 1707 PetscFunctionReturn(0); 1708 } 1709 EXTERN_C_END 1710 1711 /*MC 1712 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1713 1714 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1715 and MATMPIDENSE otherwise. 1716 1717 Options Database Keys: 1718 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1719 1720 Level: beginner 1721 1722 1723 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1724 M*/ 1725 1726 EXTERN_C_BEGIN 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatCreate_Dense" 1729 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1730 { 1731 PetscErrorCode ierr; 1732 PetscMPIInt size; 1733 1734 PetscFunctionBegin; 1735 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1736 if (size == 1) { 1737 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1738 } else { 1739 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1740 } 1741 PetscFunctionReturn(0); 1742 } 1743 EXTERN_C_END 1744 1745 #undef __FUNCT__ 1746 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1747 /*@C 1748 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1749 1750 Not collective 1751 1752 Input Parameters: 1753 . A - the matrix 1754 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1755 to control all matrix memory allocation. 1756 1757 Notes: 1758 The dense format is fully compatible with standard Fortran 77 1759 storage by columns. 1760 1761 The data input variable is intended primarily for Fortran programmers 1762 who wish to allocate their own matrix memory space. Most users should 1763 set data=PETSC_NULL. 1764 1765 Level: intermediate 1766 1767 .keywords: matrix,dense, parallel 1768 1769 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1770 @*/ 1771 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1772 { 1773 PetscErrorCode ierr,(*f)(Mat,PetscScalar *); 1774 1775 PetscFunctionBegin; 1776 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1777 if (f) { 1778 ierr = (*f)(mat,data);CHKERRQ(ierr); 1779 } 1780 PetscFunctionReturn(0); 1781 } 1782 1783 #undef __FUNCT__ 1784 #define __FUNCT__ "MatCreateMPIDense" 1785 /*@C 1786 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1787 1788 Collective on MPI_Comm 1789 1790 Input Parameters: 1791 + comm - MPI communicator 1792 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1793 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1794 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1795 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1796 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1797 to control all matrix memory allocation. 1798 1799 Output Parameter: 1800 . A - the matrix 1801 1802 Notes: 1803 The dense format is fully compatible with standard Fortran 77 1804 storage by columns. 1805 1806 The data input variable is intended primarily for Fortran programmers 1807 who wish to allocate their own matrix memory space. Most users should 1808 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1809 1810 The user MUST specify either the local or global matrix dimensions 1811 (possibly both). 1812 1813 Level: intermediate 1814 1815 .keywords: matrix,dense, parallel 1816 1817 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1818 @*/ 1819 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1820 { 1821 PetscErrorCode ierr; 1822 PetscMPIInt size; 1823 1824 PetscFunctionBegin; 1825 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1826 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1827 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1828 if (size > 1) { 1829 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1830 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1831 } else { 1832 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1833 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1834 } 1835 PetscFunctionReturn(0); 1836 } 1837 1838 #undef __FUNCT__ 1839 #define __FUNCT__ "MatDuplicate_MPIDense" 1840 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1841 { 1842 Mat mat; 1843 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1844 PetscErrorCode ierr; 1845 1846 PetscFunctionBegin; 1847 *newmat = 0; 1848 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1849 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1850 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1851 a = (Mat_MPIDense*)mat->data; 1852 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1853 mat->factortype = A->factortype; 1854 mat->assembled = PETSC_TRUE; 1855 mat->preallocated = PETSC_TRUE; 1856 1857 mat->rmap->rstart = A->rmap->rstart; 1858 mat->rmap->rend = A->rmap->rend; 1859 a->size = oldmat->size; 1860 a->rank = oldmat->rank; 1861 mat->insertmode = NOT_SET_VALUES; 1862 a->nvec = oldmat->nvec; 1863 a->donotstash = oldmat->donotstash; 1864 1865 ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1866 ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1867 1868 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1869 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1870 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1871 1872 *newmat = mat; 1873 PetscFunctionReturn(0); 1874 } 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1878 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat) 1879 { 1880 PetscErrorCode ierr; 1881 PetscMPIInt rank,size; 1882 PetscInt *rowners,i,m,nz,j; 1883 PetscScalar *array,*vals,*vals_ptr; 1884 MPI_Status status; 1885 1886 PetscFunctionBegin; 1887 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1888 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1889 1890 /* determine ownership of all rows */ 1891 m = M/size + ((M % size) > rank); 1892 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1893 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1894 rowners[0] = 0; 1895 for (i=2; i<=size; i++) { 1896 rowners[i] += rowners[i-1]; 1897 } 1898 1899 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 1900 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1901 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 1902 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 1903 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1904 1905 if (!rank) { 1906 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1907 1908 /* read in my part of the matrix numerical values */ 1909 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1910 1911 /* insert into matrix-by row (this is why cannot directly read into array */ 1912 vals_ptr = vals; 1913 for (i=0; i<m; i++) { 1914 for (j=0; j<N; j++) { 1915 array[i + j*m] = *vals_ptr++; 1916 } 1917 } 1918 1919 /* read in other processors and ship out */ 1920 for (i=1; i<size; i++) { 1921 nz = (rowners[i+1] - rowners[i])*N; 1922 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1923 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr); 1924 } 1925 } else { 1926 /* receive numeric values */ 1927 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1928 1929 /* receive message of values*/ 1930 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr); 1931 1932 /* insert into matrix-by row (this is why cannot directly read into array */ 1933 vals_ptr = vals; 1934 for (i=0; i<m; i++) { 1935 for (j=0; j<N; j++) { 1936 array[i + j*m] = *vals_ptr++; 1937 } 1938 } 1939 } 1940 ierr = PetscFree(rowners);CHKERRQ(ierr); 1941 ierr = PetscFree(vals);CHKERRQ(ierr); 1942 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1943 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1944 PetscFunctionReturn(0); 1945 } 1946 1947 #undef __FUNCT__ 1948 #define __FUNCT__ "MatLoad_MPIDense" 1949 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat) 1950 { 1951 Mat A; 1952 PetscScalar *vals,*svals; 1953 MPI_Comm comm = ((PetscObject)viewer)->comm; 1954 MPI_Status status; 1955 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1956 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1957 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1958 PetscInt i,nz,j,rstart,rend; 1959 int fd; 1960 PetscErrorCode ierr; 1961 1962 PetscFunctionBegin; 1963 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1964 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1965 if (!rank) { 1966 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1967 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1968 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1969 } 1970 1971 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1972 M = header[1]; N = header[2]; nz = header[3]; 1973 1974 /* 1975 Handle case where matrix is stored on disk as a dense matrix 1976 */ 1977 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1978 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr); 1979 PetscFunctionReturn(0); 1980 } 1981 1982 /* determine ownership of all rows */ 1983 m = PetscMPIIntCast(M/size + ((M % size) > rank)); 1984 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1985 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1986 rowners[0] = 0; 1987 for (i=2; i<=size; i++) { 1988 rowners[i] += rowners[i-1]; 1989 } 1990 rstart = rowners[rank]; 1991 rend = rowners[rank+1]; 1992 1993 /* distribute row lengths to all processors */ 1994 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 1995 if (!rank) { 1996 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1997 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1998 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1999 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2000 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2001 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2002 } else { 2003 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2004 } 2005 2006 if (!rank) { 2007 /* calculate the number of nonzeros on each processor */ 2008 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2009 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2010 for (i=0; i<size; i++) { 2011 for (j=rowners[i]; j< rowners[i+1]; j++) { 2012 procsnz[i] += rowlengths[j]; 2013 } 2014 } 2015 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2016 2017 /* determine max buffer needed and allocate it */ 2018 maxnz = 0; 2019 for (i=0; i<size; i++) { 2020 maxnz = PetscMax(maxnz,procsnz[i]); 2021 } 2022 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2023 2024 /* read in my part of the matrix column indices */ 2025 nz = procsnz[0]; 2026 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2027 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2028 2029 /* read in every one elses and ship off */ 2030 for (i=1; i<size; i++) { 2031 nz = procsnz[i]; 2032 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2033 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2034 } 2035 ierr = PetscFree(cols);CHKERRQ(ierr); 2036 } else { 2037 /* determine buffer space needed for message */ 2038 nz = 0; 2039 for (i=0; i<m; i++) { 2040 nz += ourlens[i]; 2041 } 2042 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2043 2044 /* receive message of column indices*/ 2045 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2046 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2047 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2048 } 2049 2050 /* loop over local rows, determining number of off diagonal entries */ 2051 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2052 jj = 0; 2053 for (i=0; i<m; i++) { 2054 for (j=0; j<ourlens[i]; j++) { 2055 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 2056 jj++; 2057 } 2058 } 2059 2060 /* create our matrix */ 2061 for (i=0; i<m; i++) { 2062 ourlens[i] -= offlens[i]; 2063 } 2064 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 2065 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2066 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 2067 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 2068 A = *newmat; 2069 for (i=0; i<m; i++) { 2070 ourlens[i] += offlens[i]; 2071 } 2072 2073 if (!rank) { 2074 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2075 2076 /* read in my part of the matrix numerical values */ 2077 nz = procsnz[0]; 2078 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2079 2080 /* insert into matrix */ 2081 jj = rstart; 2082 smycols = mycols; 2083 svals = vals; 2084 for (i=0; i<m; i++) { 2085 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2086 smycols += ourlens[i]; 2087 svals += ourlens[i]; 2088 jj++; 2089 } 2090 2091 /* read in other processors and ship out */ 2092 for (i=1; i<size; i++) { 2093 nz = procsnz[i]; 2094 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2095 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2096 } 2097 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2098 } else { 2099 /* receive numeric values */ 2100 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2101 2102 /* receive message of values*/ 2103 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2104 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2105 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2106 2107 /* insert into matrix */ 2108 jj = rstart; 2109 smycols = mycols; 2110 svals = vals; 2111 for (i=0; i<m; i++) { 2112 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2113 smycols += ourlens[i]; 2114 svals += ourlens[i]; 2115 jj++; 2116 } 2117 } 2118 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2119 ierr = PetscFree(vals);CHKERRQ(ierr); 2120 ierr = PetscFree(mycols);CHKERRQ(ierr); 2121 ierr = PetscFree(rowners);CHKERRQ(ierr); 2122 2123 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2124 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 #undef __FUNCT__ 2129 #define __FUNCT__ "MatLoadnew_MPIDense_DenseInFile" 2130 PetscErrorCode MatLoadnew_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset) 2131 { 2132 PetscErrorCode ierr; 2133 PetscMPIInt rank,size; 2134 PetscInt *rowners,i,m,nz,j; 2135 PetscScalar *array,*vals,*vals_ptr; 2136 MPI_Status status; 2137 2138 PetscFunctionBegin; 2139 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2140 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2141 2142 /* determine ownership of all rows */ 2143 if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank); 2144 else m = newmat->rmap->n; 2145 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2146 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2147 rowners[0] = 0; 2148 for (i=2; i<=size; i++) { 2149 rowners[i] += rowners[i-1]; 2150 } 2151 2152 if (!sizesset) { 2153 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2154 } 2155 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 2156 ierr = MatGetArray(newmat,&array);CHKERRQ(ierr); 2157 2158 if (!rank) { 2159 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2160 2161 /* read in my part of the matrix numerical values */ 2162 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 2163 2164 /* insert into matrix-by row (this is why cannot directly read into array */ 2165 vals_ptr = vals; 2166 for (i=0; i<m; i++) { 2167 for (j=0; j<N; j++) { 2168 array[i + j*m] = *vals_ptr++; 2169 } 2170 } 2171 2172 /* read in other processors and ship out */ 2173 for (i=1; i<size; i++) { 2174 nz = (rowners[i+1] - rowners[i])*N; 2175 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2176 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 2177 } 2178 } else { 2179 /* receive numeric values */ 2180 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2181 2182 /* receive message of values*/ 2183 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr); 2184 2185 /* insert into matrix-by row (this is why cannot directly read into array */ 2186 vals_ptr = vals; 2187 for (i=0; i<m; i++) { 2188 for (j=0; j<N; j++) { 2189 array[i + j*m] = *vals_ptr++; 2190 } 2191 } 2192 } 2193 ierr = PetscFree(rowners);CHKERRQ(ierr); 2194 ierr = PetscFree(vals);CHKERRQ(ierr); 2195 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2196 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2197 PetscFunctionReturn(0); 2198 } 2199 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatLoadnew_MPIDense" 2202 PetscErrorCode MatLoadnew_MPIDense(PetscViewer viewer,Mat newmat) 2203 { 2204 PetscScalar *vals,*svals; 2205 MPI_Comm comm = ((PetscObject)viewer)->comm; 2206 MPI_Status status; 2207 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 2208 PetscInt header[4],*rowlengths = 0,M,N,*cols; 2209 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 2210 PetscInt i,nz,j,rstart,rend,sizesset=1,grows,gcols; 2211 int fd; 2212 PetscErrorCode ierr; 2213 2214 PetscFunctionBegin; 2215 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2216 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2217 if (!rank) { 2218 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2219 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2220 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2221 } 2222 2223 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2224 2225 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2226 M = header[1]; N = header[2]; nz = header[3]; 2227 2228 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2229 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2230 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2231 2232 /* If global sizes are set, check if they are consistent with that given in the file */ 2233 if (sizesset) { 2234 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2235 } 2236 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Incostintent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2237 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Incostintent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2238 2239 /* 2240 Handle case where matrix is stored on disk as a dense matrix 2241 */ 2242 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 2243 ierr = MatLoadnew_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr); 2244 PetscFunctionReturn(0); 2245 } 2246 2247 /* determine ownership of all rows */ 2248 if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank)); 2249 else m = PetscMPIIntCast(newmat->rmap->n); 2250 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 2251 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2252 rowners[0] = 0; 2253 for (i=2; i<=size; i++) { 2254 rowners[i] += rowners[i-1]; 2255 } 2256 rstart = rowners[rank]; 2257 rend = rowners[rank+1]; 2258 2259 /* distribute row lengths to all processors */ 2260 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 2261 if (!rank) { 2262 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2263 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2264 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2265 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2266 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2267 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2268 } else { 2269 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2270 } 2271 2272 if (!rank) { 2273 /* calculate the number of nonzeros on each processor */ 2274 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2275 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2276 for (i=0; i<size; i++) { 2277 for (j=rowners[i]; j< rowners[i+1]; j++) { 2278 procsnz[i] += rowlengths[j]; 2279 } 2280 } 2281 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2282 2283 /* determine max buffer needed and allocate it */ 2284 maxnz = 0; 2285 for (i=0; i<size; i++) { 2286 maxnz = PetscMax(maxnz,procsnz[i]); 2287 } 2288 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2289 2290 /* read in my part of the matrix column indices */ 2291 nz = procsnz[0]; 2292 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2293 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2294 2295 /* read in every one elses and ship off */ 2296 for (i=1; i<size; i++) { 2297 nz = procsnz[i]; 2298 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2299 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2300 } 2301 ierr = PetscFree(cols);CHKERRQ(ierr); 2302 } else { 2303 /* determine buffer space needed for message */ 2304 nz = 0; 2305 for (i=0; i<m; i++) { 2306 nz += ourlens[i]; 2307 } 2308 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2309 2310 /* receive message of column indices*/ 2311 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2312 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2313 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2314 } 2315 2316 /* loop over local rows, determining number of off diagonal entries */ 2317 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2318 jj = 0; 2319 for (i=0; i<m; i++) { 2320 for (j=0; j<ourlens[i]; j++) { 2321 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 2322 jj++; 2323 } 2324 } 2325 2326 /* create our matrix */ 2327 for (i=0; i<m; i++) { 2328 ourlens[i] -= offlens[i]; 2329 } 2330 2331 if (!sizesset) { 2332 ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2333 } 2334 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 2335 for (i=0; i<m; i++) { 2336 ourlens[i] += offlens[i]; 2337 } 2338 2339 if (!rank) { 2340 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2341 2342 /* read in my part of the matrix numerical values */ 2343 nz = procsnz[0]; 2344 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2345 2346 /* insert into matrix */ 2347 jj = rstart; 2348 smycols = mycols; 2349 svals = vals; 2350 for (i=0; i<m; i++) { 2351 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2352 smycols += ourlens[i]; 2353 svals += ourlens[i]; 2354 jj++; 2355 } 2356 2357 /* read in other processors and ship out */ 2358 for (i=1; i<size; i++) { 2359 nz = procsnz[i]; 2360 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2361 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2362 } 2363 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2364 } else { 2365 /* receive numeric values */ 2366 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2367 2368 /* receive message of values*/ 2369 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2370 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2371 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2372 2373 /* insert into matrix */ 2374 jj = rstart; 2375 smycols = mycols; 2376 svals = vals; 2377 for (i=0; i<m; i++) { 2378 ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2379 smycols += ourlens[i]; 2380 svals += ourlens[i]; 2381 jj++; 2382 } 2383 } 2384 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2385 ierr = PetscFree(vals);CHKERRQ(ierr); 2386 ierr = PetscFree(mycols);CHKERRQ(ierr); 2387 ierr = PetscFree(rowners);CHKERRQ(ierr); 2388 2389 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2390 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2391 PetscFunctionReturn(0); 2392 } 2393 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "MatEqual_MPIDense" 2396 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag) 2397 { 2398 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2399 Mat a,b; 2400 PetscTruth flg; 2401 PetscErrorCode ierr; 2402 2403 PetscFunctionBegin; 2404 a = matA->A; 2405 b = matB->A; 2406 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2407 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2408 PetscFunctionReturn(0); 2409 } 2410 2411 #if defined(PETSC_HAVE_PLAPACK) 2412 2413 #undef __FUNCT__ 2414 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2415 /*@C 2416 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2417 Level: developer 2418 2419 .keywords: Petsc, destroy, package, PLAPACK 2420 .seealso: PetscFinalize() 2421 @*/ 2422 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void) 2423 { 2424 PetscErrorCode ierr; 2425 2426 PetscFunctionBegin; 2427 ierr = PLA_Finalize();CHKERRQ(ierr); 2428 PetscFunctionReturn(0); 2429 } 2430 2431 #undef __FUNCT__ 2432 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2433 /*@C 2434 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2435 called from MatCreate_MPIDense() the first time an MPI dense matrix is called. 2436 2437 Input Parameter: 2438 . comm - the communicator the matrix lives on 2439 2440 Level: developer 2441 2442 Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the 2443 same communicator (because there is some global state that is initialized and used for all matrices). In addition if 2444 PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators 2445 cannot overlap. 2446 2447 .keywords: Petsc, initialize, package, PLAPACK 2448 .seealso: PetscSysInitializePackage(), PetscInitialize() 2449 @*/ 2450 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm) 2451 { 2452 PetscMPIInt size; 2453 PetscErrorCode ierr; 2454 2455 PetscFunctionBegin; 2456 if (!PLA_Initialized(PETSC_NULL)) { 2457 2458 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2459 Plapack_nprows = 1; 2460 Plapack_npcols = size; 2461 2462 ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr); 2463 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr); 2464 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr); 2465 #if defined(PETSC_USE_DEBUG) 2466 Plapack_ierror = 3; 2467 #else 2468 Plapack_ierror = 0; 2469 #endif 2470 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr); 2471 if (Plapack_ierror){ 2472 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 2473 } else { 2474 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 2475 } 2476 2477 Plapack_nb_alg = 0; 2478 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr); 2479 if (Plapack_nb_alg) { 2480 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr); 2481 } 2482 PetscOptionsEnd(); 2483 2484 ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr); 2485 ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr); 2486 ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr); 2487 } 2488 PetscFunctionReturn(0); 2489 } 2490 2491 #endif 2492