1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "../src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 #if defined(PETSC_HAVE_PLAPACK) 10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 11 static MPI_Comm Plapack_comm_2d; 12 EXTERN_C_BEGIN 13 #include "PLA.h" 14 EXTERN_C_END 15 16 typedef struct { 17 PLA_Obj A,pivots; 18 PLA_Template templ; 19 MPI_Datatype datatype; 20 PetscInt nb,rstart; 21 VecScatter ctx; 22 IS is_pla,is_petsc; 23 PetscTruth pla_solved; 24 MatStructure mstruct; 25 } Mat_Plapack; 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatDenseGetLocalMatrix" 30 /*@ 31 32 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 33 matrix that represents the operator. For sequential matrices it returns itself. 34 35 Input Parameter: 36 . A - the Seq or MPI dense matrix 37 38 Output Parameter: 39 . B - the inner matrix 40 41 Level: intermediate 42 43 @*/ 44 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 45 { 46 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 47 PetscErrorCode ierr; 48 PetscTruth flg; 49 50 PetscFunctionBegin; 51 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 52 if (flg) { 53 *B = mat->A; 54 } else { 55 *B = A; 56 } 57 PetscFunctionReturn(0); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatGetRow_MPIDense" 62 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 63 { 64 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 65 PetscErrorCode ierr; 66 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 67 68 PetscFunctionBegin; 69 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 70 lrow = row - rstart; 71 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 72 PetscFunctionReturn(0); 73 } 74 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatRestoreRow_MPIDense" 77 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 78 { 79 PetscErrorCode ierr; 80 81 PetscFunctionBegin; 82 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 83 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 84 PetscFunctionReturn(0); 85 } 86 87 EXTERN_C_BEGIN 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 90 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 91 { 92 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 93 PetscErrorCode ierr; 94 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 95 PetscScalar *array; 96 MPI_Comm comm; 97 98 PetscFunctionBegin; 99 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 100 101 /* The reuse aspect is not implemented efficiently */ 102 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 103 104 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 105 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 106 ierr = MatCreate(comm,B);CHKERRQ(ierr); 107 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 108 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 109 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 110 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 111 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 112 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 113 114 *iscopy = PETSC_TRUE; 115 PetscFunctionReturn(0); 116 } 117 EXTERN_C_END 118 119 #undef __FUNCT__ 120 
#define __FUNCT__ "MatSetValues_MPIDense" 121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 122 { 123 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 124 PetscErrorCode ierr; 125 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 126 PetscTruth roworiented = A->roworiented; 127 128 PetscFunctionBegin; 129 if (v) PetscValidScalarPointer(v,6); 130 for (i=0; i<m; i++) { 131 if (idxm[i] < 0) continue; 132 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 133 if (idxm[i] >= rstart && idxm[i] < rend) { 134 row = idxm[i] - rstart; 135 if (roworiented) { 136 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 137 } else { 138 for (j=0; j<n; j++) { 139 if (idxn[j] < 0) continue; 140 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 141 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 142 } 143 } 144 } else { 145 if (!A->donotstash) { 146 if (roworiented) { 147 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 148 } else { 149 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 150 } 151 } 152 } 153 } 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatGetValues_MPIDense" 159 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 160 { 161 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 164 165 PetscFunctionBegin; 166 for (i=0; i<m; i++) { 167 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 168 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 169 if (idxm[i] >= rstart && idxm[i] < rend) { 170 row = idxm[i] - rstart; 171 for (j=0; j<n; j++) { 172 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 173 if (idxn[j] >= mat->cmap->N) { 174 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 175 } 176 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 177 } 178 } else { 179 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 180 } 181 } 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatGetArray_MPIDense" 187 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 188 { 189 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 190 PetscErrorCode ierr; 191 192 PetscFunctionBegin; 193 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 194 PetscFunctionReturn(0); 195 } 196 197 #undef __FUNCT__ 198 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 199 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 200 { 201 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 202 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 203 PetscErrorCode ierr; 204 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 205 const PetscInt *irow,*icol; 206 PetscScalar *av,*bv,*v = lmat->v; 207 Mat newmat; 208 IS iscol_local; 209 210 PetscFunctionBegin; 211 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 212 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 213 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 214 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 215 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 216 ierr = 
ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 217 218 /* No parallel redistribution currently supported! Should really check each index set 219 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 220 original matrix! */ 221 222 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 223 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 224 225 /* Check submatrix call */ 226 if (scall == MAT_REUSE_MATRIX) { 227 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 228 /* Really need to test rows and column sizes! */ 229 newmat = *B; 230 } else { 231 /* Create and fill new matrix */ 232 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 233 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 234 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 235 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 236 } 237 238 /* Now extract the data pointers and do the copy, column at a time */ 239 newmatd = (Mat_MPIDense*)newmat->data; 240 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 241 242 for (i=0; i<Ncols; i++) { 243 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 244 for (j=0; j<nrows; j++) { 245 *bv++ = av[irow[j] - rstart]; 246 } 247 } 248 249 /* Assemble the matrices so that the correct flags are set */ 250 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 251 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 252 253 /* Free work space */ 254 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 255 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 256 *B = newmat; 257 PetscFunctionReturn(0); 258 } 259 260 #undef __FUNCT__ 261 #define __FUNCT__ "MatRestoreArray_MPIDense" 262 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 263 { 264 PetscFunctionBegin; 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 270 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 271 { 272 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 273 MPI_Comm comm = ((PetscObject)mat)->comm; 274 PetscErrorCode ierr; 275 PetscInt nstash,reallocs; 276 InsertMode addv; 277 278 PetscFunctionBegin; 279 /* make sure all processors are either in INSERTMODE or ADDMODE */ 280 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 281 if (addv == (ADD_VALUES|INSERT_VALUES)) { 282 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 283 } 284 mat->insertmode = addv; /* in case this processor had no cache */ 285 286 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 287 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 288 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 294 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 295 { 296 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 297 PetscErrorCode ierr; 298 PetscInt i,*row,*col,flg,j,rstart,ncols; 299 PetscMPIInt n; 300 PetscScalar *val; 301 InsertMode addv=mat->insertmode; 302 303 PetscFunctionBegin; 304 /* wait on receives */ 305 while (1) { 306 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 307 if (!flg) break; 308 309 for (i=0; i<n;) { 310 /* Now identify the 
consecutive vals belonging to the same row */ 311 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 312 if (j < n) ncols = j-i; 313 else ncols = n-i; 314 /* Now assemble all these values with a single function call */ 315 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 316 i = j; 317 } 318 } 319 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 320 321 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 322 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 323 324 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 325 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 326 } 327 PetscFunctionReturn(0); 328 } 329 330 #undef __FUNCT__ 331 #define __FUNCT__ "MatZeroEntries_MPIDense" 332 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 333 { 334 PetscErrorCode ierr; 335 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 336 337 PetscFunctionBegin; 338 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 339 PetscFunctionReturn(0); 340 } 341 342 /* the code does not do the diagonal entries correctly unless the 343 matrix is square and the column and row owerships are identical. 344 This is a BUG. The only way to fix it seems to be to access 345 mdn->A and mdn->B directly and not through the MatZeroRows() 346 routine. 347 */ 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatZeroRows_MPIDense" 350 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 351 { 352 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 353 PetscErrorCode ierr; 354 PetscInt i,*owners = A->rmap->range; 355 PetscInt *nprocs,j,idx,nsends; 356 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 357 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 358 PetscInt *lens,*lrows,*values; 359 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 360 MPI_Comm comm = ((PetscObject)A)->comm; 361 MPI_Request *send_waits,*recv_waits; 362 MPI_Status recv_status,*send_status; 363 PetscTruth found; 364 365 PetscFunctionBegin; 366 /* first count number of contributors to each processor */ 367 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 368 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 369 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 370 for (i=0; i<N; i++) { 371 idx = rows[i]; 372 found = PETSC_FALSE; 373 for (j=0; j<size; j++) { 374 if (idx >= owners[j] && idx < owners[j+1]) { 375 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 376 } 377 } 378 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 379 } 380 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 381 382 /* inform other processors of number of messages and max length*/ 383 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 384 385 /* post receives: */ 386 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 387 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 388 for (i=0; i<nrecvs; i++) { 389 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 390 } 391 392 /* do sends: 393 1) starts[i] gives the starting index in svalues for stuff going to 394 the ith processor 395 */ 396 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 397 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 398 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 399 starts[0] = 0; 400 for (i=1; i<size; i++) { starts[i] = 
starts[i-1] + nprocs[2*i-2];} 401 for (i=0; i<N; i++) { 402 svalues[starts[owner[i]]++] = rows[i]; 403 } 404 405 starts[0] = 0; 406 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 407 count = 0; 408 for (i=0; i<size; i++) { 409 if (nprocs[2*i+1]) { 410 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 411 } 412 } 413 ierr = PetscFree(starts);CHKERRQ(ierr); 414 415 base = owners[rank]; 416 417 /* wait on receives */ 418 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 419 source = lens + nrecvs; 420 count = nrecvs; slen = 0; 421 while (count) { 422 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 423 /* unpack receives into our local space */ 424 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 425 source[imdex] = recv_status.MPI_SOURCE; 426 lens[imdex] = n; 427 slen += n; 428 count--; 429 } 430 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 431 432 /* move the data into the send scatter */ 433 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 434 count = 0; 435 for (i=0; i<nrecvs; i++) { 436 values = rvalues + i*nmax; 437 for (j=0; j<lens[i]; j++) { 438 lrows[count++] = values[j] - base; 439 } 440 } 441 ierr = PetscFree(rvalues);CHKERRQ(ierr); 442 ierr = PetscFree(lens);CHKERRQ(ierr); 443 ierr = PetscFree(owner);CHKERRQ(ierr); 444 ierr = PetscFree(nprocs);CHKERRQ(ierr); 445 446 /* actually zap the local rows */ 447 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 448 ierr = PetscFree(lrows);CHKERRQ(ierr); 449 450 /* wait on sends */ 451 if (nsends) { 452 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 453 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 454 ierr = PetscFree(send_status);CHKERRQ(ierr); 455 } 456 ierr = PetscFree(send_waits);CHKERRQ(ierr); 457 ierr = PetscFree(svalues);CHKERRQ(ierr); 458 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatMult_MPIDense" 464 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 465 { 466 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 467 PetscErrorCode ierr; 468 469 PetscFunctionBegin; 470 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 476 #undef __FUNCT__ 477 #define __FUNCT__ "MatMultAdd_MPIDense" 478 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 479 { 480 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 481 PetscErrorCode ierr; 482 483 PetscFunctionBegin; 484 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 485 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 486 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 487 PetscFunctionReturn(0); 488 } 489 490 #undef __FUNCT__ 491 #define __FUNCT__ "MatMultTranspose_MPIDense" 492 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 493 { 494 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 495 PetscErrorCode ierr; 496 PetscScalar zero = 0.0; 497 498 PetscFunctionBegin; 499 ierr = VecSet(yy,zero);CHKERRQ(ierr); 500 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 501 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 502 ierr = 
VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 508 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 509 { 510 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 511 PetscErrorCode ierr; 512 513 PetscFunctionBegin; 514 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 515 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 516 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 517 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 518 PetscFunctionReturn(0); 519 } 520 521 #undef __FUNCT__ 522 #define __FUNCT__ "MatGetDiagonal_MPIDense" 523 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 524 { 525 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 526 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 527 PetscErrorCode ierr; 528 PetscInt len,i,n,m = A->rmap->n,radd; 529 PetscScalar *x,zero = 0.0; 530 531 PetscFunctionBegin; 532 ierr = VecSet(v,zero);CHKERRQ(ierr); 533 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 534 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 535 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 536 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 537 radd = A->rmap->rstart*m; 538 for (i=0; i<len; i++) { 539 x[i] = aloc->v[radd + i*m + i]; 540 } 541 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatDestroy_MPIDense" 547 PetscErrorCode MatDestroy_MPIDense(Mat mat) 548 { 549 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 550 PetscErrorCode ierr; 551 #if defined(PETSC_HAVE_PLAPACK) 552 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 553 #endif 554 555 PetscFunctionBegin; 556 557 #if defined(PETSC_USE_LOG) 558 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 559 #endif 560 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 561 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 562 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 563 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 564 #if defined(PETSC_HAVE_PLAPACK) 565 if (lu) { 566 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 567 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 568 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 569 570 if (lu->is_pla) { 571 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 572 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 573 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 574 } 575 } 576 #endif 577 578 ierr = PetscFree(mdn);CHKERRQ(ierr); 579 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 580 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 581 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 582 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 583 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 584 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587 588 #undef __FUNCT__ 589 #define __FUNCT__ "MatView_MPIDense_Binary" 590 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 591 { 592 Mat_MPIDense *mdn = 
(Mat_MPIDense*)mat->data; 593 PetscErrorCode ierr; 594 PetscViewerFormat format; 595 int fd; 596 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 597 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 598 PetscScalar *work,*v,*vv; 599 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 600 MPI_Status status; 601 602 PetscFunctionBegin; 603 if (mdn->size == 1) { 604 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 605 } else { 606 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 607 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 608 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 609 610 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 611 if (format == PETSC_VIEWER_NATIVE) { 612 613 if (!rank) { 614 /* store the matrix as a dense matrix */ 615 header[0] = MAT_FILE_COOKIE; 616 header[1] = mat->rmap->N; 617 header[2] = N; 618 header[3] = MATRIX_BINARY_FORMAT_DENSE; 619 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 620 621 /* get largest work array needed for transposing array */ 622 mmax = mat->rmap->n; 623 for (i=1; i<size; i++) { 624 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 625 } 626 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 627 628 /* write out local array, by rows */ 629 m = mat->rmap->n; 630 v = a->v; 631 for (j=0; j<N; j++) { 632 for (i=0; i<m; i++) { 633 work[j + i*N] = *v++; 634 } 635 } 636 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 637 /* get largest work array to receive messages from other processes, excludes process zero */ 638 mmax = 0; 639 for (i=1; i<size; i++) { 640 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 641 } 642 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 643 for(k = 1; k < size; k++) { 644 v = vv; 645 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 646 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 647 648 for(j = 0; j < N; j++) { 649 for(i = 0; i < m; i++) { 650 work[j + i*N] = *v++; 651 } 652 } 653 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 654 } 655 ierr = PetscFree(work);CHKERRQ(ierr); 656 ierr = PetscFree(vv);CHKERRQ(ierr); 657 } else { 658 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 659 } 660 } else { 661 SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE"); 662 } 663 } 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 669 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 670 { 671 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 672 PetscErrorCode ierr; 673 PetscMPIInt size = mdn->size,rank = mdn->rank; 674 const PetscViewerType vtype; 675 PetscTruth iascii,isdraw; 676 PetscViewer sviewer; 677 PetscViewerFormat format; 678 #if defined(PETSC_HAVE_PLAPACK) 679 Mat_Plapack *lu; 680 #endif 681 682 PetscFunctionBegin; 683 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 684 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 685 if (iascii) { 686 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 687 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 688 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 689 MatInfo info; 690 ierr = 
MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n, 692 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 693 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 694 #if defined(PETSC_HAVE_PLAPACK) 695 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 696 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 697 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 698 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 699 if (mat->factor){ 700 lu=(Mat_Plapack*)(mat->spptr); 701 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 702 } 703 #else 704 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 705 #endif 706 PetscFunctionReturn(0); 707 } else if (format == PETSC_VIEWER_ASCII_INFO) { 708 PetscFunctionReturn(0); 709 } 710 } else if (isdraw) { 711 PetscDraw draw; 712 PetscTruth isnull; 713 714 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 715 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 716 if (isnull) PetscFunctionReturn(0); 717 } 718 719 if (size == 1) { 720 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 721 } else { 722 /* assemble the entire matrix onto first processor. */ 723 Mat A; 724 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 725 PetscInt *cols; 726 PetscScalar *vals; 727 728 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 729 if (!rank) { 730 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 731 } else { 732 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 733 } 734 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 735 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 736 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 737 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 738 739 /* Copy the matrix ... 
This isn't the most efficient means, 740 but it's quick for now */ 741 A->insertmode = INSERT_VALUES; 742 row = mat->rmap->rstart; m = mdn->A->rmap->n; 743 for (i=0; i<m; i++) { 744 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 745 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 746 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 747 row++; 748 } 749 750 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 752 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 753 if (!rank) { 754 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 755 } 756 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 757 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 758 ierr = MatDestroy(A);CHKERRQ(ierr); 759 } 760 PetscFunctionReturn(0); 761 } 762 763 #undef __FUNCT__ 764 #define __FUNCT__ "MatView_MPIDense" 765 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 766 { 767 PetscErrorCode ierr; 768 PetscTruth iascii,isbinary,isdraw,issocket; 769 770 PetscFunctionBegin; 771 772 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 773 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 774 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 775 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 776 777 if (iascii || issocket || isdraw) { 778 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 779 } else if (isbinary) { 780 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 781 } else { 782 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 783 } 784 PetscFunctionReturn(0); 785 } 786 787 #undef __FUNCT__ 788 #define __FUNCT__ "MatGetInfo_MPIDense" 789 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 790 { 791 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 792 Mat mdn = mat->A; 793 PetscErrorCode ierr; 794 PetscReal isend[5],irecv[5]; 795 796 PetscFunctionBegin; 797 info->block_size = 1.0; 798 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 799 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 800 isend[3] = info->memory; isend[4] = info->mallocs; 801 if (flag == MAT_LOCAL) { 802 info->nz_used = isend[0]; 803 info->nz_allocated = isend[1]; 804 info->nz_unneeded = isend[2]; 805 info->memory = isend[3]; 806 info->mallocs = isend[4]; 807 } else if (flag == MAT_GLOBAL_MAX) { 808 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 809 info->nz_used = irecv[0]; 810 info->nz_allocated = irecv[1]; 811 info->nz_unneeded = irecv[2]; 812 info->memory = irecv[3]; 813 info->mallocs = irecv[4]; 814 } else if (flag == MAT_GLOBAL_SUM) { 815 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 816 info->nz_used = irecv[0]; 817 info->nz_allocated = irecv[1]; 818 info->nz_unneeded = irecv[2]; 819 info->memory = irecv[3]; 820 info->mallocs = irecv[4]; 821 } 822 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 823 info->fill_ratio_needed = 0; 824 info->factor_mallocs = 0; 825 PetscFunctionReturn(0); 826 } 827 828 #undef __FUNCT__ 829 #define __FUNCT__ "MatSetOption_MPIDense" 830 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 831 { 832 
Mat_MPIDense *a = (Mat_MPIDense*)A->data; 833 PetscErrorCode ierr; 834 835 PetscFunctionBegin; 836 switch (op) { 837 case MAT_NEW_NONZERO_LOCATIONS: 838 case MAT_NEW_NONZERO_LOCATION_ERR: 839 case MAT_NEW_NONZERO_ALLOCATION_ERR: 840 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 841 break; 842 case MAT_ROW_ORIENTED: 843 a->roworiented = flg; 844 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 845 break; 846 case MAT_NEW_DIAGONALS: 847 case MAT_USE_HASH_TABLE: 848 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 849 break; 850 case MAT_IGNORE_OFF_PROC_ENTRIES: 851 a->donotstash = flg; 852 break; 853 case MAT_SYMMETRIC: 854 case MAT_STRUCTURALLY_SYMMETRIC: 855 case MAT_HERMITIAN: 856 case MAT_SYMMETRY_ETERNAL: 857 case MAT_IGNORE_LOWER_TRIANGULAR: 858 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 859 break; 860 default: 861 SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 862 } 863 PetscFunctionReturn(0); 864 } 865 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatDiagonalScale_MPIDense" 869 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 870 { 871 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 872 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 873 PetscScalar *l,*r,x,*v; 874 PetscErrorCode ierr; 875 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 876 877 PetscFunctionBegin; 878 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 879 if (ll) { 880 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 881 if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 882 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 883 for (i=0; i<m; i++) { 884 x = l[i]; 885 v = mat->v + i; 886 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 887 } 888 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 889 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 890 } 891 if (rr) { 892 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 893 if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 894 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 895 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 897 for (i=0; i<n; i++) { 898 x = r[i]; 899 v = mat->v + i*m; 900 for (j=0; j<m; j++) { (*v++) *= x;} 901 } 902 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 903 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 904 } 905 PetscFunctionReturn(0); 906 } 907 908 #undef __FUNCT__ 909 #define __FUNCT__ "MatNorm_MPIDense" 910 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 911 { 912 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 913 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 914 PetscErrorCode ierr; 915 PetscInt i,j; 916 PetscReal sum = 0.0; 917 PetscScalar *v = mat->v; 918 919 PetscFunctionBegin; 920 if (mdn->size == 1) { 921 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 922 } else { 923 if (type == NORM_FROBENIUS) { 924 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 925 #if defined(PETSC_USE_COMPLEX) 926 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 927 #else 928 sum += (*v)*(*v); v++; 929 #endif 930 } 931 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 932 *nrm = sqrt(*nrm); 933 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 934 } else if (type == NORM_1) { 935 PetscReal *tmp,*tmp2; 936 ierr = 
PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 937 tmp2 = tmp + A->cmap->N; 938 ierr = PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 939 *nrm = 0.0; 940 v = mat->v; 941 for (j=0; j<mdn->A->cmap->n; j++) { 942 for (i=0; i<mdn->A->rmap->n; i++) { 943 tmp[j] += PetscAbsScalar(*v); v++; 944 } 945 } 946 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 947 for (j=0; j<A->cmap->N; j++) { 948 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 949 } 950 ierr = PetscFree(tmp);CHKERRQ(ierr); 951 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 952 } else if (type == NORM_INFINITY) { /* max row norm */ 953 PetscReal ntemp; 954 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 955 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 956 } else { 957 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 958 } 959 } 960 PetscFunctionReturn(0); 961 } 962 963 #undef __FUNCT__ 964 #define __FUNCT__ "MatTranspose_MPIDense" 965 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 966 { 967 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 968 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 969 Mat B; 970 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 971 PetscErrorCode ierr; 972 PetscInt j,i; 973 PetscScalar *v; 974 975 PetscFunctionBegin; 976 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 977 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 978 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 979 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 980 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 981 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 982 } else { 983 B = *matout; 984 } 985 986 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 987 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 988 for (i=0; i<m; i++) rwork[i] = rstart + i; 989 for (j=0; j<n; j++) { 990 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 991 v += m; 992 } 993 ierr = PetscFree(rwork);CHKERRQ(ierr); 994 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 995 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 996 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 997 *matout = B; 998 } else { 999 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1000 } 1001 PetscFunctionReturn(0); 1002 } 1003 1004 1005 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 1006 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 1007 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1010 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1011 { 1012 PetscErrorCode ierr; 1013 1014 PetscFunctionBegin; 1015 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1016 PetscFunctionReturn(0); 1017 } 1018 1019 #if defined(PETSC_HAVE_PLAPACK) 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1023 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1024 { 1025 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1026 PetscErrorCode ierr; 1027 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1028 PetscScalar *array; 1029 PetscReal one = 1.0; 1030 1031 PetscFunctionBegin; 1032 /* Copy A into F->lu->A */ 1033 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1034 ierr = PLA_API_begin();CHKERRQ(ierr); 1035 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1036 ierr = 
MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1037 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1038 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1039 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1040 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1041 ierr = PLA_API_end();CHKERRQ(ierr); 1042 lu->rstart = rstart; 1043 PetscFunctionReturn(0); 1044 } 1045 1046 #undef __FUNCT__ 1047 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1048 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1049 { 1050 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1051 PetscErrorCode ierr; 1052 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1053 PetscScalar *array; 1054 PetscReal one = 1.0; 1055 1056 PetscFunctionBegin; 1057 /* Copy F into A->lu->A */ 1058 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1059 ierr = PLA_API_begin();CHKERRQ(ierr); 1060 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1061 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1062 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1063 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1064 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1065 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1066 ierr = PLA_API_end();CHKERRQ(ierr); 1067 lu->rstart = rstart; 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1073 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1074 { 1075 PetscErrorCode ierr; 1076 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1077 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1078 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1079 PLA_Obj alpha = NULL,beta = NULL; 1080 1081 PetscFunctionBegin; 1082 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1083 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1084 1085 /* 1086 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1087 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1088 */ 1089 1090 /* do the multiply in PLA */ 1091 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1092 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1093 CHKMEMQ; 1094 1095 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1096 CHKMEMQ; 1097 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1098 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1099 1100 /* 1101 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1102 */ 1103 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1109 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1110 { 1111 PetscErrorCode ierr; 1112 PetscInt m=A->rmap->n,n=B->cmap->n; 1113 Mat Cmat; 1114 1115 PetscFunctionBegin; 1116 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1117 SETERRQ(PETSC_ERR_LIB,"Due to aparent bugs in PLAPACK,this is not currently supported"); 1118 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1119 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1120 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1121 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1122 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1123 1124 *C = Cmat; 1125 PetscFunctionReturn(0); 
1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1130 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1131 { 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 if (scall == MAT_INITIAL_MATRIX){ 1136 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1137 } 1138 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1139 PetscFunctionReturn(0); 1140 } 1141 1142 #undef __FUNCT__ 1143 #define __FUNCT__ "MatSolve_MPIDense" 1144 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1145 { 1146 MPI_Comm comm = ((PetscObject)A)->comm; 1147 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1148 PetscErrorCode ierr; 1149 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1150 PetscScalar *array; 1151 PetscReal one = 1.0; 1152 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1153 PLA_Obj v_pla = NULL; 1154 PetscScalar *loc_buf; 1155 Vec loc_x; 1156 1157 PetscFunctionBegin; 1158 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1159 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1160 1161 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1162 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1163 PLA_Obj_set_to_zero(v_pla); 1164 1165 /* Copy b into rhs_pla */ 1166 PLA_API_begin(); 1167 PLA_Obj_API_open(v_pla); 1168 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1169 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1170 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1171 PLA_Obj_API_close(v_pla); 1172 PLA_API_end(); 1173 1174 if (A->factor == MAT_FACTOR_LU){ 1175 /* Apply the permutations to the right hand sides */ 1176 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1177 1178 /* Solve L y = b, overwriting b with y */ 1179 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1180 1181 /* Solve U x = y (=b), overwriting b with x */ 1182 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1183 } else { /* MAT_FACTOR_CHOLESKY */ 1184 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1185 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? 
PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1186 PLA_NONUNIT_DIAG,lu->A,v_pla); 1187 } 1188 1189 /* Copy PLAPACK x into Petsc vector x */ 1190 PLA_Obj_local_length(v_pla, &loc_m); 1191 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1192 PLA_Obj_local_stride(v_pla, &loc_stride); 1193 /* 1194 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1195 */ 1196 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1197 if (!lu->pla_solved){ 1198 1199 PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc); 1200 PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc); 1201 1202 /* Create IS and cts for VecScatterring */ 1203 PLA_Obj_local_length(v_pla, &loc_m); 1204 PLA_Obj_local_stride(v_pla, &loc_stride); 1205 ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr); 1206 idx_petsc = idx_pla + loc_m; 1207 1208 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1209 for (i=0; i<loc_m; i+=lu->nb){ 1210 j = 0; 1211 while (j < lu->nb && i+j < loc_m){ 1212 idx_petsc[i+j] = rstart + j; j++; 1213 } 1214 rstart += size*lu->nb; 1215 } 1216 1217 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1218 1219 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1220 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1221 ierr = PetscFree(idx_pla);CHKERRQ(ierr); 1222 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1223 } 1224 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1225 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1226 1227 /* Free data */ 1228 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1229 PLA_Obj_free(&v_pla); 1230 1231 lu->pla_solved = PETSC_TRUE; 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1237 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1238 { 1239 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1240 PetscErrorCode ierr; 1241 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1242 PetscInt info_pla=0; 1243 PetscScalar *array,one = 1.0; 1244 1245 PetscFunctionBegin; 1246 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1247 PLA_Obj_free(&lu->A); 1248 PLA_Obj_free (&lu->pivots); 1249 } 1250 /* Create PLAPACK matrix object */ 1251 lu->A = NULL; lu->pivots = NULL; 1252 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1253 PLA_Obj_set_to_zero(lu->A); 1254 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1255 1256 /* Copy A into lu->A */ 1257 PLA_API_begin(); 1258 PLA_Obj_API_open(lu->A); 1259 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1260 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1261 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1262 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1263 PLA_Obj_API_close(lu->A); 1264 PLA_API_end(); 1265 1266 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1267 info_pla = PLA_LU(lu->A,lu->pivots); 1268 if (info_pla != 0) 1269 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1270 1271 lu->rstart = rstart; 1272 lu->mstruct = SAME_NONZERO_PATTERN; 1273 F->ops->solve = MatSolve_MPIDense; 1274 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1275 
PetscFunctionReturn(0); 1276 } 1277 1278 #undef __FUNCT__ 1279 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1280 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1281 { 1282 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1283 PetscErrorCode ierr; 1284 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1285 PetscInt info_pla=0; 1286 PetscScalar *array,one = 1.0; 1287 1288 PetscFunctionBegin; 1289 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1290 PLA_Obj_free(&lu->A); 1291 } 1292 /* Create PLAPACK matrix object */ 1293 lu->A = NULL; 1294 lu->pivots = NULL; 1295 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1296 1297 /* Copy A into lu->A */ 1298 PLA_API_begin(); 1299 PLA_Obj_API_open(lu->A); 1300 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1301 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1302 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1303 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1304 PLA_Obj_API_close(lu->A); 1305 PLA_API_end(); 1306 1307 /* Factor P A -> Chol */ 1308 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1309 if (info_pla != 0) 1310 SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1311 1312 lu->rstart = rstart; 1313 lu->mstruct = SAME_NONZERO_PATTERN; 1314 F->ops->solve = MatSolve_MPIDense; 1315 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1316 PetscFunctionReturn(0); 1317 } 1318 1319 /* Note the Petsc perm permutation is ignored */ 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1322 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1323 { 1324 PetscErrorCode ierr; 1325 PetscTruth issymmetric,set; 1326 1327 PetscFunctionBegin; 1328 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1329 if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1330 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1331 PetscFunctionReturn(0); 1332 } 1333 1334 /* Note the Petsc r and c permutations are ignored */ 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1337 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1338 { 1339 PetscErrorCode ierr; 1340 PetscInt M = A->rmap->N; 1341 Mat_Plapack *lu; 1342 1343 PetscFunctionBegin; 1344 lu = (Mat_Plapack*)F->spptr; 1345 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1346 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1347 PetscFunctionReturn(0); 1348 } 1349 1350 EXTERN_C_BEGIN 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1353 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1354 { 1355 PetscFunctionBegin; 1356 *type = MAT_SOLVER_PLAPACK; 1357 PetscFunctionReturn(0); 1358 } 1359 EXTERN_C_END 1360 1361 EXTERN_C_BEGIN 1362 #undef __FUNCT__ 1363 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1364 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1365 { 1366 PetscErrorCode ierr; 1367 Mat_Plapack *lu; 1368 PetscMPIInt size; 1369 PetscInt M=A->rmap->N; 1370 1371 PetscFunctionBegin; 1372 /* Create the factorization matrix */ 1373 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1374 ierr = 
MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1375 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1376 ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr); 1377 (*F)->spptr = (void*)lu; 1378 1379 /* Set default Plapack parameters */ 1380 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1381 lu->nb = M/size; 1382 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1383 1384 /* Set runtime options */ 1385 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1386 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1387 PetscOptionsEnd(); 1388 1389 /* Create object distribution template */ 1390 lu->templ = NULL; 1391 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1392 1393 /* Set the datatype */ 1394 #if defined(PETSC_USE_COMPLEX) 1395 lu->datatype = MPI_DOUBLE_COMPLEX; 1396 #else 1397 lu->datatype = MPI_DOUBLE; 1398 #endif 1399 1400 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1401 1402 1403 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1404 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1405 1406 if (ftype == MAT_FACTOR_LU) { 1407 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1408 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1409 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1410 } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1411 (*F)->factor = ftype; 1412 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 EXTERN_C_END 1416 #endif 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatGetFactor_mpidense_petsc" 1420 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F) 1421 { 1422 #if defined(PETSC_HAVE_PLAPACK) 1423 PetscErrorCode ierr; 1424 #endif 1425 1426 PetscFunctionBegin; 1427 #if defined(PETSC_HAVE_PLAPACK) 1428 ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr); 1429 #else 1430 SETERRQ1(PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. 
Install PLAPACK",((PetscObject)A)->type_name); 1431 #endif 1432 PetscFunctionReturn(0); 1433 } 1434 1435 #undef __FUNCT__ 1436 #define __FUNCT__ "MatAXPY_MPIDense" 1437 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1438 { 1439 PetscErrorCode ierr; 1440 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1441 1442 PetscFunctionBegin; 1443 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1444 PetscFunctionReturn(0); 1445 } 1446 1447 /* -------------------------------------------------------------------*/ 1448 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1449 MatGetRow_MPIDense, 1450 MatRestoreRow_MPIDense, 1451 MatMult_MPIDense, 1452 /* 4*/ MatMultAdd_MPIDense, 1453 MatMultTranspose_MPIDense, 1454 MatMultTransposeAdd_MPIDense, 1455 0, 1456 0, 1457 0, 1458 /*10*/ 0, 1459 0, 1460 0, 1461 0, 1462 MatTranspose_MPIDense, 1463 /*15*/ MatGetInfo_MPIDense, 1464 MatEqual_MPIDense, 1465 MatGetDiagonal_MPIDense, 1466 MatDiagonalScale_MPIDense, 1467 MatNorm_MPIDense, 1468 /*20*/ MatAssemblyBegin_MPIDense, 1469 MatAssemblyEnd_MPIDense, 1470 MatSetOption_MPIDense, 1471 MatZeroEntries_MPIDense, 1472 /*24*/ MatZeroRows_MPIDense, 1473 0, 1474 0, 1475 0, 1476 0, 1477 /*29*/ MatSetUpPreallocation_MPIDense, 1478 0, 1479 0, 1480 MatGetArray_MPIDense, 1481 MatRestoreArray_MPIDense, 1482 /*34*/ MatDuplicate_MPIDense, 1483 0, 1484 0, 1485 0, 1486 0, 1487 /*39*/ MatAXPY_MPIDense, 1488 MatGetSubMatrices_MPIDense, 1489 0, 1490 MatGetValues_MPIDense, 1491 0, 1492 /*44*/ 0, 1493 MatScale_MPIDense, 1494 0, 1495 0, 1496 0, 1497 /*49*/ 0, 1498 0, 1499 0, 1500 0, 1501 0, 1502 /*54*/ 0, 1503 0, 1504 0, 1505 0, 1506 0, 1507 /*59*/ MatGetSubMatrix_MPIDense, 1508 MatDestroy_MPIDense, 1509 MatView_MPIDense, 1510 0, 1511 0, 1512 /*64*/ 0, 1513 0, 1514 0, 1515 0, 1516 0, 1517 /*69*/ 0, 1518 0, 1519 0, 1520 0, 1521 0, 1522 /*74*/ 0, 1523 0, 1524 0, 1525 0, 1526 0, 1527 /*79*/ 0, 1528 0, 1529 0, 1530 0, 1531 /*83*/ MatLoad_MPIDense, 1532 0, 1533 0, 1534 0, 1535 0, 1536 0, 1537 /*89*/ 1538 #if defined(PETSC_HAVE_PLAPACK) 1539 MatMatMult_MPIDense_MPIDense, 1540 MatMatMultSymbolic_MPIDense_MPIDense, 1541 MatMatMultNumeric_MPIDense_MPIDense, 1542 #else 1543 0, 1544 0, 1545 0, 1546 #endif 1547 0, 1548 /*94*/ 0, 1549 0, 1550 0, 1551 0}; 1552 1553 EXTERN_C_BEGIN 1554 #undef __FUNCT__ 1555 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1556 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1557 { 1558 Mat_MPIDense *a; 1559 PetscErrorCode ierr; 1560 1561 PetscFunctionBegin; 1562 mat->preallocated = PETSC_TRUE; 1563 /* Note: For now, when data is specified above, this assumes the user correctly 1564 allocates the local dense storage space. We should add error checking. */ 1565 1566 a = (Mat_MPIDense*)mat->data; 1567 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1568 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1569 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1570 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1571 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1572 PetscFunctionReturn(0); 1573 } 1574 EXTERN_C_END 1575 1576 /*MC 1577 MAT_SOLVER_PLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices 1578 1579 run config/configure.py with the option --download-plapack 1580 1581 1582 Options Database Keys: 1583 . -mat_plapack_nprows <n> - number of rows in processor partition 1584 . 
-mat_plapack_npcols <n> - number of columns in processor partition 1585 . -mat_plapack_nb <n> - block size of template vector 1586 . -mat_plapack_nb_alg <n> - algorithmic block size 1587 - -mat_plapack_ckerror <n> - error checking flag 1588 1589 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage 1590 1591 M*/ 1592 1593 EXTERN_C_BEGIN 1594 #undef __FUNCT__ 1595 #define __FUNCT__ "MatCreate_MPIDense" 1596 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1597 { 1598 Mat_MPIDense *a; 1599 PetscErrorCode ierr; 1600 1601 PetscFunctionBegin; 1602 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1603 mat->data = (void*)a; 1604 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1605 mat->mapping = 0; 1606 1607 mat->insertmode = NOT_SET_VALUES; 1608 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1609 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1610 1611 ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr); 1612 ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr); 1613 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1614 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1615 a->nvec = mat->cmap->n; 1616 1617 /* build cache for off array entries formed */ 1618 a->donotstash = PETSC_FALSE; 1619 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1620 1621 /* stuff used for matrix vector multiply */ 1622 a->lvec = 0; 1623 a->Mvctx = 0; 1624 a->roworiented = PETSC_TRUE; 1625 1626 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1627 "MatGetDiagonalBlock_MPIDense", 1628 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1629 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1630 "MatMPIDenseSetPreallocation_MPIDense", 1631 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1632 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1633 "MatMatMult_MPIAIJ_MPIDense", 1634 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1635 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1636 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1637 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1638 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1639 "MatMatMultNumeric_MPIAIJ_MPIDense", 1640 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1641 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C", 1642 "MatGetFactor_mpidense_petsc", 1643 MatGetFactor_mpidense_petsc);CHKERRQ(ierr); 1644 #if defined(PETSC_HAVE_PLAPACK) 1645 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C", 1646 "MatGetFactor_mpidense_plapack", 1647 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1648 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1649 #endif 1650 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1651 1652 PetscFunctionReturn(0); 1653 } 1654 EXTERN_C_END 1655 1656 /*MC 1657 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1658 1659 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1660 and MATMPIDENSE otherwise. 1661 1662 Options Database Keys: 1663 . 
.  -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data. Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space. Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space. Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
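
   Example usage (a minimal sketch, not taken from an existing PETSc example: it assumes a square
   matrix whose global size M has been set elsewhere and that ierr is declared; errors are handled
   with the usual CHKERRQ pattern):
.vb
     Mat         A;
     PetscInt    i,rstart,rend;
     PetscScalar one = 1.0;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,M,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {   /* each process sets its locally owned diagonal entries */
       ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
.ve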

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap->rstart = A->rmap->rstart;
  mat->rmap->rend   = A->rmap->rend;
  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  mat->insertmode   = NOT_SET_VALUES;
  a->nvec           = oldmat->nvec;
  a->donotstash     = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,const MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
     Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
  PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.

  Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
  PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
  called from MatCreate_MPIDense() the first time an MPI dense matrix is created.

  Input Parameter:
.  comm - the communicator the matrix lives on

  Level: developer

  Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the
  same communicator (because there is some global state that is initialized and used for all matrices). In addition,
  if PLAPACK is initialized on subcommunicators of MPI_COMM_WORLD (that is, the initial matrices are created there),
  these subcommunicators cannot overlap.
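
  For example, the following sketch (not taken from an existing PETSc example; the global size M is
  assumed to be set elsewhere and ierr to be declared) creates two dense matrices on the same
  communicator, so both share the single PLAPACK 2D process mesh set up by this routine:
.vb
     Mat A,B;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,M,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,M,PETSC_NULL,&B);CHKERRQ(ierr);
.ve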

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
{
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
    Plapack_ierror = 3;
#else
    Plapack_ierror = 0;
#endif
    ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_ierror){
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
    } else {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
    }

    Plapack_nb_alg = 0;
    ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_nb_alg) {
      ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
    }
    PetscOptionsEnd();

    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#endif