#define PETSCMAT_DLL

/*
   Basic functions for parallel dense matrices.
*/


#include "../src/mat/impls/dense/mpi/mpidense.h"   /*I  "petscmat.h"  I*/
#if defined(PETSC_HAVE_PLAPACK)
static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
static MPI_Comm    Plapack_comm_2d;
EXTERN_C_BEGIN
#include "PLA.h"
EXTERN_C_END

typedef struct {
  PLA_Obj        A,pivots;
  PLA_Template   templ;
  MPI_Datatype   datatype;
  PetscInt       nb,rstart;
  VecScatter     ctx;
  IS             is_pla,is_petsc;
  PetscTruth     pla_solved;
  MatStructure   mstruct;
} Mat_Plapack;
#endif

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

   MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential (MATSEQDENSE)
   matrix that holds the locally owned rows of the operator.  For a MATSEQDENSE matrix,
   returns the matrix itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
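/*
   A minimal usage sketch for MatDenseGetLocalMatrix() (illustrative only; error
   checking omitted).  It obtains the per-process MATSEQDENSE matrix so the local
   block can be operated on directly:

      Mat Aloc;
      MatDenseGetLocalMatrix(A,&Aloc);
      MatScale(Aloc,2.0);

   For a MATMPIDENSE matrix this returns the inner sequential matrix holding the
   locally owned rows; for a MATSEQDENSE matrix it returns A itself, as implemented
   above.
*/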
#define __FUNCT__ "MatSetValues_MPIDense" 121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 122 { 123 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 124 PetscErrorCode ierr; 125 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 126 PetscTruth roworiented = A->roworiented; 127 128 PetscFunctionBegin; 129 if (v) PetscValidScalarPointer(v,6); 130 for (i=0; i<m; i++) { 131 if (idxm[i] < 0) continue; 132 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 133 if (idxm[i] >= rstart && idxm[i] < rend) { 134 row = idxm[i] - rstart; 135 if (roworiented) { 136 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 137 } else { 138 for (j=0; j<n; j++) { 139 if (idxn[j] < 0) continue; 140 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 141 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 142 } 143 } 144 } else { 145 if (!A->donotstash) { 146 if (roworiented) { 147 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 148 } else { 149 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 150 } 151 } 152 } 153 } 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatGetValues_MPIDense" 159 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 160 { 161 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 164 165 PetscFunctionBegin; 166 for (i=0; i<m; i++) { 167 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 168 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 169 if (idxm[i] >= rstart && idxm[i] < rend) { 170 row = idxm[i] - rstart; 171 for (j=0; j<n; j++) { 172 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 173 if (idxn[j] >= mat->cmap->N) { 174 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 175 } 176 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 177 } 178 } else { 179 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 180 } 181 } 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatGetArray_MPIDense" 187 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 188 { 189 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 190 PetscErrorCode ierr; 191 192 PetscFunctionBegin; 193 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 194 PetscFunctionReturn(0); 195 } 196 197 #undef __FUNCT__ 198 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 199 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 200 { 201 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 202 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 203 PetscErrorCode ierr; 204 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 205 const PetscInt *irow,*icol; 206 PetscScalar *av,*bv,*v = lmat->v; 207 Mat newmat; 208 IS iscol_local; 209 210 PetscFunctionBegin; 211 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 212 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 213 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 214 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 215 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 216 ierr = 
ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution is currently supported!  We should really check each index
     set to confirm that it is OK. ... Currently only submatrices with the same row
     partitioning as the original matrix are supported! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space; the column indices were obtained from iscol_local (created by
     ISAllGather() above), so restore and destroy that index set here */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the
         consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] =
starts[i-1] + nprocs[2*i-2];} 401 for (i=0; i<N; i++) { 402 svalues[starts[owner[i]]++] = rows[i]; 403 } 404 405 starts[0] = 0; 406 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 407 count = 0; 408 for (i=0; i<size; i++) { 409 if (nprocs[2*i+1]) { 410 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 411 } 412 } 413 ierr = PetscFree(starts);CHKERRQ(ierr); 414 415 base = owners[rank]; 416 417 /* wait on receives */ 418 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 419 count = nrecvs; 420 slen = 0; 421 while (count) { 422 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 423 /* unpack receives into our local space */ 424 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 425 source[imdex] = recv_status.MPI_SOURCE; 426 lens[imdex] = n; 427 slen += n; 428 count--; 429 } 430 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 431 432 /* move the data into the send scatter */ 433 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 434 count = 0; 435 for (i=0; i<nrecvs; i++) { 436 values = rvalues + i*nmax; 437 for (j=0; j<lens[i]; j++) { 438 lrows[count++] = values[j] - base; 439 } 440 } 441 ierr = PetscFree(rvalues);CHKERRQ(ierr); 442 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 443 ierr = PetscFree(owner);CHKERRQ(ierr); 444 ierr = PetscFree(nprocs);CHKERRQ(ierr); 445 446 /* actually zap the local rows */ 447 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 448 ierr = PetscFree(lrows);CHKERRQ(ierr); 449 450 /* wait on sends */ 451 if (nsends) { 452 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 453 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 454 ierr = PetscFree(send_status);CHKERRQ(ierr); 455 } 456 ierr = PetscFree(send_waits);CHKERRQ(ierr); 457 ierr = PetscFree(svalues);CHKERRQ(ierr); 458 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatMult_MPIDense" 464 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 465 { 466 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 467 PetscErrorCode ierr; 468 469 PetscFunctionBegin; 470 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 476 #undef __FUNCT__ 477 #define __FUNCT__ "MatMultAdd_MPIDense" 478 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 479 { 480 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 481 PetscErrorCode ierr; 482 483 PetscFunctionBegin; 484 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 485 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 486 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 487 PetscFunctionReturn(0); 488 } 489 490 #undef __FUNCT__ 491 #define __FUNCT__ "MatMultTranspose_MPIDense" 492 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 493 { 494 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 495 PetscErrorCode ierr; 496 PetscScalar zero = 0.0; 497 498 PetscFunctionBegin; 499 ierr = VecSet(yy,zero);CHKERRQ(ierr); 500 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 501 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 502 ierr = 
VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 508 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 509 { 510 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 511 PetscErrorCode ierr; 512 513 PetscFunctionBegin; 514 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 515 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 516 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 517 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 518 PetscFunctionReturn(0); 519 } 520 521 #undef __FUNCT__ 522 #define __FUNCT__ "MatGetDiagonal_MPIDense" 523 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 524 { 525 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 526 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 527 PetscErrorCode ierr; 528 PetscInt len,i,n,m = A->rmap->n,radd; 529 PetscScalar *x,zero = 0.0; 530 531 PetscFunctionBegin; 532 ierr = VecSet(v,zero);CHKERRQ(ierr); 533 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 534 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 535 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 536 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 537 radd = A->rmap->rstart*m; 538 for (i=0; i<len; i++) { 539 x[i] = aloc->v[radd + i*m + i]; 540 } 541 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatDestroy_MPIDense" 547 PetscErrorCode MatDestroy_MPIDense(Mat mat) 548 { 549 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 550 PetscErrorCode ierr; 551 #if defined(PETSC_HAVE_PLAPACK) 552 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 553 #endif 554 555 PetscFunctionBegin; 556 557 #if defined(PETSC_USE_LOG) 558 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 559 #endif 560 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 561 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 562 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 563 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 564 #if defined(PETSC_HAVE_PLAPACK) 565 if (lu) { 566 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 567 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 568 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 569 570 if (lu->is_pla) { 571 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 572 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 573 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 574 } 575 } 576 #endif 577 578 ierr = PetscFree(mdn);CHKERRQ(ierr); 579 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 580 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 581 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 582 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 583 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 584 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587 588 #undef __FUNCT__ 589 #define __FUNCT__ "MatView_MPIDense_Binary" 590 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 591 { 592 Mat_MPIDense *mdn = 
(Mat_MPIDense*)mat->data; 593 PetscErrorCode ierr; 594 PetscViewerFormat format; 595 int fd; 596 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 597 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 598 PetscScalar *work,*v,*vv; 599 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 600 MPI_Status status; 601 602 PetscFunctionBegin; 603 if (mdn->size == 1) { 604 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 605 } else { 606 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 607 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 608 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 609 610 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 611 if (format == PETSC_VIEWER_NATIVE) { 612 613 if (!rank) { 614 /* store the matrix as a dense matrix */ 615 header[0] = MAT_FILE_COOKIE; 616 header[1] = mat->rmap->N; 617 header[2] = N; 618 header[3] = MATRIX_BINARY_FORMAT_DENSE; 619 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 620 621 /* get largest work array needed for transposing array */ 622 mmax = mat->rmap->n; 623 for (i=1; i<size; i++) { 624 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 625 } 626 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 627 628 /* write out local array, by rows */ 629 m = mat->rmap->n; 630 v = a->v; 631 for (j=0; j<N; j++) { 632 for (i=0; i<m; i++) { 633 work[j + i*N] = *v++; 634 } 635 } 636 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 637 /* get largest work array to receive messages from other processes, excludes process zero */ 638 mmax = 0; 639 for (i=1; i<size; i++) { 640 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 641 } 642 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 643 for(k = 1; k < size; k++) { 644 v = vv; 645 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 646 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 647 648 for(j = 0; j < N; j++) { 649 for(i = 0; i < m; i++) { 650 work[j + i*N] = *v++; 651 } 652 } 653 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 654 } 655 ierr = PetscFree(work);CHKERRQ(ierr); 656 ierr = PetscFree(vv);CHKERRQ(ierr); 657 } else { 658 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 659 } 660 } else { 661 SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE"); 662 } 663 } 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 669 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 670 { 671 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 672 PetscErrorCode ierr; 673 PetscMPIInt size = mdn->size,rank = mdn->rank; 674 const PetscViewerType vtype; 675 PetscTruth iascii,isdraw; 676 PetscViewer sviewer; 677 PetscViewerFormat format; 678 #if defined(PETSC_HAVE_PLAPACK) 679 Mat_Plapack *lu; 680 #endif 681 682 PetscFunctionBegin; 683 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 684 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 685 if (iascii) { 686 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 687 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 688 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 689 MatInfo info; 690 ierr = 
MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n, 692 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 693 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 694 #if defined(PETSC_HAVE_PLAPACK) 695 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 696 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 697 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 698 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 699 if (mat->factor){ 700 lu=(Mat_Plapack*)(mat->spptr); 701 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 702 } 703 #else 704 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 705 #endif 706 PetscFunctionReturn(0); 707 } else if (format == PETSC_VIEWER_ASCII_INFO) { 708 PetscFunctionReturn(0); 709 } 710 } else if (isdraw) { 711 PetscDraw draw; 712 PetscTruth isnull; 713 714 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 715 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 716 if (isnull) PetscFunctionReturn(0); 717 } 718 719 if (size == 1) { 720 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 721 } else { 722 /* assemble the entire matrix onto first processor. */ 723 Mat A; 724 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 725 PetscInt *cols; 726 PetscScalar *vals; 727 728 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 729 if (!rank) { 730 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 731 } else { 732 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 733 } 734 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 735 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 736 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 737 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 738 739 /* Copy the matrix ... 
This isn't the most efficient means, 740 but it's quick for now */ 741 A->insertmode = INSERT_VALUES; 742 row = mat->rmap->rstart; m = mdn->A->rmap->n; 743 for (i=0; i<m; i++) { 744 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 745 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 746 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 747 row++; 748 } 749 750 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 752 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 753 if (!rank) { 754 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 755 } 756 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 757 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 758 ierr = MatDestroy(A);CHKERRQ(ierr); 759 } 760 PetscFunctionReturn(0); 761 } 762 763 #undef __FUNCT__ 764 #define __FUNCT__ "MatView_MPIDense" 765 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 766 { 767 PetscErrorCode ierr; 768 PetscTruth iascii,isbinary,isdraw,issocket; 769 770 PetscFunctionBegin; 771 772 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 773 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 774 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 775 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 776 777 if (iascii || issocket || isdraw) { 778 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 779 } else if (isbinary) { 780 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 781 } else { 782 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 783 } 784 PetscFunctionReturn(0); 785 } 786 787 #undef __FUNCT__ 788 #define __FUNCT__ "MatGetInfo_MPIDense" 789 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 790 { 791 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 792 Mat mdn = mat->A; 793 PetscErrorCode ierr; 794 PetscReal isend[5],irecv[5]; 795 796 PetscFunctionBegin; 797 info->block_size = 1.0; 798 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 799 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 800 isend[3] = info->memory; isend[4] = info->mallocs; 801 if (flag == MAT_LOCAL) { 802 info->nz_used = isend[0]; 803 info->nz_allocated = isend[1]; 804 info->nz_unneeded = isend[2]; 805 info->memory = isend[3]; 806 info->mallocs = isend[4]; 807 } else if (flag == MAT_GLOBAL_MAX) { 808 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 809 info->nz_used = irecv[0]; 810 info->nz_allocated = irecv[1]; 811 info->nz_unneeded = irecv[2]; 812 info->memory = irecv[3]; 813 info->mallocs = irecv[4]; 814 } else if (flag == MAT_GLOBAL_SUM) { 815 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 816 info->nz_used = irecv[0]; 817 info->nz_allocated = irecv[1]; 818 info->nz_unneeded = irecv[2]; 819 info->memory = irecv[3]; 820 info->mallocs = irecv[4]; 821 } 822 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 823 info->fill_ratio_needed = 0; 824 info->factor_mallocs = 0; 825 PetscFunctionReturn(0); 826 } 827 828 #undef __FUNCT__ 829 #define __FUNCT__ "MatSetOption_MPIDense" 830 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 831 { 832 
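  /*
     Typical calls from application code that end up here (an illustrative sketch,
     not a prescription; error checking omitted):

        MatSetOption(A,MAT_ROW_ORIENTED,PETSC_FALSE);            column-oriented MatSetValues()
        MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);  drop off-process entries instead of stashing

     Options that are not meaningful for dense storage are simply logged and ignored below.
  */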
Mat_MPIDense *a = (Mat_MPIDense*)A->data; 833 PetscErrorCode ierr; 834 835 PetscFunctionBegin; 836 switch (op) { 837 case MAT_NEW_NONZERO_LOCATIONS: 838 case MAT_NEW_NONZERO_LOCATION_ERR: 839 case MAT_NEW_NONZERO_ALLOCATION_ERR: 840 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 841 break; 842 case MAT_ROW_ORIENTED: 843 a->roworiented = flg; 844 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 845 break; 846 case MAT_NEW_DIAGONALS: 847 case MAT_USE_HASH_TABLE: 848 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 849 break; 850 case MAT_IGNORE_OFF_PROC_ENTRIES: 851 a->donotstash = flg; 852 break; 853 case MAT_SYMMETRIC: 854 case MAT_STRUCTURALLY_SYMMETRIC: 855 case MAT_HERMITIAN: 856 case MAT_SYMMETRY_ETERNAL: 857 case MAT_IGNORE_LOWER_TRIANGULAR: 858 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 859 break; 860 default: 861 SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 862 } 863 PetscFunctionReturn(0); 864 } 865 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatDiagonalScale_MPIDense" 869 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 870 { 871 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 872 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 873 PetscScalar *l,*r,x,*v; 874 PetscErrorCode ierr; 875 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 876 877 PetscFunctionBegin; 878 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 879 if (ll) { 880 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 881 if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 882 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 883 for (i=0; i<m; i++) { 884 x = l[i]; 885 v = mat->v + i; 886 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 887 } 888 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 889 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 890 } 891 if (rr) { 892 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 893 if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 894 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 895 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 897 for (i=0; i<n; i++) { 898 x = r[i]; 899 v = mat->v + i*m; 900 for (j=0; j<m; j++) { (*v++) *= x;} 901 } 902 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 903 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 904 } 905 PetscFunctionReturn(0); 906 } 907 908 #undef __FUNCT__ 909 #define __FUNCT__ "MatNorm_MPIDense" 910 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 911 { 912 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 913 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 914 PetscErrorCode ierr; 915 PetscInt i,j; 916 PetscReal sum = 0.0; 917 PetscScalar *v = mat->v; 918 919 PetscFunctionBegin; 920 if (mdn->size == 1) { 921 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 922 } else { 923 if (type == NORM_FROBENIUS) { 924 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 925 #if defined(PETSC_USE_COMPLEX) 926 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 927 #else 928 sum += (*v)*(*v); v++; 929 #endif 930 } 931 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 932 *nrm = sqrt(*nrm); 933 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 934 } else if (type == NORM_1) { 935 PetscReal *tmp,*tmp2; 936 ierr = 
PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyToPlapack"
PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy A into F->lu->A */
  ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr =
PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1036 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1037 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1038 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1039 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1040 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1041 ierr = PLA_API_end();CHKERRQ(ierr); 1042 lu->rstart = rstart; 1043 PetscFunctionReturn(0); 1044 } 1045 1046 #undef __FUNCT__ 1047 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1048 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1049 { 1050 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1051 PetscErrorCode ierr; 1052 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1053 PetscScalar *array; 1054 PetscReal one = 1.0; 1055 1056 PetscFunctionBegin; 1057 /* Copy F into A->lu->A */ 1058 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1059 ierr = PLA_API_begin();CHKERRQ(ierr); 1060 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1061 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1062 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1063 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1064 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1065 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1066 ierr = PLA_API_end();CHKERRQ(ierr); 1067 lu->rstart = rstart; 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1073 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1074 { 1075 PetscErrorCode ierr; 1076 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1077 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1078 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1079 PLA_Obj alpha = NULL,beta = NULL; 1080 1081 PetscFunctionBegin; 1082 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1083 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1084 1085 /* 1086 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1087 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1088 */ 1089 1090 /* do the multiply in PLA */ 1091 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1092 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1093 CHKMEMQ; 1094 1095 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1096 CHKMEMQ; 1097 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1098 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1099 1100 /* 1101 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1102 */ 1103 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1109 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1110 { 1111 PetscErrorCode ierr; 1112 PetscInt m=A->rmap->n,n=B->cmap->n; 1113 Mat Cmat; 1114 1115 PetscFunctionBegin; 1116 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1117 SETERRQ(PETSC_ERR_LIB,"Due to aparent bugs in PLAPACK,this is not currently supported"); 1118 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1119 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1120 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1121 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1122 ierr = 
MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1123 1124 *C = Cmat; 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1130 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1131 { 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 if (scall == MAT_INITIAL_MATRIX){ 1136 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1137 } 1138 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1139 PetscFunctionReturn(0); 1140 } 1141 1142 #undef __FUNCT__ 1143 #define __FUNCT__ "MatSolve_MPIDense" 1144 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1145 { 1146 MPI_Comm comm = ((PetscObject)A)->comm; 1147 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1148 PetscErrorCode ierr; 1149 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1150 PetscScalar *array; 1151 PetscReal one = 1.0; 1152 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1153 PLA_Obj v_pla = NULL; 1154 PetscScalar *loc_buf; 1155 Vec loc_x; 1156 1157 PetscFunctionBegin; 1158 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1159 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1160 1161 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1162 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1163 PLA_Obj_set_to_zero(v_pla); 1164 1165 /* Copy b into rhs_pla */ 1166 PLA_API_begin(); 1167 PLA_Obj_API_open(v_pla); 1168 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1169 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1170 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1171 PLA_Obj_API_close(v_pla); 1172 PLA_API_end(); 1173 1174 if (A->factor == MAT_FACTOR_LU){ 1175 /* Apply the permutations to the right hand sides */ 1176 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1177 1178 /* Solve L y = b, overwriting b with y */ 1179 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1180 1181 /* Solve U x = y (=b), overwriting b with x */ 1182 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1183 } else { /* MAT_FACTOR_CHOLESKY */ 1184 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1185 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? 
PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1186 PLA_NONUNIT_DIAG,lu->A,v_pla); 1187 } 1188 1189 /* Copy PLAPACK x into Petsc vector x */ 1190 PLA_Obj_local_length(v_pla, &loc_m); 1191 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1192 PLA_Obj_local_stride(v_pla, &loc_stride); 1193 /* 1194 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1195 */ 1196 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1197 if (!lu->pla_solved){ 1198 1199 PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc); 1200 PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc); 1201 1202 /* Create IS and cts for VecScatterring */ 1203 PLA_Obj_local_length(v_pla, &loc_m); 1204 PLA_Obj_local_stride(v_pla, &loc_stride); 1205 ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr); 1206 1207 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1208 for (i=0; i<loc_m; i+=lu->nb){ 1209 j = 0; 1210 while (j < lu->nb && i+j < loc_m){ 1211 idx_petsc[i+j] = rstart + j; j++; 1212 } 1213 rstart += size*lu->nb; 1214 } 1215 1216 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1217 1218 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1219 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1220 ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr); 1221 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1222 } 1223 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1224 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1225 1226 /* Free data */ 1227 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1228 PLA_Obj_free(&v_pla); 1229 1230 lu->pla_solved = PETSC_TRUE; 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1236 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1237 { 1238 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1239 PetscErrorCode ierr; 1240 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1241 PetscInt info_pla=0; 1242 PetscScalar *array,one = 1.0; 1243 1244 PetscFunctionBegin; 1245 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1246 PLA_Obj_free(&lu->A); 1247 PLA_Obj_free (&lu->pivots); 1248 } 1249 /* Create PLAPACK matrix object */ 1250 lu->A = NULL; lu->pivots = NULL; 1251 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1252 PLA_Obj_set_to_zero(lu->A); 1253 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1254 1255 /* Copy A into lu->A */ 1256 PLA_API_begin(); 1257 PLA_Obj_API_open(lu->A); 1258 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1259 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1260 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1261 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1262 PLA_Obj_API_close(lu->A); 1263 PLA_API_end(); 1264 1265 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1266 info_pla = PLA_LU(lu->A,lu->pivots); 1267 if (info_pla != 0) 1268 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1269 1270 lu->rstart = rstart; 1271 lu->mstruct = SAME_NONZERO_PATTERN; 1272 F->ops->solve = MatSolve_MPIDense; 1273 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1274 
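  /*
     A minimal sketch of the calling sequence that reaches this routine (assumes PETSc
     was configured with PLAPACK; error checking omitted):

        MatGetFactor(A,MAT_SOLVER_PLAPACK,MAT_FACTOR_LU,&F);
        MatLUFactorSymbolic(F,A,r,c,&info);
        MatLUFactorNumeric(F,A,&info);
        MatSolve(F,b,x);
  */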
PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1279 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1280 { 1281 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1282 PetscErrorCode ierr; 1283 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1284 PetscInt info_pla=0; 1285 PetscScalar *array,one = 1.0; 1286 1287 PetscFunctionBegin; 1288 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1289 PLA_Obj_free(&lu->A); 1290 } 1291 /* Create PLAPACK matrix object */ 1292 lu->A = NULL; 1293 lu->pivots = NULL; 1294 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1295 1296 /* Copy A into lu->A */ 1297 PLA_API_begin(); 1298 PLA_Obj_API_open(lu->A); 1299 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1300 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1301 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1302 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1303 PLA_Obj_API_close(lu->A); 1304 PLA_API_end(); 1305 1306 /* Factor P A -> Chol */ 1307 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1308 if (info_pla != 0) 1309 SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1310 1311 lu->rstart = rstart; 1312 lu->mstruct = SAME_NONZERO_PATTERN; 1313 F->ops->solve = MatSolve_MPIDense; 1314 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1315 PetscFunctionReturn(0); 1316 } 1317 1318 /* Note the Petsc perm permutation is ignored */ 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1321 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1322 { 1323 PetscErrorCode ierr; 1324 PetscTruth issymmetric,set; 1325 1326 PetscFunctionBegin; 1327 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1328 if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1329 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1330 PetscFunctionReturn(0); 1331 } 1332 1333 /* Note the Petsc r and c permutations are ignored */ 1334 #undef __FUNCT__ 1335 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1336 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1337 { 1338 PetscErrorCode ierr; 1339 PetscInt M = A->rmap->N; 1340 Mat_Plapack *lu; 1341 1342 PetscFunctionBegin; 1343 lu = (Mat_Plapack*)F->spptr; 1344 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1345 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1346 PetscFunctionReturn(0); 1347 } 1348 1349 EXTERN_C_BEGIN 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1352 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1353 { 1354 PetscFunctionBegin; 1355 *type = MAT_SOLVER_PLAPACK; 1356 PetscFunctionReturn(0); 1357 } 1358 EXTERN_C_END 1359 1360 EXTERN_C_BEGIN 1361 #undef __FUNCT__ 1362 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1363 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1364 { 1365 PetscErrorCode ierr; 1366 Mat_Plapack *lu; 1367 PetscMPIInt size; 1368 PetscInt M=A->rmap->N; 1369 1370 PetscFunctionBegin; 1371 /* Create the factorization matrix */ 1372 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1373 ierr = 
MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
  (*F)->spptr = (void*)lu;

  /* Set default Plapack parameters */
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  lu->nb = M/size;
  if (M - lu->nb*size) lu->nb++; /* round the block size up so that nb*size >= M (no cyclic distribution) */

  /* Set runtime options */
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
  PetscOptionsEnd();

  /* Create object distribution template */
  lu->templ = NULL;
  ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);

  /* Set the datatype */
#if defined(PETSC_USE_COMPLEX)
  lu->datatype = MPI_DOUBLE_COMPLEX;
#else
  lu->datatype = MPI_DOUBLE;
#endif

  ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);


  lu->pla_solved = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
  lu->mstruct    = DIFFERENT_NONZERO_PATTERN;

  if (ftype == MAT_FACTOR_LU) {
    (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
  } else if (ftype == MAT_FACTOR_CHOLESKY) {
    (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
  } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
  (*F)->factor = ftype;
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_mpidense_petsc"
PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
{
#if defined(PETSC_HAVE_PLAPACK)
  PetscErrorCode ierr;
#endif

  PetscFunctionBegin;
#if defined(PETSC_HAVE_PLAPACK)
  ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr);
#else
  SETERRQ1(PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver.
Install PLAPACK",((PetscObject)A)->type_name); 1430 #endif 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatAXPY_MPIDense" 1436 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1437 { 1438 PetscErrorCode ierr; 1439 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1440 1441 PetscFunctionBegin; 1442 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 /* -------------------------------------------------------------------*/ 1447 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1448 MatGetRow_MPIDense, 1449 MatRestoreRow_MPIDense, 1450 MatMult_MPIDense, 1451 /* 4*/ MatMultAdd_MPIDense, 1452 MatMultTranspose_MPIDense, 1453 MatMultTransposeAdd_MPIDense, 1454 0, 1455 0, 1456 0, 1457 /*10*/ 0, 1458 0, 1459 0, 1460 0, 1461 MatTranspose_MPIDense, 1462 /*15*/ MatGetInfo_MPIDense, 1463 MatEqual_MPIDense, 1464 MatGetDiagonal_MPIDense, 1465 MatDiagonalScale_MPIDense, 1466 MatNorm_MPIDense, 1467 /*20*/ MatAssemblyBegin_MPIDense, 1468 MatAssemblyEnd_MPIDense, 1469 MatSetOption_MPIDense, 1470 MatZeroEntries_MPIDense, 1471 /*24*/ MatZeroRows_MPIDense, 1472 0, 1473 0, 1474 0, 1475 0, 1476 /*29*/ MatSetUpPreallocation_MPIDense, 1477 0, 1478 0, 1479 MatGetArray_MPIDense, 1480 MatRestoreArray_MPIDense, 1481 /*34*/ MatDuplicate_MPIDense, 1482 0, 1483 0, 1484 0, 1485 0, 1486 /*39*/ MatAXPY_MPIDense, 1487 MatGetSubMatrices_MPIDense, 1488 0, 1489 MatGetValues_MPIDense, 1490 0, 1491 /*44*/ 0, 1492 MatScale_MPIDense, 1493 0, 1494 0, 1495 0, 1496 /*49*/ 0, 1497 0, 1498 0, 1499 0, 1500 0, 1501 /*54*/ 0, 1502 0, 1503 0, 1504 0, 1505 0, 1506 /*59*/ MatGetSubMatrix_MPIDense, 1507 MatDestroy_MPIDense, 1508 MatView_MPIDense, 1509 0, 1510 0, 1511 /*64*/ 0, 1512 0, 1513 0, 1514 0, 1515 0, 1516 /*69*/ 0, 1517 0, 1518 0, 1519 0, 1520 0, 1521 /*74*/ 0, 1522 0, 1523 0, 1524 0, 1525 0, 1526 /*79*/ 0, 1527 0, 1528 0, 1529 0, 1530 /*83*/ MatLoad_MPIDense, 1531 0, 1532 0, 1533 0, 1534 0, 1535 0, 1536 /*89*/ 1537 #if defined(PETSC_HAVE_PLAPACK) 1538 MatMatMult_MPIDense_MPIDense, 1539 MatMatMultSymbolic_MPIDense_MPIDense, 1540 MatMatMultNumeric_MPIDense_MPIDense, 1541 #else 1542 0, 1543 0, 1544 0, 1545 #endif 1546 0, 1547 /*94*/ 0, 1548 0, 1549 0, 1550 0}; 1551 1552 EXTERN_C_BEGIN 1553 #undef __FUNCT__ 1554 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1555 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1556 { 1557 Mat_MPIDense *a; 1558 PetscErrorCode ierr; 1559 1560 PetscFunctionBegin; 1561 mat->preallocated = PETSC_TRUE; 1562 /* Note: For now, when data is specified above, this assumes the user correctly 1563 allocates the local dense storage space. We should add error checking. */ 1564 1565 a = (Mat_MPIDense*)mat->data; 1566 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1567 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1568 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1569 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1570 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1571 PetscFunctionReturn(0); 1572 } 1573 EXTERN_C_END 1574 1575 /*MC 1576 MAT_SOLVER_PLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices 1577 1578 run config/configure.py with the option --download-plapack 1579 1580 1581 Options Database Keys: 1582 . -mat_plapack_nprows <n> - number of rows in processor partition 1583 . 
-mat_plapack_npcols <n> - number of columns in processor partition 1584 . -mat_plapack_nb <n> - block size of template vector 1585 . -mat_plapack_nb_alg <n> - algorithmic block size 1586 - -mat_plapack_ckerror <n> - error checking flag 1587 1588 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage 1589 1590 M*/ 1591 1592 EXTERN_C_BEGIN 1593 #undef __FUNCT__ 1594 #define __FUNCT__ "MatCreate_MPIDense" 1595 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1596 { 1597 Mat_MPIDense *a; 1598 PetscErrorCode ierr; 1599 1600 PetscFunctionBegin; 1601 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1602 mat->data = (void*)a; 1603 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1604 mat->mapping = 0; 1605 1606 mat->insertmode = NOT_SET_VALUES; 1607 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1608 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1609 1610 ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr); 1611 ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr); 1612 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1613 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1614 a->nvec = mat->cmap->n; 1615 1616 /* build cache for off array entries formed */ 1617 a->donotstash = PETSC_FALSE; 1618 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1619 1620 /* stuff used for matrix vector multiply */ 1621 a->lvec = 0; 1622 a->Mvctx = 0; 1623 a->roworiented = PETSC_TRUE; 1624 1625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1626 "MatGetDiagonalBlock_MPIDense", 1627 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1628 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1629 "MatMPIDenseSetPreallocation_MPIDense", 1630 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1632 "MatMatMult_MPIAIJ_MPIDense", 1633 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1634 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1635 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1636 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1637 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1638 "MatMatMultNumeric_MPIAIJ_MPIDense", 1639 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1640 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C", 1641 "MatGetFactor_mpidense_petsc", 1642 MatGetFactor_mpidense_petsc);CHKERRQ(ierr); 1643 #if defined(PETSC_HAVE_PLAPACK) 1644 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C", 1645 "MatGetFactor_mpidense_plapack", 1646 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1647 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1648 #endif 1649 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1650 1651 PetscFunctionReturn(0); 1652 } 1653 EXTERN_C_END 1654 1655 /*MC 1656 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1657 1658 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1659 and MATMPIDENSE otherwise. 1660 1661 Options Database Keys: 1662 . 
-mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1663 1664 Level: beginner 1665 1666 1667 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE 1668 M*/ 1669 1670 EXTERN_C_BEGIN 1671 #undef __FUNCT__ 1672 #define __FUNCT__ "MatCreate_Dense" 1673 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1674 { 1675 PetscErrorCode ierr; 1676 PetscMPIInt size; 1677 1678 PetscFunctionBegin; 1679 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1680 if (size == 1) { 1681 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1682 } else { 1683 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1684 } 1685 PetscFunctionReturn(0); 1686 } 1687 EXTERN_C_END 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1691 /*@C 1692 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1693 1694 Not collective 1695 1696 Input Parameters: 1697 + A - the matrix 1698 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1699 to control all matrix memory allocation. 1700 1701 Notes: 1702 The dense format is fully compatible with standard Fortran 77 1703 storage by columns. 1704 1705 The data input variable is intended primarily for Fortran programmers 1706 who wish to allocate their own matrix memory space. Most users should 1707 set data=PETSC_NULL. 1708 1709 Level: intermediate 1710 1711 .keywords: matrix, dense, parallel 1712 1713 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1714 @*/ 1715 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1716 { 1717 PetscErrorCode ierr,(*f)(Mat,PetscScalar *); 1718 1719 PetscFunctionBegin; 1720 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1721 if (f) { 1722 ierr = (*f)(mat,data);CHKERRQ(ierr); 1723 } 1724 PetscFunctionReturn(0); 1725 } 1726 1727 #undef __FUNCT__ 1728 #define __FUNCT__ "MatCreateMPIDense" 1729 /*@C 1730 MatCreateMPIDense - Creates a parallel matrix in dense format. 1731 1732 Collective on MPI_Comm 1733 1734 Input Parameters: 1735 + comm - MPI communicator 1736 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 1737 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given) 1738 . M - number of global rows (or PETSC_DECIDE to have it calculated if m is given) 1739 . N - number of global columns (or PETSC_DECIDE to have it calculated if n is given) 1740 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1741 to control all matrix memory allocation. 1742 1743 Output Parameter: 1744 . A - the matrix 1745 1746 Notes: 1747 The dense format is fully compatible with standard Fortran 77 1748 storage by columns. 1749 1750 The data input variable is intended primarily for Fortran programmers 1751 who wish to allocate their own matrix memory space. Most users should 1752 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1753 1754 The user MUST specify either the local or global matrix dimensions 1755 (possibly both).
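   Example usage (a minimal sketch, not taken from the PETSc examples; the 10x10 global size and the
   PETSC_DECIDE local sizes are arbitrary choices):
.vb
      Mat            A;
      PetscErrorCode ierr;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
.ve
   Values would normally be inserted with MatSetValues() on the locally owned rows before the two
   assembly calls.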
1756 1757 Level: intermediate 1758 1759 .keywords: matrix,dense, parallel 1760 1761 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1762 @*/ 1763 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1764 { 1765 PetscErrorCode ierr; 1766 PetscMPIInt size; 1767 1768 PetscFunctionBegin; 1769 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1770 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1771 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1772 if (size > 1) { 1773 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1774 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1775 } else { 1776 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1777 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1778 } 1779 PetscFunctionReturn(0); 1780 } 1781 1782 #undef __FUNCT__ 1783 #define __FUNCT__ "MatDuplicate_MPIDense" 1784 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1785 { 1786 Mat mat; 1787 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1788 PetscErrorCode ierr; 1789 1790 PetscFunctionBegin; 1791 *newmat = 0; 1792 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1793 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1794 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1795 a = (Mat_MPIDense*)mat->data; 1796 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1797 mat->factor = A->factor; 1798 mat->assembled = PETSC_TRUE; 1799 mat->preallocated = PETSC_TRUE; 1800 1801 mat->rmap->rstart = A->rmap->rstart; 1802 mat->rmap->rend = A->rmap->rend; 1803 a->size = oldmat->size; 1804 a->rank = oldmat->rank; 1805 mat->insertmode = NOT_SET_VALUES; 1806 a->nvec = oldmat->nvec; 1807 a->donotstash = oldmat->donotstash; 1808 1809 ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1810 ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1811 1812 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1813 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1814 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1815 1816 *newmat = mat; 1817 PetscFunctionReturn(0); 1818 } 1819 1820 #include "petscsys.h" 1821 1822 #undef __FUNCT__ 1823 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1824 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat) 1825 { 1826 PetscErrorCode ierr; 1827 PetscMPIInt rank,size; 1828 PetscInt *rowners,i,m,nz,j; 1829 PetscScalar *array,*vals,*vals_ptr; 1830 MPI_Status status; 1831 1832 PetscFunctionBegin; 1833 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1834 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1835 1836 /* determine ownership of all rows */ 1837 m = M/size + ((M % size) > rank); 1838 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1839 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1840 rowners[0] = 0; 1841 for (i=2; i<=size; i++) { 1842 rowners[i] += rowners[i-1]; 1843 } 1844 1845 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 1846 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1847 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 1848 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 1849 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1850 1851 if (!rank) { 1852 ierr = 
PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1853 1854 /* read in my part of the matrix numerical values */ 1855 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1856 1857 /* insert into the matrix by row (this is why we cannot read directly into array) */ 1858 vals_ptr = vals; 1859 for (i=0; i<m; i++) { 1860 for (j=0; j<N; j++) { 1861 array[i + j*m] = *vals_ptr++; 1862 } 1863 } 1864 1865 /* read in other processors and ship out */ 1866 for (i=1; i<size; i++) { 1867 nz = (rowners[i+1] - rowners[i])*N; 1868 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1869 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr); 1870 } 1871 } else { 1872 /* receive numeric values */ 1873 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1874 1875 /* receive message of values */ 1876 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr); 1877 1878 /* insert into the matrix by row (this is why we cannot read directly into array) */ 1879 vals_ptr = vals; 1880 for (i=0; i<m; i++) { 1881 for (j=0; j<N; j++) { 1882 array[i + j*m] = *vals_ptr++; 1883 } 1884 } 1885 } 1886 ierr = PetscFree(rowners);CHKERRQ(ierr); 1887 ierr = PetscFree(vals);CHKERRQ(ierr); 1888 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1889 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatLoad_MPIDense" 1895 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat) 1896 { 1897 Mat A; 1898 PetscScalar *vals,*svals; 1899 MPI_Comm comm = ((PetscObject)viewer)->comm; 1900 MPI_Status status; 1901 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1902 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1903 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1904 PetscInt i,nz,j,rstart,rend; 1905 int fd; 1906 PetscErrorCode ierr; 1907 1908 PetscFunctionBegin; 1909 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1910 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1911 if (!rank) { 1912 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1913 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1914 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1915 } 1916 1917 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1918 M = header[1]; N = header[2]; nz = header[3]; 1919 1920 /* 1921 Handle case where matrix is stored on disk as a dense matrix 1922 */ 1923 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1924 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr); 1925 PetscFunctionReturn(0); 1926 } 1927 1928 /* determine ownership of all rows */ 1929 m = PetscMPIIntCast(M/size + ((M % size) > rank)); 1930 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1931 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1932 rowners[0] = 0; 1933 for (i=2; i<=size; i++) { 1934 rowners[i] += rowners[i-1]; 1935 } 1936 rstart = rowners[rank]; 1937 rend = rowners[rank+1]; 1938 1939 /* distribute row lengths to all processors */ 1940 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 1941 if (!rank) { 1942 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1943 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1944 ierr =
PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1945 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1946 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1947 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1948 } else { 1949 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1950 } 1951 1952 if (!rank) { 1953 /* calculate the number of nonzeros on each processor */ 1954 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1955 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1956 for (i=0; i<size; i++) { 1957 for (j=rowners[i]; j< rowners[i+1]; j++) { 1958 procsnz[i] += rowlengths[j]; 1959 } 1960 } 1961 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1962 1963 /* determine max buffer needed and allocate it */ 1964 maxnz = 0; 1965 for (i=0; i<size; i++) { 1966 maxnz = PetscMax(maxnz,procsnz[i]); 1967 } 1968 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1969 1970 /* read in my part of the matrix column indices */ 1971 nz = procsnz[0]; 1972 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1973 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1974 1975 /* read in every one elses and ship off */ 1976 for (i=1; i<size; i++) { 1977 nz = procsnz[i]; 1978 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1979 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1980 } 1981 ierr = PetscFree(cols);CHKERRQ(ierr); 1982 } else { 1983 /* determine buffer space needed for message */ 1984 nz = 0; 1985 for (i=0; i<m; i++) { 1986 nz += ourlens[i]; 1987 } 1988 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1989 1990 /* receive message of column indices*/ 1991 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1992 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1993 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1994 } 1995 1996 /* loop over local rows, determining number of off diagonal entries */ 1997 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1998 jj = 0; 1999 for (i=0; i<m; i++) { 2000 for (j=0; j<ourlens[i]; j++) { 2001 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 2002 jj++; 2003 } 2004 } 2005 2006 /* create our matrix */ 2007 for (i=0; i<m; i++) { 2008 ourlens[i] -= offlens[i]; 2009 } 2010 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 2011 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2012 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 2013 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 2014 A = *newmat; 2015 for (i=0; i<m; i++) { 2016 ourlens[i] += offlens[i]; 2017 } 2018 2019 if (!rank) { 2020 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2021 2022 /* read in my part of the matrix numerical values */ 2023 nz = procsnz[0]; 2024 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2025 2026 /* insert into matrix */ 2027 jj = rstart; 2028 smycols = mycols; 2029 svals = vals; 2030 for (i=0; i<m; i++) { 2031 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2032 smycols += ourlens[i]; 2033 svals += ourlens[i]; 2034 jj++; 2035 } 2036 2037 /* read in other processors and ship out */ 2038 for (i=1; i<size; i++) { 2039 nz = procsnz[i]; 2040 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2041 ierr = 
MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2042 } 2043 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2044 } else { 2045 /* receive numeric values */ 2046 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2047 2048 /* receive message of values */ 2049 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2050 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2051 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2052 2053 /* insert into matrix */ 2054 jj = rstart; 2055 smycols = mycols; 2056 svals = vals; 2057 for (i=0; i<m; i++) { 2058 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2059 smycols += ourlens[i]; 2060 svals += ourlens[i]; 2061 jj++; 2062 } 2063 } 2064 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2065 ierr = PetscFree(vals);CHKERRQ(ierr); 2066 ierr = PetscFree(mycols);CHKERRQ(ierr); 2067 ierr = PetscFree(rowners);CHKERRQ(ierr); 2068 2069 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2070 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "MatEqual_MPIDense" 2076 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag) 2077 { 2078 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2079 Mat a,b; 2080 PetscTruth flg; 2081 PetscErrorCode ierr; 2082 2083 PetscFunctionBegin; 2084 a = matA->A; 2085 b = matB->A; 2086 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2087 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2088 PetscFunctionReturn(0); 2089 } 2090 2091 #if defined(PETSC_HAVE_PLAPACK) 2092 2093 #undef __FUNCT__ 2094 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2095 /*@C 2096 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2097 Level: developer 2098 2099 .keywords: Petsc, destroy, package, PLAPACK 2100 .seealso: PetscFinalize() 2101 @*/ 2102 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void) 2103 { 2104 PetscErrorCode ierr; 2105 2106 PetscFunctionBegin; 2107 ierr = PLA_Finalize();CHKERRQ(ierr); 2108 PetscFunctionReturn(0); 2109 } 2110 2111 #undef __FUNCT__ 2112 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2113 /*@C 2114 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2115 called from MatCreate_MPIDense() the first time an MPI dense matrix is created. 2116 2117 Input Parameter: 2118 . comm - the communicator the matrix lives on 2119 2120 Level: developer 2121 2122 Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the 2123 same communicator (because there is some global state that is initialized and used for all matrices). In addition, if 2124 the matrices that trigger PLAPACK initialization live on subcommunicators of MPI_COMM_WORLD, these subcommunicators 2125 cannot overlap.
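   For example (a sketch; the executable name ./ex1, the process count, and the 2x2 mesh are
   illustrative assumptions), on 4 processes the PLAPACK process mesh could be selected with
.vb
      mpiexec -n 4 ./ex1 -mat_type mpidense -mat_plapack_nprows 2 -mat_plapack_npcols 2
.ve
   Because the 2D communicator is created only on the first call to this routine, these options must
   be in the options database before the first MATMPIDENSE matrix is created.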
2126 2127 .keywords: Petsc, initialize, package, PLAPACK 2128 .seealso: PetscInitializePackage(), PetscInitialize() 2129 @*/ 2130 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm) 2131 { 2132 PetscMPIInt size; 2133 PetscErrorCode ierr; 2134 2135 PetscFunctionBegin; 2136 if (!PLA_Initialized(PETSC_NULL)) { 2137 2138 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2139 Plapack_nprows = 1; 2140 Plapack_npcols = size; 2141 2142 ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr); 2143 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr); 2144 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr); 2145 #if defined(PETSC_USE_DEBUG) 2146 Plapack_ierror = 3; 2147 #else 2148 Plapack_ierror = 0; 2149 #endif 2150 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr); 2151 if (Plapack_ierror){ 2152 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 2153 } else { 2154 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 2155 } 2156 2157 Plapack_nb_alg = 0; 2158 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr); 2159 if (Plapack_nb_alg) { 2160 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr); 2161 } 2162 PetscOptionsEnd(); 2163 2164 ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr); 2165 ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr); 2166 ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr); 2167 } 2168 PetscFunctionReturn(0); 2169 } 2170 2171 #endif 2172
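/*
   Usage note (an illustrative sketch, not taken from the PETSc examples): when PETSc has been
   configured with --download-plapack, the PLAPACK based parallel LU factorization can be requested
   for a MATMPIDENSE matrix A through the factor interface registered above, e.g.

      Mat F;
      ierr = MatGetFactor(A,MAT_SOLVER_PLAPACK,MAT_FACTOR_LU,&F);CHKERRQ(ierr);

   followed by the usual MatLUFactorSymbolic(), MatLUFactorNumeric() and MatSolve() calls, or by
   selecting the solver package on the preconditioner when solving through KSP (see the
   MAT_SOLVER_PLAPACK options listed above).
*/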