#define PETSCMAT_DLL

/*
   Basic functions for basic parallel dense matrices.
*/


#include "../src/mat/impls/dense/mpi/mpidense.h"    /*I "petscmat.h" I*/
#if defined(PETSC_HAVE_PLAPACK)
static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
static MPI_Comm    Plapack_comm_2d;
EXTERN_C_BEGIN
#include "PLA.h"
EXTERN_C_END

typedef struct {
  PLA_Obj      A,pivots;
  PLA_Template templ;
  MPI_Datatype datatype;
  PetscInt     nb,rstart;
  VecScatter   ctx;
  IS           is_pla,is_petsc;
  PetscTruth   pla_solved;
  MatStructure mstruct;
} Mat_Plapack;
#endif

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
              matrix that represents the operator; for a sequential matrix it returns the matrix itself.

    Input Parameter:
.      A - the sequential or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows are supported");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
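/*
   The routine below implements MatSetValues() for MPI dense matrices: entries whose global
   row is owned by this process go directly into the local SeqDense matrix, while entries for
   off-process rows are stashed and shipped to their owners during MatAssemblyBegin/End().
   A minimal usage sketch from application code (grow, gcol and value are placeholders; the
   indices are global, and any process may set any entry):

      ierr = MatSetValues(mat,1,&grow,1,&gcol,&value,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/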
#define __FUNCT__ "MatSetValues_MPIDense" 121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 122 { 123 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 124 PetscErrorCode ierr; 125 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 126 PetscTruth roworiented = A->roworiented; 127 128 PetscFunctionBegin; 129 for (i=0; i<m; i++) { 130 if (idxm[i] < 0) continue; 131 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 132 if (idxm[i] >= rstart && idxm[i] < rend) { 133 row = idxm[i] - rstart; 134 if (roworiented) { 135 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 136 } else { 137 for (j=0; j<n; j++) { 138 if (idxn[j] < 0) continue; 139 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 140 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 141 } 142 } 143 } else { 144 if (!A->donotstash) { 145 if (roworiented) { 146 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 147 } else { 148 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 149 } 150 } 151 } 152 } 153 PetscFunctionReturn(0); 154 } 155 156 #undef __FUNCT__ 157 #define __FUNCT__ "MatGetValues_MPIDense" 158 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 159 { 160 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 161 PetscErrorCode ierr; 162 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 163 164 PetscFunctionBegin; 165 for (i=0; i<m; i++) { 166 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 167 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 168 if (idxm[i] >= rstart && idxm[i] < rend) { 169 row = idxm[i] - rstart; 170 for (j=0; j<n; j++) { 171 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 172 if (idxn[j] >= mat->cmap->N) { 173 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 174 } 175 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 176 } 177 } else { 178 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 179 } 180 } 181 PetscFunctionReturn(0); 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatGetArray_MPIDense" 186 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 187 { 188 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 189 PetscErrorCode ierr; 190 191 PetscFunctionBegin; 192 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 193 PetscFunctionReturn(0); 194 } 195 196 #undef __FUNCT__ 197 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 198 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 199 { 200 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 201 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 202 PetscErrorCode ierr; 203 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 204 const PetscInt *irow,*icol; 205 PetscScalar *av,*bv,*v = lmat->v; 206 Mat newmat; 207 IS iscol_local; 208 209 PetscFunctionBegin; 210 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 211 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 212 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 213 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 214 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 215 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global 
number of columns, size of iscol_local */ 216 217 /* No parallel redistribution currently supported! Should really check each index set 218 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 219 original matrix! */ 220 221 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 222 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 223 224 /* Check submatrix call */ 225 if (scall == MAT_REUSE_MATRIX) { 226 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 227 /* Really need to test rows and column sizes! */ 228 newmat = *B; 229 } else { 230 /* Create and fill new matrix */ 231 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 232 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 233 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 234 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 235 } 236 237 /* Now extract the data pointers and do the copy, column at a time */ 238 newmatd = (Mat_MPIDense*)newmat->data; 239 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 240 241 for (i=0; i<Ncols; i++) { 242 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 243 for (j=0; j<nrows; j++) { 244 *bv++ = av[irow[j] - rstart]; 245 } 246 } 247 248 /* Assemble the matrices so that the correct flags are set */ 249 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 250 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 251 252 /* Free work space */ 253 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 254 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 255 *B = newmat; 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatRestoreArray_MPIDense" 261 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 262 { 263 PetscFunctionBegin; 264 PetscFunctionReturn(0); 265 } 266 267 #undef __FUNCT__ 268 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 269 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 270 { 271 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 272 MPI_Comm comm = ((PetscObject)mat)->comm; 273 PetscErrorCode ierr; 274 PetscInt nstash,reallocs; 275 InsertMode addv; 276 277 PetscFunctionBegin; 278 /* make sure all processors are either in INSERTMODE or ADDMODE */ 279 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 280 if (addv == (ADD_VALUES|INSERT_VALUES)) { 281 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 282 } 283 mat->insertmode = addv; /* in case this processor had no cache */ 284 285 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 286 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 287 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 293 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 294 { 295 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 296 PetscErrorCode ierr; 297 PetscInt i,*row,*col,flg,j,rstart,ncols; 298 PetscMPIInt n; 299 PetscScalar *val; 300 InsertMode addv=mat->insertmode; 301 302 PetscFunctionBegin; 303 /* wait on receives */ 304 while (1) { 305 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 306 if (!flg) break; 307 308 for (i=0; i<n;) { 309 /* Now identify the consecutive vals belonging to the same row */ 310 for 
(j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 311 if (j < n) ncols = j-i; 312 else ncols = n-i; 313 /* Now assemble all these values with a single function call */ 314 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 315 i = j; 316 } 317 } 318 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 319 320 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 321 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 322 323 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 324 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 325 } 326 PetscFunctionReturn(0); 327 } 328 329 #undef __FUNCT__ 330 #define __FUNCT__ "MatZeroEntries_MPIDense" 331 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 332 { 333 PetscErrorCode ierr; 334 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 335 336 PetscFunctionBegin; 337 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 338 PetscFunctionReturn(0); 339 } 340 341 /* the code does not do the diagonal entries correctly unless the 342 matrix is square and the column and row owerships are identical. 343 This is a BUG. The only way to fix it seems to be to access 344 mdn->A and mdn->B directly and not through the MatZeroRows() 345 routine. 346 */ 347 #undef __FUNCT__ 348 #define __FUNCT__ "MatZeroRows_MPIDense" 349 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 350 { 351 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 352 PetscErrorCode ierr; 353 PetscInt i,*owners = A->rmap->range; 354 PetscInt *nprocs,j,idx,nsends; 355 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 356 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 357 PetscInt *lens,*lrows,*values; 358 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 359 MPI_Comm comm = ((PetscObject)A)->comm; 360 MPI_Request *send_waits,*recv_waits; 361 MPI_Status recv_status,*send_status; 362 PetscTruth found; 363 364 PetscFunctionBegin; 365 /* first count number of contributors to each processor */ 366 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 367 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 368 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 369 for (i=0; i<N; i++) { 370 idx = rows[i]; 371 found = PETSC_FALSE; 372 for (j=0; j<size; j++) { 373 if (idx >= owners[j] && idx < owners[j+1]) { 374 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 375 } 376 } 377 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 378 } 379 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 380 381 /* inform other processors of number of messages and max length*/ 382 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 383 384 /* post receives: */ 385 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 386 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 387 for (i=0; i<nrecvs; i++) { 388 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 389 } 390 391 /* do sends: 392 1) starts[i] gives the starting index in svalues for stuff going to 393 the ith processor 394 */ 395 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 396 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 397 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 398 starts[0] = 0; 399 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 400 for (i=0; i<N; i++) { 401 
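    /* pack each requested global row into the block of svalues reserved for its owning
       process; starts[owner[i]] is advanced as the entries are filled, which is why starts[]
       is rebuilt right after this loop, before the sends are posted */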
svalues[starts[owner[i]]++] = rows[i]; 402 } 403 404 starts[0] = 0; 405 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 406 count = 0; 407 for (i=0; i<size; i++) { 408 if (nprocs[2*i+1]) { 409 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 410 } 411 } 412 ierr = PetscFree(starts);CHKERRQ(ierr); 413 414 base = owners[rank]; 415 416 /* wait on receives */ 417 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 418 source = lens + nrecvs; 419 count = nrecvs; slen = 0; 420 while (count) { 421 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 422 /* unpack receives into our local space */ 423 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 424 source[imdex] = recv_status.MPI_SOURCE; 425 lens[imdex] = n; 426 slen += n; 427 count--; 428 } 429 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 430 431 /* move the data into the send scatter */ 432 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 433 count = 0; 434 for (i=0; i<nrecvs; i++) { 435 values = rvalues + i*nmax; 436 for (j=0; j<lens[i]; j++) { 437 lrows[count++] = values[j] - base; 438 } 439 } 440 ierr = PetscFree(rvalues);CHKERRQ(ierr); 441 ierr = PetscFree(lens);CHKERRQ(ierr); 442 ierr = PetscFree(owner);CHKERRQ(ierr); 443 ierr = PetscFree(nprocs);CHKERRQ(ierr); 444 445 /* actually zap the local rows */ 446 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 447 ierr = PetscFree(lrows);CHKERRQ(ierr); 448 449 /* wait on sends */ 450 if (nsends) { 451 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 452 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 453 ierr = PetscFree(send_status);CHKERRQ(ierr); 454 } 455 ierr = PetscFree(send_waits);CHKERRQ(ierr); 456 ierr = PetscFree(svalues);CHKERRQ(ierr); 457 458 PetscFunctionReturn(0); 459 } 460 461 #undef __FUNCT__ 462 #define __FUNCT__ "MatMult_MPIDense" 463 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 464 { 465 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 466 PetscErrorCode ierr; 467 468 PetscFunctionBegin; 469 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 470 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 472 PetscFunctionReturn(0); 473 } 474 475 #undef __FUNCT__ 476 #define __FUNCT__ "MatMultAdd_MPIDense" 477 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 478 { 479 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 480 PetscErrorCode ierr; 481 482 PetscFunctionBegin; 483 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 484 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 485 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 486 PetscFunctionReturn(0); 487 } 488 489 #undef __FUNCT__ 490 #define __FUNCT__ "MatMultTranspose_MPIDense" 491 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 492 { 493 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 494 PetscErrorCode ierr; 495 PetscScalar zero = 0.0; 496 497 PetscFunctionBegin; 498 ierr = VecSet(yy,zero);CHKERRQ(ierr); 499 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 500 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 501 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 502 
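  /* at this point each process has applied the transpose of its local row block to its local
     piece of xx, producing a full-length partial result in lvec; the reverse scatter with
     ADD_VALUES has accumulated those partial results into the distributed vector yy */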
PetscFunctionReturn(0); 503 } 504 505 #undef __FUNCT__ 506 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 507 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 508 { 509 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 510 PetscErrorCode ierr; 511 512 PetscFunctionBegin; 513 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 514 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 515 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 516 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 517 PetscFunctionReturn(0); 518 } 519 520 #undef __FUNCT__ 521 #define __FUNCT__ "MatGetDiagonal_MPIDense" 522 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 523 { 524 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 525 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 526 PetscErrorCode ierr; 527 PetscInt len,i,n,m = A->rmap->n,radd; 528 PetscScalar *x,zero = 0.0; 529 530 PetscFunctionBegin; 531 ierr = VecSet(v,zero);CHKERRQ(ierr); 532 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 533 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 534 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 535 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 536 radd = A->rmap->rstart*m; 537 for (i=0; i<len; i++) { 538 x[i] = aloc->v[radd + i*m + i]; 539 } 540 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 541 PetscFunctionReturn(0); 542 } 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatDestroy_MPIDense" 546 PetscErrorCode MatDestroy_MPIDense(Mat mat) 547 { 548 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 549 PetscErrorCode ierr; 550 #if defined(PETSC_HAVE_PLAPACK) 551 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 552 #endif 553 554 PetscFunctionBegin; 555 556 #if defined(PETSC_USE_LOG) 557 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 558 #endif 559 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 560 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 561 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 562 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 563 #if defined(PETSC_HAVE_PLAPACK) 564 if (lu) { 565 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 566 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 567 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 568 569 if (lu->is_pla) { 570 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 571 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 572 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 573 } 574 } 575 #endif 576 577 ierr = PetscFree(mdn);CHKERRQ(ierr); 578 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 579 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 580 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 581 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 582 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 583 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 584 PetscFunctionReturn(0); 585 } 586 587 #undef __FUNCT__ 588 #define __FUNCT__ "MatView_MPIDense_Binary" 589 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 590 { 591 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 592 PetscErrorCode ierr; 593 PetscViewerFormat format; 594 int fd; 595 PetscInt 
header[4],mmax,N = mat->cmap->N,i,j,m,k; 596 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 597 PetscScalar *work,*v,*vv; 598 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 599 MPI_Status status; 600 601 PetscFunctionBegin; 602 if (mdn->size == 1) { 603 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 604 } else { 605 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 606 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 607 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 608 609 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 610 if (format == PETSC_VIEWER_NATIVE) { 611 612 if (!rank) { 613 /* store the matrix as a dense matrix */ 614 header[0] = MAT_FILE_COOKIE; 615 header[1] = mat->rmap->N; 616 header[2] = N; 617 header[3] = MATRIX_BINARY_FORMAT_DENSE; 618 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 619 620 /* get largest work array needed for transposing array */ 621 mmax = mat->rmap->n; 622 for (i=1; i<size; i++) { 623 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 624 } 625 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 626 627 /* write out local array, by rows */ 628 m = mat->rmap->n; 629 v = a->v; 630 for (j=0; j<N; j++) { 631 for (i=0; i<m; i++) { 632 work[j + i*N] = *v++; 633 } 634 } 635 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 636 /* get largest work array to receive messages from other processes, excludes process zero */ 637 mmax = 0; 638 for (i=1; i<size; i++) { 639 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 640 } 641 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 642 for(k = 1; k < size; k++) { 643 v = vv; 644 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 645 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 646 647 for(j = 0; j < N; j++) { 648 for(i = 0; i < m; i++) { 649 work[j + i*N] = *v++; 650 } 651 } 652 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 653 } 654 ierr = PetscFree(work);CHKERRQ(ierr); 655 ierr = PetscFree(vv);CHKERRQ(ierr); 656 } else { 657 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 658 } 659 } else { 660 SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE"); 661 } 662 } 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 668 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 669 { 670 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 671 PetscErrorCode ierr; 672 PetscMPIInt size = mdn->size,rank = mdn->rank; 673 const PetscViewerType vtype; 674 PetscTruth iascii,isdraw; 675 PetscViewer sviewer; 676 PetscViewerFormat format; 677 #if defined(PETSC_HAVE_PLAPACK) 678 Mat_Plapack *lu; 679 #endif 680 681 PetscFunctionBegin; 682 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 683 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 684 if (iascii) { 685 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 686 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 687 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 688 MatInfo info; 689 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 690 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz 
alloced %D mem %D \n",rank,mat->rmap->n, 691 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 692 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 693 #if defined(PETSC_HAVE_PLAPACK) 694 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 695 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 696 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 697 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 698 if (mat->factor){ 699 lu=(Mat_Plapack*)(mat->spptr); 700 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 701 } 702 #else 703 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 704 #endif 705 PetscFunctionReturn(0); 706 } else if (format == PETSC_VIEWER_ASCII_INFO) { 707 PetscFunctionReturn(0); 708 } 709 } else if (isdraw) { 710 PetscDraw draw; 711 PetscTruth isnull; 712 713 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 714 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 715 if (isnull) PetscFunctionReturn(0); 716 } 717 718 if (size == 1) { 719 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 720 } else { 721 /* assemble the entire matrix onto first processor. */ 722 Mat A; 723 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 724 PetscInt *cols; 725 PetscScalar *vals; 726 727 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 728 if (!rank) { 729 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 730 } else { 731 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 732 } 733 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 734 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 735 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 736 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 737 738 /* Copy the matrix ... 
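       every locally owned row is fetched with MatGetRow_MPIDense() and re-inserted with
       MatSetValues_MPIDense() into the temporary matrix A, whose rows all live on process 0,
       so that process 0 can display the complete matrix through the singleton viewer.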
This isn't the most efficient means, 739 but it's quick for now */ 740 A->insertmode = INSERT_VALUES; 741 row = mat->rmap->rstart; m = mdn->A->rmap->n; 742 for (i=0; i<m; i++) { 743 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 744 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 745 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 746 row++; 747 } 748 749 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 750 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 751 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 752 if (!rank) { 753 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 754 } 755 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 756 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 757 ierr = MatDestroy(A);CHKERRQ(ierr); 758 } 759 PetscFunctionReturn(0); 760 } 761 762 #undef __FUNCT__ 763 #define __FUNCT__ "MatView_MPIDense" 764 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 765 { 766 PetscErrorCode ierr; 767 PetscTruth iascii,isbinary,isdraw,issocket; 768 769 PetscFunctionBegin; 770 771 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 772 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 773 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 774 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 775 776 if (iascii || issocket || isdraw) { 777 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 778 } else if (isbinary) { 779 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 780 } else { 781 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 782 } 783 PetscFunctionReturn(0); 784 } 785 786 #undef __FUNCT__ 787 #define __FUNCT__ "MatGetInfo_MPIDense" 788 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 789 { 790 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 791 Mat mdn = mat->A; 792 PetscErrorCode ierr; 793 PetscReal isend[5],irecv[5]; 794 795 PetscFunctionBegin; 796 info->block_size = 1.0; 797 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 798 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 799 isend[3] = info->memory; isend[4] = info->mallocs; 800 if (flag == MAT_LOCAL) { 801 info->nz_used = isend[0]; 802 info->nz_allocated = isend[1]; 803 info->nz_unneeded = isend[2]; 804 info->memory = isend[3]; 805 info->mallocs = isend[4]; 806 } else if (flag == MAT_GLOBAL_MAX) { 807 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 808 info->nz_used = irecv[0]; 809 info->nz_allocated = irecv[1]; 810 info->nz_unneeded = irecv[2]; 811 info->memory = irecv[3]; 812 info->mallocs = irecv[4]; 813 } else if (flag == MAT_GLOBAL_SUM) { 814 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 815 info->nz_used = irecv[0]; 816 info->nz_allocated = irecv[1]; 817 info->nz_unneeded = irecv[2]; 818 info->memory = irecv[3]; 819 info->mallocs = irecv[4]; 820 } 821 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 822 info->fill_ratio_needed = 0; 823 info->factor_mallocs = 0; 824 PetscFunctionReturn(0); 825 } 826 827 #undef __FUNCT__ 828 #define __FUNCT__ "MatSetOption_MPIDense" 829 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 830 { 831 
Mat_MPIDense *a = (Mat_MPIDense*)A->data; 832 PetscErrorCode ierr; 833 834 PetscFunctionBegin; 835 switch (op) { 836 case MAT_NEW_NONZERO_LOCATIONS: 837 case MAT_NEW_NONZERO_LOCATION_ERR: 838 case MAT_NEW_NONZERO_ALLOCATION_ERR: 839 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 840 break; 841 case MAT_ROW_ORIENTED: 842 a->roworiented = flg; 843 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 844 break; 845 case MAT_NEW_DIAGONALS: 846 case MAT_USE_HASH_TABLE: 847 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 848 break; 849 case MAT_IGNORE_OFF_PROC_ENTRIES: 850 a->donotstash = flg; 851 break; 852 case MAT_SYMMETRIC: 853 case MAT_STRUCTURALLY_SYMMETRIC: 854 case MAT_HERMITIAN: 855 case MAT_SYMMETRY_ETERNAL: 856 case MAT_IGNORE_LOWER_TRIANGULAR: 857 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 858 break; 859 default: 860 SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 861 } 862 PetscFunctionReturn(0); 863 } 864 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatDiagonalScale_MPIDense" 868 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 869 { 870 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 871 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 872 PetscScalar *l,*r,x,*v; 873 PetscErrorCode ierr; 874 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 875 876 PetscFunctionBegin; 877 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 878 if (ll) { 879 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 880 if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 881 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 882 for (i=0; i<m; i++) { 883 x = l[i]; 884 v = mat->v + i; 885 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 886 } 887 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 888 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 889 } 890 if (rr) { 891 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 892 if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 893 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 894 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 895 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 896 for (i=0; i<n; i++) { 897 x = r[i]; 898 v = mat->v + i*m; 899 for (j=0; j<m; j++) { (*v++) *= x;} 900 } 901 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 902 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 903 } 904 PetscFunctionReturn(0); 905 } 906 907 #undef __FUNCT__ 908 #define __FUNCT__ "MatNorm_MPIDense" 909 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 910 { 911 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 912 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 913 PetscErrorCode ierr; 914 PetscInt i,j; 915 PetscReal sum = 0.0; 916 PetscScalar *v = mat->v; 917 918 PetscFunctionBegin; 919 if (mdn->size == 1) { 920 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 921 } else { 922 if (type == NORM_FROBENIUS) { 923 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 924 #if defined(PETSC_USE_COMPLEX) 925 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 926 #else 927 sum += (*v)*(*v); v++; 928 #endif 929 } 930 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 931 *nrm = sqrt(*nrm); 932 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 933 } else if (type == NORM_1) { 934 PetscReal *tmp,*tmp2; 935 ierr = 
PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 936 tmp2 = tmp + A->cmap->N; 937 ierr = PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 938 *nrm = 0.0; 939 v = mat->v; 940 for (j=0; j<mdn->A->cmap->n; j++) { 941 for (i=0; i<mdn->A->rmap->n; i++) { 942 tmp[j] += PetscAbsScalar(*v); v++; 943 } 944 } 945 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 946 for (j=0; j<A->cmap->N; j++) { 947 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 948 } 949 ierr = PetscFree(tmp);CHKERRQ(ierr); 950 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 951 } else if (type == NORM_INFINITY) { /* max row norm */ 952 PetscReal ntemp; 953 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 954 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 955 } else { 956 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 957 } 958 } 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatTranspose_MPIDense" 964 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 965 { 966 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 967 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 968 Mat B; 969 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 970 PetscErrorCode ierr; 971 PetscInt j,i; 972 PetscScalar *v; 973 974 PetscFunctionBegin; 975 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 976 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 977 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 978 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 979 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 980 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 981 } else { 982 B = *matout; 983 } 984 985 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 986 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 987 for (i=0; i<m; i++) rwork[i] = rstart + i; 988 for (j=0; j<n; j++) { 989 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 990 v += m; 991 } 992 ierr = PetscFree(rwork);CHKERRQ(ierr); 993 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 994 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 995 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 996 *matout = B; 997 } else { 998 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 999 } 1000 PetscFunctionReturn(0); 1001 } 1002 1003 1004 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 1005 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1009 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1010 { 1011 PetscErrorCode ierr; 1012 1013 PetscFunctionBegin; 1014 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1015 PetscFunctionReturn(0); 1016 } 1017 1018 #if defined(PETSC_HAVE_PLAPACK) 1019 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1022 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1023 { 1024 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1025 PetscErrorCode ierr; 1026 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1027 PetscScalar *array; 1028 PetscReal one = 1.0; 1029 1030 PetscFunctionBegin; 1031 /* Copy A into F->lu->A */ 1032 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1033 ierr = PLA_API_begin();CHKERRQ(ierr); 1034 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1035 ierr = 
MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1036 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1037 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1038 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1039 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1040 ierr = PLA_API_end();CHKERRQ(ierr); 1041 lu->rstart = rstart; 1042 PetscFunctionReturn(0); 1043 } 1044 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1047 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1048 { 1049 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1050 PetscErrorCode ierr; 1051 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1052 PetscScalar *array; 1053 PetscReal one = 1.0; 1054 1055 PetscFunctionBegin; 1056 /* Copy F into A->lu->A */ 1057 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1058 ierr = PLA_API_begin();CHKERRQ(ierr); 1059 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1060 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1061 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1062 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1063 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1064 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1065 ierr = PLA_API_end();CHKERRQ(ierr); 1066 lu->rstart = rstart; 1067 PetscFunctionReturn(0); 1068 } 1069 1070 #undef __FUNCT__ 1071 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1072 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1073 { 1074 PetscErrorCode ierr; 1075 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1076 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1077 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1078 PLA_Obj alpha = NULL,beta = NULL; 1079 1080 PetscFunctionBegin; 1081 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1082 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1083 1084 /* 1085 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1086 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1087 */ 1088 1089 /* do the multiply in PLA */ 1090 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1091 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1092 CHKMEMQ; 1093 1094 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1095 CHKMEMQ; 1096 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1097 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1098 1099 /* 1100 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1101 */ 1102 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1108 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1109 { 1110 PetscErrorCode ierr; 1111 PetscInt m=A->rmap->n,n=B->cmap->n; 1112 Mat Cmat; 1113 1114 PetscFunctionBegin; 1115 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1116 SETERRQ(PETSC_ERR_LIB,"Due to aparent bugs in PLAPACK,this is not currently supported"); 1117 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1118 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1119 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1120 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1121 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1122 1123 *C = Cmat; 1124 PetscFunctionReturn(0); 
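  /*
     Intended usage of these product routines once the PLAPACK path is enabled (a sketch;
     the SETERRQ above currently disables this code path):

        Mat C;
        ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

     MAT_INITIAL_MATRIX invokes the symbolic routine above to create C, after which the
     numeric routine computes C = A*B through PLA_Gemm().
  */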
1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1129 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1130 { 1131 PetscErrorCode ierr; 1132 1133 PetscFunctionBegin; 1134 if (scall == MAT_INITIAL_MATRIX){ 1135 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1136 } 1137 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatSolve_MPIDense" 1143 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1144 { 1145 MPI_Comm comm = ((PetscObject)A)->comm; 1146 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1147 PetscErrorCode ierr; 1148 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1149 PetscScalar *array; 1150 PetscReal one = 1.0; 1151 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1152 PLA_Obj v_pla = NULL; 1153 PetscScalar *loc_buf; 1154 Vec loc_x; 1155 1156 PetscFunctionBegin; 1157 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1158 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1159 1160 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1161 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1162 PLA_Obj_set_to_zero(v_pla); 1163 1164 /* Copy b into rhs_pla */ 1165 PLA_API_begin(); 1166 PLA_Obj_API_open(v_pla); 1167 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1168 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1169 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1170 PLA_Obj_API_close(v_pla); 1171 PLA_API_end(); 1172 1173 if (A->factor == MAT_FACTOR_LU){ 1174 /* Apply the permutations to the right hand sides */ 1175 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1176 1177 /* Solve L y = b, overwriting b with y */ 1178 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1179 1180 /* Solve U x = y (=b), overwriting b with x */ 1181 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1182 } else { /* MAT_FACTOR_CHOLESKY */ 1183 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1184 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? 
PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1185 PLA_NONUNIT_DIAG,lu->A,v_pla); 1186 } 1187 1188 /* Copy PLAPACK x into Petsc vector x */ 1189 PLA_Obj_local_length(v_pla, &loc_m); 1190 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1191 PLA_Obj_local_stride(v_pla, &loc_stride); 1192 /* 1193 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1194 */ 1195 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1196 if (!lu->pla_solved){ 1197 1198 PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc); 1199 PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc); 1200 1201 /* Create IS and cts for VecScatterring */ 1202 PLA_Obj_local_length(v_pla, &loc_m); 1203 PLA_Obj_local_stride(v_pla, &loc_stride); 1204 ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr); 1205 idx_petsc = idx_pla + loc_m; 1206 1207 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1208 for (i=0; i<loc_m; i+=lu->nb){ 1209 j = 0; 1210 while (j < lu->nb && i+j < loc_m){ 1211 idx_petsc[i+j] = rstart + j; j++; 1212 } 1213 rstart += size*lu->nb; 1214 } 1215 1216 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1217 1218 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1219 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1220 ierr = PetscFree(idx_pla);CHKERRQ(ierr); 1221 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1222 } 1223 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1224 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1225 1226 /* Free data */ 1227 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1228 PLA_Obj_free(&v_pla); 1229 1230 lu->pla_solved = PETSC_TRUE; 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1236 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1237 { 1238 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1239 PetscErrorCode ierr; 1240 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1241 PetscInt info_pla=0; 1242 PetscScalar *array,one = 1.0; 1243 1244 PetscFunctionBegin; 1245 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1246 PLA_Obj_free(&lu->A); 1247 PLA_Obj_free (&lu->pivots); 1248 } 1249 /* Create PLAPACK matrix object */ 1250 lu->A = NULL; lu->pivots = NULL; 1251 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1252 PLA_Obj_set_to_zero(lu->A); 1253 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1254 1255 /* Copy A into lu->A */ 1256 PLA_API_begin(); 1257 PLA_Obj_API_open(lu->A); 1258 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1259 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1260 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1261 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1262 PLA_Obj_API_close(lu->A); 1263 PLA_API_end(); 1264 1265 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1266 info_pla = PLA_LU(lu->A,lu->pivots); 1267 if (info_pla != 0) 1268 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1269 1270 lu->rstart = rstart; 1271 lu->mstruct = SAME_NONZERO_PATTERN; 1272 F->ops->solve = MatSolve_MPIDense; 1273 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1274 
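  /* the factored matrix can now be used by MatSolve_MPIDense(); a typical sequence from user
     code is (a sketch: F comes from MatGetFactor(), r and c are row/column permutation ISs,
     info is a MatFactorInfo):

        ierr = MatGetFactor(A,MAT_SOLVER_PLAPACK,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
        ierr = MatLUFactorSymbolic(F,A,r,c,&info);CHKERRQ(ierr);
        ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
        ierr = MatSolve(F,b,x);CHKERRQ(ierr);
  */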
PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1279 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1280 { 1281 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1282 PetscErrorCode ierr; 1283 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1284 PetscInt info_pla=0; 1285 PetscScalar *array,one = 1.0; 1286 1287 PetscFunctionBegin; 1288 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1289 PLA_Obj_free(&lu->A); 1290 } 1291 /* Create PLAPACK matrix object */ 1292 lu->A = NULL; 1293 lu->pivots = NULL; 1294 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1295 1296 /* Copy A into lu->A */ 1297 PLA_API_begin(); 1298 PLA_Obj_API_open(lu->A); 1299 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1300 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1301 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1302 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1303 PLA_Obj_API_close(lu->A); 1304 PLA_API_end(); 1305 1306 /* Factor P A -> Chol */ 1307 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1308 if (info_pla != 0) 1309 SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1310 1311 lu->rstart = rstart; 1312 lu->mstruct = SAME_NONZERO_PATTERN; 1313 F->ops->solve = MatSolve_MPIDense; 1314 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1315 PetscFunctionReturn(0); 1316 } 1317 1318 /* Note the Petsc perm permutation is ignored */ 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1321 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1322 { 1323 PetscErrorCode ierr; 1324 PetscTruth issymmetric,set; 1325 1326 PetscFunctionBegin; 1327 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1328 if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1329 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1330 PetscFunctionReturn(0); 1331 } 1332 1333 /* Note the Petsc r and c permutations are ignored */ 1334 #undef __FUNCT__ 1335 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1336 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1337 { 1338 PetscErrorCode ierr; 1339 PetscInt M = A->rmap->N; 1340 Mat_Plapack *lu; 1341 1342 PetscFunctionBegin; 1343 lu = (Mat_Plapack*)F->spptr; 1344 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1345 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1346 PetscFunctionReturn(0); 1347 } 1348 1349 EXTERN_C_BEGIN 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1352 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1353 { 1354 PetscFunctionBegin; 1355 *type = MAT_SOLVER_PLAPACK; 1356 PetscFunctionReturn(0); 1357 } 1358 EXTERN_C_END 1359 1360 EXTERN_C_BEGIN 1361 #undef __FUNCT__ 1362 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1363 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1364 { 1365 PetscErrorCode ierr; 1366 Mat_Plapack *lu; 1367 PetscMPIInt size; 1368 PetscInt M=A->rmap->N; 1369 1370 PetscFunctionBegin; 1371 /* Create the factorization matrix */ 1372 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1373 ierr = 
MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1374 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1375 ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr); 1376 (*F)->spptr = (void*)lu; 1377 1378 /* Set default Plapack parameters */ 1379 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1380 lu->nb = M/size; 1381 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1382 1383 /* Set runtime options */ 1384 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1385 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1386 PetscOptionsEnd(); 1387 1388 /* Create object distribution template */ 1389 lu->templ = NULL; 1390 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1391 1392 /* Set the datatype */ 1393 #if defined(PETSC_USE_COMPLEX) 1394 lu->datatype = MPI_DOUBLE_COMPLEX; 1395 #else 1396 lu->datatype = MPI_DOUBLE; 1397 #endif 1398 1399 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1400 1401 1402 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1403 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1404 1405 if (ftype == MAT_FACTOR_LU) { 1406 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1407 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1408 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1409 } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1410 (*F)->factor = ftype; 1411 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 EXTERN_C_END 1415 #endif 1416 1417 #undef __FUNCT__ 1418 #define __FUNCT__ "MatGetFactor_mpidense_petsc" 1419 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F) 1420 { 1421 #if defined(PETSC_HAVE_PLAPACK) 1422 PetscErrorCode ierr; 1423 #endif 1424 1425 PetscFunctionBegin; 1426 #if defined(PETSC_HAVE_PLAPACK) 1427 ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr); 1428 #else 1429 SETERRQ1(PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. 
Install PLAPACK",((PetscObject)A)->type_name); 1430 #endif 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatAXPY_MPIDense" 1436 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1437 { 1438 PetscErrorCode ierr; 1439 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1440 1441 PetscFunctionBegin; 1442 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 /* -------------------------------------------------------------------*/ 1447 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1448 MatGetRow_MPIDense, 1449 MatRestoreRow_MPIDense, 1450 MatMult_MPIDense, 1451 /* 4*/ MatMultAdd_MPIDense, 1452 MatMultTranspose_MPIDense, 1453 MatMultTransposeAdd_MPIDense, 1454 0, 1455 0, 1456 0, 1457 /*10*/ 0, 1458 0, 1459 0, 1460 0, 1461 MatTranspose_MPIDense, 1462 /*15*/ MatGetInfo_MPIDense, 1463 MatEqual_MPIDense, 1464 MatGetDiagonal_MPIDense, 1465 MatDiagonalScale_MPIDense, 1466 MatNorm_MPIDense, 1467 /*20*/ MatAssemblyBegin_MPIDense, 1468 MatAssemblyEnd_MPIDense, 1469 MatSetOption_MPIDense, 1470 MatZeroEntries_MPIDense, 1471 /*24*/ MatZeroRows_MPIDense, 1472 0, 1473 0, 1474 0, 1475 0, 1476 /*29*/ MatSetUpPreallocation_MPIDense, 1477 0, 1478 0, 1479 MatGetArray_MPIDense, 1480 MatRestoreArray_MPIDense, 1481 /*34*/ MatDuplicate_MPIDense, 1482 0, 1483 0, 1484 0, 1485 0, 1486 /*39*/ MatAXPY_MPIDense, 1487 MatGetSubMatrices_MPIDense, 1488 0, 1489 MatGetValues_MPIDense, 1490 0, 1491 /*44*/ 0, 1492 MatScale_MPIDense, 1493 0, 1494 0, 1495 0, 1496 /*49*/ 0, 1497 0, 1498 0, 1499 0, 1500 0, 1501 /*54*/ 0, 1502 0, 1503 0, 1504 0, 1505 0, 1506 /*59*/ MatGetSubMatrix_MPIDense, 1507 MatDestroy_MPIDense, 1508 MatView_MPIDense, 1509 0, 1510 0, 1511 /*64*/ 0, 1512 0, 1513 0, 1514 0, 1515 0, 1516 /*69*/ 0, 1517 0, 1518 0, 1519 0, 1520 0, 1521 /*74*/ 0, 1522 0, 1523 0, 1524 0, 1525 0, 1526 /*79*/ 0, 1527 0, 1528 0, 1529 0, 1530 /*83*/ MatLoad_MPIDense, 1531 0, 1532 0, 1533 0, 1534 0, 1535 0, 1536 /*89*/ 1537 #if defined(PETSC_HAVE_PLAPACK) 1538 MatMatMult_MPIDense_MPIDense, 1539 MatMatMultSymbolic_MPIDense_MPIDense, 1540 MatMatMultNumeric_MPIDense_MPIDense, 1541 #else 1542 0, 1543 0, 1544 0, 1545 #endif 1546 0, 1547 /*94*/ 0, 1548 0, 1549 0, 1550 0}; 1551 1552 EXTERN_C_BEGIN 1553 #undef __FUNCT__ 1554 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1555 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1556 { 1557 Mat_MPIDense *a; 1558 PetscErrorCode ierr; 1559 1560 PetscFunctionBegin; 1561 mat->preallocated = PETSC_TRUE; 1562 /* Note: For now, when data is specified above, this assumes the user correctly 1563 allocates the local dense storage space. We should add error checking. */ 1564 1565 a = (Mat_MPIDense*)mat->data; 1566 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1567 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1568 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1569 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1570 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1571 PetscFunctionReturn(0); 1572 } 1573 EXTERN_C_END 1574 1575 /*MC 1576 MAT_SOLVER_PLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices 1577 1578 run config/configure.py with the option --download-plapack 1579 1580 1581 Options Database Keys: 1582 . -mat_plapack_nprows <n> - number of rows in processor partition 1583 . 
-mat_plapack_npcols <n> - number of columns in processor partition 1584 . -mat_plapack_nb <n> - block size of template vector 1585 . -mat_plapack_nb_alg <n> - algorithmic block size 1586 - -mat_plapack_ckerror <n> - error checking flag 1587 1588 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage 1589 1590 M*/ 1591 1592 EXTERN_C_BEGIN 1593 #undef __FUNCT__ 1594 #define __FUNCT__ "MatCreate_MPIDense" 1595 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1596 { 1597 Mat_MPIDense *a; 1598 PetscErrorCode ierr; 1599 1600 PetscFunctionBegin; 1601 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1602 mat->data = (void*)a; 1603 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1604 mat->mapping = 0; 1605 1606 mat->insertmode = NOT_SET_VALUES; 1607 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1608 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1609 1610 ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr); 1611 ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr); 1612 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1613 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1614 a->nvec = mat->cmap->n; 1615 1616 /* build cache for off array entries formed */ 1617 a->donotstash = PETSC_FALSE; 1618 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1619 1620 /* stuff used for matrix vector multiply */ 1621 a->lvec = 0; 1622 a->Mvctx = 0; 1623 a->roworiented = PETSC_TRUE; 1624 1625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1626 "MatGetDiagonalBlock_MPIDense", 1627 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1628 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1629 "MatMPIDenseSetPreallocation_MPIDense", 1630 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1632 "MatMatMult_MPIAIJ_MPIDense", 1633 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1634 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1635 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1636 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1637 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1638 "MatMatMultNumeric_MPIAIJ_MPIDense", 1639 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1640 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C", 1641 "MatGetFactor_mpidense_petsc", 1642 MatGetFactor_mpidense_petsc);CHKERRQ(ierr); 1643 #if defined(PETSC_HAVE_PLAPACK) 1644 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C", 1645 "MatGetFactor_mpidense_plapack", 1646 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1647 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1648 #endif 1649 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1650 1651 PetscFunctionReturn(0); 1652 } 1653 EXTERN_C_END 1654 1655 /*MC 1656 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1657 1658 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1659 and MATMPIDENSE otherwise. 1660 1661 Options Database Keys: 1662 . 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
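/*
   Example (a sketch, not part of the original source): the usual calling sequence for
   assembling a parallel dense matrix with PETSc-managed storage; M and N are illustrative
   global dimensions.

       Mat A;
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
       ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
       (fill with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd())
*/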
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap->rstart = A->rmap->rstart;
  mat->rmap->rend   = A->rmap->rend;
  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  mat->insertmode   = NOT_SET_VALUES;
  a->nvec           = oldmat->nvec;
  a->donotstash     = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
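/*
   Example (a sketch, not part of the original source): MatCreateMPIDense(), documented
   above, bundles the create/set-type/preallocate sequence, and MatDuplicate() produces a
   second matrix with the same parallel layout; M and N are illustrative dimensions.

       Mat A,B;
       ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
       (fill and assemble A, then)
       ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
*/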
#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  /* match the MatGetArray() above before assembling */
  ierr = MatRestoreArray(*newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* handle the case where the matrix is stored on disk as a dense matrix */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m    = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining the number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
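/*
   Example (a sketch, not part of the original source): loading a parallel dense matrix
   from a PETSc binary file through the loader above; "matrix.dat" is an illustrative
   file name.

       Mat         A;
       PetscViewer viewer;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/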
#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
  PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.

  Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
  PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
  called from MatCreate_MPIDense() the first time an MPI dense matrix is created.

  Input Parameter:
.  comm - the communicator the matrix lives on

  Level: developer

  Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
  same communicator (because there is some global state that is initialized and used for all matrices). In addition,
  if PLAPACK is initialized (that is, the initial matrices are created) on subcommunicators of MPI_COMM_WORLD, those
  subcommunicators cannot overlap.

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
{
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
    Plapack_ierror = 3;
#else
    Plapack_ierror = 0;
#endif
    ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_ierror){
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
    } else {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
    }

    Plapack_nb_alg = 0;
    ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_nb_alg) {
      ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
    }
    PetscOptionsEnd();

    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#endif