#define PETSCMAT_DLL

/*
   Basic functions for basic parallel dense matrices.
*/


#include "../src/mat/impls/dense/mpi/mpidense.h"   /*I "petscmat.h" I*/
#if defined(PETSC_HAVE_PLAPACK)
/* Global PLAPACK configuration shared by all MPIDense matrices in this process. */
static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
static MPI_Comm    Plapack_comm_2d;
EXTERN_C_BEGIN
#include "PLA.h"
EXTERN_C_END

/* Per-matrix PLAPACK factorization state (hung off Mat->spptr). */
typedef struct {
  PLA_Obj      A,pivots;     /* PLAPACK copy of the matrix and its pivot vector */
  PLA_Template templ;        /* PLAPACK distribution template */
  MPI_Datatype datatype;     /* MPI datatype matching PetscScalar */
  PetscInt     nb,rstart;    /* distribution block size; first locally owned row */
  VecScatter   ctx;          /* scatter between PETSc and PLAPACK vector layouts */
  IS           is_pla,is_petsc; /* index sets defining the scatter above */
  PetscTruth   pla_solved;   /* set once a solve has been performed */
  MatStructure mstruct;      /* pattern relation between successive factorizations */
} Mat_Plapack;
#endif

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  /* For the parallel type hand back the local block; a sequential matrix is its own local matrix */
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
/* Returns one row of the matrix; only rows owned by this process are supported. */
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
  lrow = row - rstart;
  /* delegate to the sequential local block using the local row number */
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
/* Releases the index/value arrays handed out by MatGetRow_MPIDense. */
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
/*
   Builds a sequential dense matrix sharing the storage of this process's diagonal
   block (the array pointer is offset by m*rstart into the local column-major array).
   Only square (global) matrices are supported.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  /* NOTE(review): assumes the local block's leading dimension equals m — confirm lda handling */
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
/*
   Inserts/adds values: locally owned rows go straight into the sequential block;
   off-process rows are stashed (unless donotstash) for communication at assembly time.
*/
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;          /* negative rows are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-oriented input: v is addressed with stride m */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* row owned elsewhere: buffer it in the stash for MatAssemblyBegin/End */
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
/* Retrieves values; only locally owned rows may be queried (row-oriented output). */
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__ 186 #define __FUNCT__ "MatGetArray_MPIDense" 187 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 188 { 189 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 190 PetscErrorCode ierr; 191 192 PetscFunctionBegin; 193 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 194 PetscFunctionReturn(0); 195 } 196 197 #undef __FUNCT__ 198 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 199 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 200 { 201 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 202 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 203 PetscErrorCode ierr; 204 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 205 const PetscInt *irow,*icol; 206 PetscScalar *av,*bv,*v = lmat->v; 207 Mat newmat; 208 IS iscol_local; 209 210 PetscFunctionBegin; 211 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 212 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 213 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 214 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 215 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 216 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 217 218 /* No parallel redistribution currently supported! Should really check each index set 219 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 220 original matrix! */ 221 222 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 223 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 224 225 /* Check submatrix call */ 226 if (scall == MAT_REUSE_MATRIX) { 227 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 228 /* Really need to test rows and column sizes! 
*/ 229 newmat = *B; 230 } else { 231 /* Create and fill new matrix */ 232 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 233 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 234 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 235 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 236 } 237 238 /* Now extract the data pointers and do the copy, column at a time */ 239 newmatd = (Mat_MPIDense*)newmat->data; 240 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 241 242 for (i=0; i<Ncols; i++) { 243 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 244 for (j=0; j<nrows; j++) { 245 *bv++ = av[irow[j] - rstart]; 246 } 247 } 248 249 /* Assemble the matrices so that the correct flags are set */ 250 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 251 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 252 253 /* Free work space */ 254 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 255 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 256 *B = newmat; 257 PetscFunctionReturn(0); 258 } 259 260 #undef __FUNCT__ 261 #define __FUNCT__ "MatRestoreArray_MPIDense" 262 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 263 { 264 PetscFunctionBegin; 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 270 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 271 { 272 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 273 MPI_Comm comm = ((PetscObject)mat)->comm; 274 PetscErrorCode ierr; 275 PetscInt nstash,reallocs; 276 InsertMode addv; 277 278 PetscFunctionBegin; 279 /* make sure all processors are either in INSERTMODE or ADDMODE */ 280 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 281 if (addv == (ADD_VALUES|INSERT_VALUES)) { 282 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 283 } 284 mat->insertmode = addv; /* in 
case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
/*
   Finishes assembly: drains the stash (values set here for rows owned by other
   processes), re-inserts them locally, then assembles the sequential block.
*/
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: build the scatter used by MatMult and friends */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
/* Zeros every entry by zeroing the local sequential block. */
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
/*
   Zeros the given global rows (optionally placing diag on the diagonal).
   Rows may be specified on any process; a hand-rolled all-to-some exchange
   routes each row index to its owning process before the local MatZeroRows.
   Exchange protocol: count per-owner (nprocs), PetscMaxSum to learn the
   number/size of incoming messages, then Irecv/Isend with Waitany.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  /* nprocs[2*j] = #rows destined for proc j; nprocs[2*j+1] = 1 iff any (message flag) */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* starts was advanced by the packing loop above; rebuild it for the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;   /* convert to local row numbers */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
/* y = A*x: scatter the needed entries of x into lvec, then apply the local block. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
/* z = y + A*x, same scatter pattern as MatMult_MPIDense. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
/* y = A^T*x: local transpose multiply into lvec, then reverse-scatter with ADD. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr =
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
/* z = y + A^T*x: copy y into z, then add the reverse-scattered local result. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
/*
   Copies the diagonal into v (a global-length vector; off-process entries
   stay zero from the VecSet). Indexing assumes the local leading dimension
   equals the local row count m.
*/
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;   /* offset (in scalars) of the diagonal block in the local array */
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
/* Destroys the parallel matrix: stash, local block, scatter objects, and any PLAPACK state. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack    *lu=(Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)  {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_PLAPACK)
  if (lu) {
    ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
    ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
    ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);

    if (lu->is_pla) {
      ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
      ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
    }
  }
#endif

  ierr = PetscFree(mdn);CHKERRQ(ierr);
  /* reset the composed type-specific functions registered on this object */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
/*
   Writes the matrix to a binary viewer. Requires PETSC_VIEWER_NATIVE format;
   rank 0 gathers each process's rows in turn and writes them row-major.
*/
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
  MPI_Status        status;

  PetscFunctionBegin;
  if (mdn->size == 1) {
604 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 605 } else { 606 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 607 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 608 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 609 610 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 611 if (format == PETSC_VIEWER_NATIVE) { 612 613 if (!rank) { 614 /* store the matrix as a dense matrix */ 615 header[0] = MAT_FILE_CLASSID; 616 header[1] = mat->rmap->N; 617 header[2] = N; 618 header[3] = MATRIX_BINARY_FORMAT_DENSE; 619 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 620 621 /* get largest work array needed for transposing array */ 622 mmax = mat->rmap->n; 623 for (i=1; i<size; i++) { 624 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 625 } 626 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 627 628 /* write out local array, by rows */ 629 m = mat->rmap->n; 630 v = a->v; 631 for (j=0; j<N; j++) { 632 for (i=0; i<m; i++) { 633 work[j + i*N] = *v++; 634 } 635 } 636 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 637 /* get largest work array to receive messages from other processes, excludes process zero */ 638 mmax = 0; 639 for (i=1; i<size; i++) { 640 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 641 } 642 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 643 for(k = 1; k < size; k++) { 644 v = vv; 645 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 646 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 647 648 for(j = 0; j < N; j++) { 649 for(i = 0; i < m; i++) { 650 work[j + i*N] = *v++; 651 } 652 } 653 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 654 } 655 ierr = PetscFree(work);CHKERRQ(ierr); 656 ierr = PetscFree(vv);CHKERRQ(ierr); 657 } else { 658 ierr = 
MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 659 } 660 } else { 661 SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE"); 662 } 663 } 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 669 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 670 { 671 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 672 PetscErrorCode ierr; 673 PetscMPIInt size = mdn->size,rank = mdn->rank; 674 const PetscViewerType vtype; 675 PetscTruth iascii,isdraw; 676 PetscViewer sviewer; 677 PetscViewerFormat format; 678 #if defined(PETSC_HAVE_PLAPACK) 679 Mat_Plapack *lu; 680 #endif 681 682 PetscFunctionBegin; 683 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 684 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 685 if (iascii) { 686 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 687 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 688 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 689 MatInfo info; 690 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n, 692 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 693 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 694 #if defined(PETSC_HAVE_PLAPACK) 695 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 696 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 697 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 698 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 699 if 
(mat->factortype){ 700 lu=(Mat_Plapack*)(mat->spptr); 701 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 702 } 703 #else 704 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 705 #endif 706 PetscFunctionReturn(0); 707 } else if (format == PETSC_VIEWER_ASCII_INFO) { 708 PetscFunctionReturn(0); 709 } 710 } else if (isdraw) { 711 PetscDraw draw; 712 PetscTruth isnull; 713 714 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 715 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 716 if (isnull) PetscFunctionReturn(0); 717 } 718 719 if (size == 1) { 720 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 721 } else { 722 /* assemble the entire matrix onto first processor. */ 723 Mat A; 724 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 725 PetscInt *cols; 726 PetscScalar *vals; 727 728 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 729 if (!rank) { 730 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 731 } else { 732 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 733 } 734 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 735 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 736 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 737 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 738 739 /* Copy the matrix ... 
This isn't the most efficient means, 740 but it's quick for now */ 741 A->insertmode = INSERT_VALUES; 742 row = mat->rmap->rstart; m = mdn->A->rmap->n; 743 for (i=0; i<m; i++) { 744 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 745 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 746 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 747 row++; 748 } 749 750 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 752 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 753 if (!rank) { 754 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 755 } 756 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 757 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 758 ierr = MatDestroy(A);CHKERRQ(ierr); 759 } 760 PetscFunctionReturn(0); 761 } 762 763 #undef __FUNCT__ 764 #define __FUNCT__ "MatView_MPIDense" 765 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 766 { 767 PetscErrorCode ierr; 768 PetscTruth iascii,isbinary,isdraw,issocket; 769 770 PetscFunctionBegin; 771 772 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 773 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 774 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 775 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 776 777 if (iascii || issocket || isdraw) { 778 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 779 } else if (isbinary) { 780 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 781 } else { 782 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 783 } 784 PetscFunctionReturn(0); 785 } 786 787 #undef __FUNCT__ 788 #define __FUNCT__ "MatGetInfo_MPIDense" 789 PetscErrorCode 
MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  /* gather the local block's statistics, then reduce per the requested flag */
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
/* Applies an option; most are forwarded to the local block or logged as ignored. */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;   /* affects interpretation of v[] in MatSetValues */
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;    /* skip stashing of off-process entries in MatSetValues */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
/*
   A = diag(ll)*A*diag(rr). The left vector matches the row distribution;
   the right vector is scattered into lvec so each process can scale its
   local columns.
*/
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;               /* walk row i with stride m (column-major storage) */
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;             /* column i is contiguous */
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr =
VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 903 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 904 } 905 PetscFunctionReturn(0); 906 } 907 908 #undef __FUNCT__ 909 #define __FUNCT__ "MatNorm_MPIDense" 910 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 911 { 912 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 913 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 914 PetscErrorCode ierr; 915 PetscInt i,j; 916 PetscReal sum = 0.0; 917 PetscScalar *v = mat->v; 918 919 PetscFunctionBegin; 920 if (mdn->size == 1) { 921 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 922 } else { 923 if (type == NORM_FROBENIUS) { 924 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 925 #if defined(PETSC_USE_COMPLEX) 926 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 927 #else 928 sum += (*v)*(*v); v++; 929 #endif 930 } 931 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 932 *nrm = sqrt(*nrm); 933 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 934 } else if (type == NORM_1) { 935 PetscReal *tmp,*tmp2; 936 ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 937 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 938 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 939 *nrm = 0.0; 940 v = mat->v; 941 for (j=0; j<mdn->A->cmap->n; j++) { 942 for (i=0; i<mdn->A->rmap->n; i++) { 943 tmp[j] += PetscAbsScalar(*v); v++; 944 } 945 } 946 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 947 for (j=0; j<A->cmap->N; j++) { 948 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 949 } 950 ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr); 951 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 952 } else if (type == NORM_INFINITY) { /* max row norm */ 953 PetscReal ntemp; 954 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 955 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 956 
} else { 957 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 958 } 959 } 960 PetscFunctionReturn(0); 961 } 962 963 #undef __FUNCT__ 964 #define __FUNCT__ "MatTranspose_MPIDense" 965 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 966 { 967 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 968 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 969 Mat B; 970 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 971 PetscErrorCode ierr; 972 PetscInt j,i; 973 PetscScalar *v; 974 975 PetscFunctionBegin; 976 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 977 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 978 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 979 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 980 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 981 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 982 } else { 983 B = *matout; 984 } 985 986 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 987 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 988 for (i=0; i<m; i++) rwork[i] = rstart + i; 989 for (j=0; j<n; j++) { 990 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 991 v += m; 992 } 993 ierr = PetscFree(rwork);CHKERRQ(ierr); 994 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 995 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 996 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 997 *matout = B; 998 } else { 999 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1000 } 1001 PetscFunctionReturn(0); 1002 } 1003 1004 1005 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 1006 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 1007 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1010 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1011 { 1012 PetscErrorCode ierr; 1013 1014 
  PetscFunctionBegin;
  /* default preallocation: let PETSc allocate the local dense storage */
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyToPlapack"
/* Copies the PETSc-distributed entries of A into the PLAPACK object F->spptr->A
   via the PLAPACK API axpy interface (target is zeroed first, so axpy == copy). */
PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy A into F->lu->A; the API open/close pair brackets the accumulation */
  ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  /* local m x M block (column-major, leading dimension m) goes to global rows rstart.. */
  ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyFromPlapack"
/* Inverse of MatMPIDenseCopyToPlapack: copies the PLAPACK object F->spptr->A
   back into the PETSc-distributed matrix A (A is zeroed first). */
PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy lu->A (PLAPACK) into A (PETSc) */
  ierr = MatZeroEntries(A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
/* C = A*B computed by PLAPACK's PLA_Gemm; A, B, C must each carry a
   Mat_Plapack context in spptr (set up by the symbolic phase). */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
  Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
  Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
  PLA_Obj        alpha = NULL,beta = NULL;

  PetscFunctionBegin;
  ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
  ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);

  /* do the multiply in PLA: alpha=1 conforming to A, beta=0 conforming to C */
  ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
  ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
  CHKMEMQ;

  ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
  CHKMEMQ;
  ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
  ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);

  ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
/* Creates the product matrix C for C = A*B.  NOTE: currently disabled —
   the unconditional SETERRQ below makes the code after it unreachable. */
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported");
  ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
  ierr =
MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *C = Cmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
/* Driver: symbolic phase on MAT_INITIAL_MATRIX, then numeric phase. */
PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIDense"
/* Solves A x = b using the PLAPACK LU or Cholesky factors stored in A->spptr.
   b is copied into a PLAPACK vector, the triangular solves are done in
   PLAPACK's distribution, and the result is scattered back into x.  The
   scatter (lu->ctx) and its index sets are built once on the first solve
   and reused afterwards (lu->pla_solved flag). */
PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
{
  MPI_Comm       comm = ((PetscObject)A)->comm;
  Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
  PetscScalar    *array;
  PetscReal      one = 1.0;
  PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;;
  PLA_Obj        v_pla = NULL;
  PetscScalar    *loc_buf;
  Vec            loc_x;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* Create PLAPACK vector objects, then copy b into PLAPACK b */
  PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
  PLA_Obj_set_to_zero(v_pla);

  /* Copy b into rhs_pla (axpy onto zeroed vector == copy) */
  PLA_API_begin();
  PLA_Obj_API_open(v_pla);
  ierr = VecGetArray(b,&array);CHKERRQ(ierr);
  PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
  ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(v_pla);
  PLA_API_end();

  if (A->factortype == MAT_FACTOR_LU){
    /* Apply the permutations to the right hand sides */
    PLA_Apply_pivots_to_rows (v_pla,lu->pivots);

    /* Solve L y = b, overwriting b with y */
    PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );

    /* Solve U x = y (=b), overwriting b with x */
    PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
  } else { /* MAT_FACTOR_CHOLESKY: solve L (L^T or L^H) x = b */
    PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
    PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
              PLA_NONUNIT_DIAG,lu->A,v_pla);
  }

  /* Copy PLAPACK x into Petsc vector x */
  PLA_Obj_local_length(v_pla, &loc_m);
  PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
  PLA_Obj_local_stride(v_pla, &loc_stride);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
  if (!lu->pla_solved){

    PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
    PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);

    /* Create IS and ctx for the VecScatter */
    PLA_Obj_local_length(v_pla, &loc_m);
    PLA_Obj_local_stride(v_pla, &loc_stride);
    ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr);

    /* map PLAPACK's block-cyclic layout (block size lu->nb over the 2d grid)
       back to PETSc's contiguous global numbering */
    rstart = (r_rank*c_nproc+c_rank)*lu->nb;
    for (i=0; i<loc_m; i+=lu->nb){
      j = 0;
      while (j < lu->nb && i+j < loc_m){
        idx_petsc[i+j] = rstart + j; j++;
      }
      rstart += size*lu->nb;
    }

    for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;

    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
    ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr);
    ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* Free data */
  ierr = VecDestroy(loc_x);CHKERRQ(ierr);
  PLA_Obj_free(&v_pla);

  lu->pla_solved = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIDense"
/* Numeric LU factorization via PLAPACK: copies A into the PLAPACK object in
   F->spptr, then overwrites it with the L and U factors (PLA_LU). */
PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  /* refactorization: release the previous factor and pivot objects */
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    PLA_Obj_free(&lu->A);
    PLA_Obj_free (&lu->pivots);
  }
  /* Create PLAPACK matrix object */
  lu->A = NULL; lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
  PLA_Obj_set_to_zero(lu->A);
  PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
  info_pla = PLA_LU(lu->A,lu->pivots);
  if (info_pla != 0)
    SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);

  lu->rstart  = rstart;
  lu->mstruct = SAME_NONZERO_PATTERN;
  F->ops->solve = MatSolve_MPIDense;
  F->assembled  = PETSC_TRUE;  /* required by -ksp_view */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
/* Numeric Cholesky factorization via PLAPACK: copies A into the PLAPACK
   object in F->spptr, then factors the lower triangle (PLA_Chol). */
PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  /* refactorization: release the previous factor object */
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    PLA_Obj_free(&lu->A);
  }
  /* Create PLAPACK matrix object (no pivots needed for Cholesky) */
  lu->A      = NULL;
  lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor A -> L L^T (lower triangle overwritten with L) */
  info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
  if (info_pla != 0)
    SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);

  lu->rstart  = rstart;
  lu->mstruct = SAME_NONZERO_PATTERN;
  F->ops->solve = MatSolve_MPIDense;
  F->assembled  = PETSC_TRUE;  /* required by -ksp_view */
  PetscFunctionReturn(0);
}

/* Note the Petsc perm permutation is ignored */
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscErrorCode ierr;
  PetscTruth     issymmetric,set;

  PetscFunctionBegin;
  /* PLAPACK Cholesky requires the user to have flagged the matrix symmetric */
  ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
  if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
  F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense;
  PetscFunctionReturn(0);
}

/* Note the Petsc r and c permutations are ignored */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N;
  Mat_Plapack    *lu;

  PetscFunctionBegin;
  lu = (Mat_Plapack*)F->spptr;
  /* pivot vector for the row interchanges produced by PLA_LU */
  ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
  F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
{
  PetscFunctionBegin;
  *type = MAT_SOLVER_PLAPACK;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_mpidense_plapack"
/* Creates the factor matrix F for a PLAPACK LU or Cholesky factorization:
   allocates the Mat_Plapack context, chooses the block size (option
   -mat_plapack_nb), creates the PLAPACK template and matrix object, and
   installs the appropriate symbolic-factorization routine. */
PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
{
  PetscErrorCode ierr;
  Mat_Plapack    *lu;
  PetscMPIInt    size;
  PetscInt       M=A->rmap->N;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
  ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
  (*F)->spptr = (void*)lu;

  /* Set default Plapack parameters */
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  lu->nb = M/size;
  if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */

  /* Set runtime options */
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
  PetscOptionsEnd();

  /* Create object distribution template */
  lu->templ = NULL;
  ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);

  /* Set the datatype */
#if defined(PETSC_USE_COMPLEX)
  lu->datatype = MPI_DOUBLE_COMPLEX;
#else
  lu->datatype = MPI_DOUBLE;
#endif

  ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);

  lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() has not been called yet */
  lu->mstruct    = DIFFERENT_NONZERO_PATTERN;

  if (ftype == MAT_FACTOR_LU) {
    (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
  } else if (ftype == MAT_FACTOR_CHOLESKY) {
    (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
  } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
  (*F)->factortype = ftype;
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_mpidense_petsc"
/* PETSc's "native" factor entry for MPIDENSE simply defers to PLAPACK;
   errors out if PETSc was not configured with PLAPACK. */
PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
{
#if defined(PETSC_HAVE_PLAPACK)
  PetscErrorCode ierr;
#endif

  PetscFunctionBegin;
#if defined(PETSC_HAVE_PLAPACK)
  ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr);
#else
  SETERRQ1(PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
/* Y = Y + alpha*X, delegated to the local sequential dense blocks. */
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIDense"
/* Complex-conjugates all entries, delegated to the local block. */
PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIDense"
/* Replaces each entry with its real part, delegated to the local block. */
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIDense"
/* Replaces each entry with its imaginary part, delegated to the local block. */
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for the MPIDENSE matrix type; a 0 entry means the
   operation is not supported. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*24*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*34*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIDense,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*44*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       0,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
/*83*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*89*/
#if defined(PETSC_HAVE_PLAPACK)
       MatMatMult_MPIDense_MPIDense,
       MatMatMultSymbolic_MPIDense_MPIDense,
       MatMatMultNumeric_MPIDense_MPIDense,
#else
       0,
       0,
       0,
#endif
       0,
       0,
/*94*/ 0,
       0,
       0,
       0,
       0,
/*99*/ 0,
       0,
       0,
       MatConjugate_MPIDense,
       0,
/*104*/0,
       MatRealPart_MPIDense,
       MatImaginaryPart_MPIDense,
       0
};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
/* Type-specific implementation: creates the local SEQDENSE block of size
   (local rows) x (global columns), optionally wrapping user-supplied storage. */
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified above, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MAT_SOLVER_PLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices

   run config/configure.py with the option --download-plapack


   Options Database Keys:
. -mat_plapack_nprows <n> - number of rows in processor partition
. -mat_plapack_npcols <n> - number of columns in processor partition
. -mat_plapack_nb <n> - block size of template vector
. -mat_plapack_nb_alg <n> - algorithmic block size
- -mat_plapack_ckerror <n> - error checking flag

.seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage

M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
/* Type constructor for MATMPIDENSE: installs the function table, sets up the
   layouts and the off-process value stash, and composes the type-specific
   methods queried by name elsewhere in PETSc. */
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->mapping = 0;

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec =
mat->cmap->n;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* compose the named methods other parts of PETSc query at runtime */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                     "MatMatMult_MPIAIJ_MPIDense",
                                     MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                     "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                     MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                     "MatMatMultNumeric_MPIAIJ_MPIDense",
                                     MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C",
                                     "MatGetFactor_mpidense_petsc",
                                     MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C",
                                     "MatGetFactor_mpidense_plapack",
                                     MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
  ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
/* Constructor for the size-dispatching "dense" type: picks SEQDENSE on one
   process and MPIDENSE otherwise. */
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
.  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation, if this type has one */
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a sparse parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* choose the sequential or parallel dense type based on communicator size */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
/* Duplicates the parallel dense matrix, copying layouts, flags and (per
   cpvalues) the numerical values of the local block. */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap->rstart = A->rmap->rstart;
  mat->rmap->rend   = A->rmap->rend;
  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  mat->insertmode   = NOT_SET_VALUES;
  a->nvec           = oldmat->nvec;
  a->donotstash     = oldmat->donotstash;

  /* copy the ownership-range tables for both layouts */
  ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
/* Loads a dense matrix whose binary file stores the values densely by rows:
   process 0 reads each process's row slab and ships it out; every process
   transposes its slab from row-major (file) to column-major (storage). */
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows: first (M%size) processes get one extra */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];    /* prefix sum -> row ownership ranges */
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix-by row (this is why cannot directly read into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix-by row (this is why cannot directly read into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
/* Loads a matrix in PETSc binary format into an MPIDENSE matrix; rank 0
   reads the header and broadcasts the global sizes to all processes. */
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1964 M = header[1]; N = header[2]; nz = header[3]; 1965 1966 /* 1967 Handle case where matrix is stored on disk as a dense matrix 1968 */ 1969 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1970 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr); 1971 PetscFunctionReturn(0); 1972 } 1973 1974 /* determine ownership of all rows */ 1975 m = PetscMPIIntCast(M/size + ((M % size) > rank)); 1976 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1977 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1978 rowners[0] = 0; 1979 for (i=2; i<=size; i++) { 1980 rowners[i] += rowners[i-1]; 1981 } 1982 rstart = rowners[rank]; 1983 rend = rowners[rank+1]; 1984 1985 /* distribute row lengths to all processors */ 1986 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 1987 if (!rank) { 1988 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1989 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1990 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1991 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1992 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1993 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1994 } else { 1995 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1996 } 1997 1998 if (!rank) { 1999 /* calculate the number of nonzeros on each processor */ 2000 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2001 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2002 for (i=0; i<size; i++) { 2003 for (j=rowners[i]; j< rowners[i+1]; j++) { 2004 procsnz[i] += rowlengths[j]; 2005 } 2006 } 2007 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2008 2009 /* determine max buffer needed and allocate it */ 2010 maxnz = 0; 2011 for (i=0; i<size; i++) { 2012 maxnz = PetscMax(maxnz,procsnz[i]); 
2013 } 2014 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2015 2016 /* read in my part of the matrix column indices */ 2017 nz = procsnz[0]; 2018 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2019 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2020 2021 /* read in every one elses and ship off */ 2022 for (i=1; i<size; i++) { 2023 nz = procsnz[i]; 2024 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2025 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2026 } 2027 ierr = PetscFree(cols);CHKERRQ(ierr); 2028 } else { 2029 /* determine buffer space needed for message */ 2030 nz = 0; 2031 for (i=0; i<m; i++) { 2032 nz += ourlens[i]; 2033 } 2034 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2035 2036 /* receive message of column indices*/ 2037 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2038 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2039 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2040 } 2041 2042 /* loop over local rows, determining number of off diagonal entries */ 2043 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2044 jj = 0; 2045 for (i=0; i<m; i++) { 2046 for (j=0; j<ourlens[i]; j++) { 2047 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 2048 jj++; 2049 } 2050 } 2051 2052 /* create our matrix */ 2053 for (i=0; i<m; i++) { 2054 ourlens[i] -= offlens[i]; 2055 } 2056 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 2057 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2058 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 2059 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 2060 A = *newmat; 2061 for (i=0; i<m; i++) { 2062 ourlens[i] += offlens[i]; 2063 } 2064 2065 if (!rank) { 2066 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2067 2068 /* read in my part of the matrix numerical values */ 2069 nz = procsnz[0]; 
2070 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2071 2072 /* insert into matrix */ 2073 jj = rstart; 2074 smycols = mycols; 2075 svals = vals; 2076 for (i=0; i<m; i++) { 2077 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2078 smycols += ourlens[i]; 2079 svals += ourlens[i]; 2080 jj++; 2081 } 2082 2083 /* read in other processors and ship out */ 2084 for (i=1; i<size; i++) { 2085 nz = procsnz[i]; 2086 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2087 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2088 } 2089 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2090 } else { 2091 /* receive numeric values */ 2092 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2093 2094 /* receive message of values*/ 2095 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2096 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2097 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2098 2099 /* insert into matrix */ 2100 jj = rstart; 2101 smycols = mycols; 2102 svals = vals; 2103 for (i=0; i<m; i++) { 2104 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2105 smycols += ourlens[i]; 2106 svals += ourlens[i]; 2107 jj++; 2108 } 2109 } 2110 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2111 ierr = PetscFree(vals);CHKERRQ(ierr); 2112 ierr = PetscFree(mycols);CHKERRQ(ierr); 2113 ierr = PetscFree(rowners);CHKERRQ(ierr); 2114 2115 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2116 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2117 PetscFunctionReturn(0); 2118 } 2119 2120 #undef __FUNCT__ 2121 #define __FUNCT__ "MatEqual_MPIDense" 2122 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag) 2123 { 2124 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2125 Mat a,b; 2126 PetscTruth flg; 2127 
PetscErrorCode ierr; 2128 2129 PetscFunctionBegin; 2130 a = matA->A; 2131 b = matB->A; 2132 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2133 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2134 PetscFunctionReturn(0); 2135 } 2136 2137 #if defined(PETSC_HAVE_PLAPACK) 2138 2139 #undef __FUNCT__ 2140 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2141 /*@C 2142 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2143 Level: developer 2144 2145 .keywords: Petsc, destroy, package, PLAPACK 2146 .seealso: PetscFinalize() 2147 @*/ 2148 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void) 2149 { 2150 PetscErrorCode ierr; 2151 2152 PetscFunctionBegin; 2153 ierr = PLA_Finalize();CHKERRQ(ierr); 2154 PetscFunctionReturn(0); 2155 } 2156 2157 #undef __FUNCT__ 2158 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2159 /*@C 2160 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2161 called from MatCreate_MPIDense() the first time an MPI dense matrix is called. 2162 2163 Input Parameter: 2164 . comm - the communicator the matrix lives on 2165 2166 Level: developer 2167 2168 Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the 2169 same communicator (because there is some global state that is initialized and used for all matrices). In addition if 2170 PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators 2171 cannot overlap. 
2172 2173 .keywords: Petsc, initialize, package, PLAPACK 2174 .seealso: PetscInitializePackage(), PetscInitialize() 2175 @*/ 2176 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm) 2177 { 2178 PetscMPIInt size; 2179 PetscErrorCode ierr; 2180 2181 PetscFunctionBegin; 2182 if (!PLA_Initialized(PETSC_NULL)) { 2183 2184 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2185 Plapack_nprows = 1; 2186 Plapack_npcols = size; 2187 2188 ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr); 2189 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr); 2190 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr); 2191 #if defined(PETSC_USE_DEBUG) 2192 Plapack_ierror = 3; 2193 #else 2194 Plapack_ierror = 0; 2195 #endif 2196 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr); 2197 if (Plapack_ierror){ 2198 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 2199 } else { 2200 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 2201 } 2202 2203 Plapack_nb_alg = 0; 2204 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr); 2205 if (Plapack_nb_alg) { 2206 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr); 2207 } 2208 PetscOptionsEnd(); 2209 2210 ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr); 2211 ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr); 2212 ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr); 2213 } 2214 PetscFunctionReturn(0); 2215 } 2216 2217 #endif 2218