1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "../src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 #if defined(PETSC_HAVE_PLAPACK) 10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 11 static MPI_Comm Plapack_comm_2d; 12 EXTERN_C_BEGIN 13 #include "PLA.h" 14 EXTERN_C_END 15 16 typedef struct { 17 PLA_Obj A,pivots; 18 PLA_Template templ; 19 MPI_Datatype datatype; 20 PetscInt nb,rstart; 21 VecScatter ctx; 22 IS is_pla,is_petsc; 23 PetscTruth pla_solved; 24 MatStructure mstruct; 25 } Mat_Plapack; 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatDenseGetLocalMatrix" 30 /*@ 31 32 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 33 matrix that represents the operator. For sequential matrices it returns itself. 34 35 Input Parameter: 36 . A - the Seq or MPI dense matrix 37 38 Output Parameter: 39 . B - the inner matrix 40 41 Level: intermediate 42 43 @*/ 44 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 45 { 46 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 47 PetscErrorCode ierr; 48 PetscTruth flg; 49 50 PetscFunctionBegin; 51 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 52 if (flg) { 53 *B = mat->A; 54 } else { 55 *B = A; 56 } 57 PetscFunctionReturn(0); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatGetRow_MPIDense" 62 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 63 { 64 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 65 PetscErrorCode ierr; 66 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 67 68 PetscFunctionBegin; 69 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows") 70 lrow = row - rstart; 71 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 72 PetscFunctionReturn(0); 73 } 74 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatRestoreRow_MPIDense" 77 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 78 { 79 PetscErrorCode ierr; 80 81 PetscFunctionBegin; 82 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 83 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 84 PetscFunctionReturn(0); 85 } 86 87 EXTERN_C_BEGIN 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 90 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B) 91 { 92 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 93 PetscErrorCode ierr; 94 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 95 PetscScalar *array; 96 MPI_Comm comm; 97 98 PetscFunctionBegin; 99 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported."); 100 101 /* The reuse aspect is not implemented efficiently */ 102 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 103 104 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 105 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 106 ierr = MatCreate(comm,B);CHKERRQ(ierr); 107 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 108 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 109 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 110 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 111 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 112 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 113 114 *iscopy = PETSC_TRUE; 115 PetscFunctionReturn(0); 116 } 117 EXTERN_C_END 118 119 #undef __FUNCT__ 120 #define __FUNCT__ "MatSetValues_MPIDense" 121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 122 { 123 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 124 PetscErrorCode ierr; 125 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 126 PetscTruth roworiented = A->roworiented; 127 128 PetscFunctionBegin; 129 if (v) PetscValidScalarPointer(v,6); 130 for (i=0; i<m; i++) { 131 if (idxm[i] < 0) continue; 132 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 133 if (idxm[i] >= rstart && idxm[i] < rend) { 134 row = idxm[i] - rstart; 135 if (roworiented) { 136 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 137 } else { 138 for (j=0; j<n; j++) { 139 if (idxn[j] < 0) continue; 140 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 141 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 142 } 143 } 144 } else { 145 if (!A->donotstash) { 146 if (roworiented) { 147 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 148 } else { 149 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 150 } 151 } 152 } 153 } 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatGetValues_MPIDense" 159 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 160 { 161 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 164 165 PetscFunctionBegin; 166 for (i=0; i<m; i++) { 167 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 168 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 169 if (idxm[i] >= rstart && idxm[i] < rend) { 170 row = idxm[i] - rstart; 171 for (j=0; j<n; j++) { 172 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 173 if (idxn[j] >= mat->cmap->N) { 174 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 175 } 176 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 177 } 178 } else { 179 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 180 } 181 } 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatGetArray_MPIDense" 187 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 188 { 189 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 190 PetscErrorCode ierr; 191 192 PetscFunctionBegin; 193 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 194 PetscFunctionReturn(0); 195 } 196 197 #undef __FUNCT__ 198 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 199 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 200 { 201 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 202 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 203 PetscErrorCode ierr; 204 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 205 const PetscInt *irow,*icol; 206 PetscScalar *av,*bv,*v = lmat->v; 207 Mat newmat; 208 IS iscol_local; 209 210 PetscFunctionBegin; 211 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 212 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 213 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 214 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 215 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 216 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 217 218 /* No parallel redistribution currently supported! Should really check each index set 219 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 220 original matrix! */ 221 222 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 223 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 224 225 /* Check submatrix call */ 226 if (scall == MAT_REUSE_MATRIX) { 227 /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 228 /* Really need to test rows and column sizes! */ 229 newmat = *B; 230 } else { 231 /* Create and fill new matrix */ 232 ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr); 233 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 234 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 235 ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr); 236 } 237 238 /* Now extract the data pointers and do the copy, column at a time */ 239 newmatd = (Mat_MPIDense*)newmat->data; 240 bv = ((Mat_SeqDense *)newmatd->A->data)->v; 241 242 for (i=0; i<Ncols; i++) { 243 av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i]; 244 for (j=0; j<nrows; j++) { 245 *bv++ = av[irow[j] - rstart]; 246 } 247 } 248 249 /* Assemble the matrices so that the correct flags are set */ 250 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 251 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 252 253 /* Free work space */ 254 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 255 ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr); 256 *B = newmat; 257 PetscFunctionReturn(0); 258 } 259 260 #undef __FUNCT__ 261 #define __FUNCT__ "MatRestoreArray_MPIDense" 262 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 263 { 264 PetscFunctionBegin; 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatAssemblyBegin_MPIDense" 270 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 271 { 272 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 273 MPI_Comm comm = ((PetscObject)mat)->comm; 274 PetscErrorCode ierr; 275 PetscInt nstash,reallocs; 276 InsertMode addv; 277 278 PetscFunctionBegin; 279 /* make sure all processors are either in INSERTMODE or ADDMODE */ 280 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 281 if (addv == (ADD_VALUES|INSERT_VALUES)) { 282 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 283 } 284 mat->insertmode = addv; /* in case this processor had no cache */ 285 286 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 287 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 288 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatAssemblyEnd_MPIDense" 294 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 295 { 296 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 297 PetscErrorCode ierr; 298 PetscInt i,*row,*col,flg,j,rstart,ncols; 299 PetscMPIInt n; 300 PetscScalar *val; 301 InsertMode addv=mat->insertmode; 302 303 PetscFunctionBegin; 304 /* wait on receives */ 305 while (1) { 306 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 307 if (!flg) break; 308 309 for (i=0; i<n;) { 310 /* Now identify the consecutive vals belonging to the same row */ 311 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 312 if (j < n) ncols = j-i; 313 else ncols = n-i; 314 /* Now assemble all these values with a single function call */ 315 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 316 i = j; 317 } 318 } 319 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 320 321 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 322 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 323 324 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 325 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 326 } 327 PetscFunctionReturn(0); 328 } 329 330 #undef __FUNCT__ 331 #define __FUNCT__ "MatZeroEntries_MPIDense" 332 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 333 { 334 PetscErrorCode ierr; 335 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 336 337 PetscFunctionBegin; 338 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 339 PetscFunctionReturn(0); 340 } 341 342 /* the code does not do the diagonal entries correctly unless the 343 matrix is square and the column and row owerships are identical. 344 This is a BUG. The only way to fix it seems to be to access 345 mdn->A and mdn->B directly and not through the MatZeroRows() 346 routine. 347 */ 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatZeroRows_MPIDense" 350 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 351 { 352 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 353 PetscErrorCode ierr; 354 PetscInt i,*owners = A->rmap->range; 355 PetscInt *nprocs,j,idx,nsends; 356 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 357 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 358 PetscInt *lens,*lrows,*values; 359 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 360 MPI_Comm comm = ((PetscObject)A)->comm; 361 MPI_Request *send_waits,*recv_waits; 362 MPI_Status recv_status,*send_status; 363 PetscTruth found; 364 365 PetscFunctionBegin; 366 /* first count number of contributors to each processor */ 367 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 368 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 369 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 370 for (i=0; i<N; i++) { 371 idx = rows[i]; 372 found = PETSC_FALSE; 373 for (j=0; j<size; j++) { 374 if (idx >= owners[j] && idx < owners[j+1]) { 375 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 376 } 377 } 378 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 379 } 380 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 381 382 /* inform other processors of number of messages and max length*/ 383 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 384 385 /* post receives: */ 386 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 387 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 388 for (i=0; i<nrecvs; i++) { 389 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 390 } 391 392 /* do sends: 393 1) starts[i] gives the starting index in svalues for stuff going to 394 the ith processor 395 */ 396 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 397 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 398 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 399 starts[0] = 0; 400 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 401 for (i=0; i<N; i++) { 402 svalues[starts[owner[i]]++] = rows[i]; 403 } 404 405 starts[0] = 0; 406 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 407 count = 0; 408 for (i=0; i<size; i++) { 409 if (nprocs[2*i+1]) { 410 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 411 } 412 } 413 ierr = PetscFree(starts);CHKERRQ(ierr); 414 415 base = owners[rank]; 416 417 /* wait on receives */ 418 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 419 count = nrecvs; 420 slen = 0; 421 while (count) { 422 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 423 /* unpack receives into our local space */ 424 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 425 source[imdex] = recv_status.MPI_SOURCE; 426 lens[imdex] = n; 427 slen += n; 428 count--; 429 } 430 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 431 432 /* move the data into the send scatter */ 433 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 434 count = 0; 435 for (i=0; i<nrecvs; i++) { 436 values = rvalues + i*nmax; 437 for (j=0; j<lens[i]; j++) { 438 lrows[count++] = values[j] - base; 439 } 440 } 441 ierr = PetscFree(rvalues);CHKERRQ(ierr); 442 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 443 ierr = PetscFree(owner);CHKERRQ(ierr); 444 ierr = PetscFree(nprocs);CHKERRQ(ierr); 445 446 /* actually zap the local rows */ 447 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 448 ierr = PetscFree(lrows);CHKERRQ(ierr); 449 450 /* wait on sends */ 451 if (nsends) { 452 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 453 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 454 ierr = PetscFree(send_status);CHKERRQ(ierr); 455 } 456 ierr = PetscFree(send_waits);CHKERRQ(ierr); 457 ierr = PetscFree(svalues);CHKERRQ(ierr); 458 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatMult_MPIDense" 464 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 465 { 466 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 467 PetscErrorCode ierr; 468 469 PetscFunctionBegin; 470 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 473 PetscFunctionReturn(0); 474 } 475 476 #undef __FUNCT__ 477 #define __FUNCT__ "MatMultAdd_MPIDense" 478 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 479 { 480 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 481 PetscErrorCode ierr; 482 483 PetscFunctionBegin; 484 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 485 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 486 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 487 PetscFunctionReturn(0); 488 } 489 490 #undef __FUNCT__ 491 #define __FUNCT__ "MatMultTranspose_MPIDense" 492 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 493 { 494 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 495 PetscErrorCode ierr; 496 PetscScalar zero = 0.0; 497 498 PetscFunctionBegin; 499 ierr = VecSet(yy,zero);CHKERRQ(ierr); 500 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 501 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 502 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 508 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 509 { 510 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 511 PetscErrorCode ierr; 512 513 PetscFunctionBegin; 514 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 515 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 516 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 517 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 518 PetscFunctionReturn(0); 519 } 520 521 #undef __FUNCT__ 522 #define __FUNCT__ "MatGetDiagonal_MPIDense" 523 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 524 { 525 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 526 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 527 PetscErrorCode ierr; 528 PetscInt len,i,n,m = A->rmap->n,radd; 529 PetscScalar *x,zero = 0.0; 530 531 PetscFunctionBegin; 532 ierr = VecSet(v,zero);CHKERRQ(ierr); 533 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 534 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 535 if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 536 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 537 radd = A->rmap->rstart*m; 538 for (i=0; i<len; i++) { 539 x[i] = aloc->v[radd + i*m + i]; 540 } 541 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatDestroy_MPIDense" 547 PetscErrorCode MatDestroy_MPIDense(Mat mat) 548 { 549 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 550 PetscErrorCode ierr; 551 #if defined(PETSC_HAVE_PLAPACK) 552 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 553 #endif 554 555 PetscFunctionBegin; 556 557 #if defined(PETSC_USE_LOG) 558 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 559 #endif 560 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 561 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 562 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 563 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 564 #if defined(PETSC_HAVE_PLAPACK) 565 if (lu) { 566 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 567 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 568 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 569 570 if (lu->is_pla) { 571 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 572 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 573 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 574 } 575 } 576 #endif 577 578 ierr = PetscFree(mdn);CHKERRQ(ierr); 579 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 580 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 581 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 582 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 583 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 584 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587 588 #undef __FUNCT__ 589 #define __FUNCT__ "MatView_MPIDense_Binary" 590 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 591 { 592 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 593 PetscErrorCode ierr; 594 PetscViewerFormat format; 595 int fd; 596 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 597 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 598 PetscScalar *work,*v,*vv; 599 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 600 MPI_Status status; 601 602 PetscFunctionBegin; 603 if (mdn->size == 1) { 604 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 605 } else { 606 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 607 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 608 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 609 610 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 611 if (format == PETSC_VIEWER_NATIVE) { 612 613 if (!rank) { 614 /* store the matrix as a dense matrix */ 615 header[0] = MAT_FILE_CLASSID; 616 header[1] = mat->rmap->N; 617 header[2] = N; 618 header[3] = MATRIX_BINARY_FORMAT_DENSE; 619 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 620 621 /* get largest work array needed for transposing array */ 622 mmax = mat->rmap->n; 623 for (i=1; i<size; i++) { 624 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 625 } 626 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 627 628 /* write out local array, by rows */ 629 m = mat->rmap->n; 630 v = a->v; 631 for (j=0; j<N; j++) { 632 for (i=0; i<m; i++) { 633 work[j + i*N] = *v++; 634 } 635 } 636 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 637 /* get largest work array to receive messages from other processes, excludes process zero */ 638 mmax = 0; 639 for (i=1; i<size; i++) { 640 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 641 } 642 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 643 for(k = 1; k < size; k++) { 644 v = vv; 645 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 646 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 647 648 for(j = 0; j < N; j++) { 649 for(i = 0; i < m; i++) { 650 work[j + i*N] = *v++; 651 } 652 } 653 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 654 } 655 ierr = PetscFree(work);CHKERRQ(ierr); 656 ierr = PetscFree(vv);CHKERRQ(ierr); 657 } else { 658 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 659 } 660 } else { 661 SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE"); 662 } 663 } 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 669 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 670 { 671 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 672 PetscErrorCode ierr; 673 PetscMPIInt size = mdn->size,rank = mdn->rank; 674 const PetscViewerType vtype; 675 PetscTruth iascii,isdraw; 676 PetscViewer sviewer; 677 PetscViewerFormat format; 678 #if defined(PETSC_HAVE_PLAPACK) 679 Mat_Plapack *lu; 680 #endif 681 682 PetscFunctionBegin; 683 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 684 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 685 if (iascii) { 686 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 687 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 688 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 689 MatInfo info; 690 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n, 692 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 693 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 694 #if defined(PETSC_HAVE_PLAPACK) 695 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 696 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 697 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 698 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 699 if (mat->factortype){ 700 lu=(Mat_Plapack*)(mat->spptr); 701 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 702 } 703 #else 704 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 705 #endif 706 PetscFunctionReturn(0); 707 } else if (format == PETSC_VIEWER_ASCII_INFO) { 708 PetscFunctionReturn(0); 709 } 710 } else if (isdraw) { 711 PetscDraw draw; 712 PetscTruth isnull; 713 714 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 715 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 716 if (isnull) PetscFunctionReturn(0); 717 } 718 719 if (size == 1) { 720 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 721 } else { 722 /* assemble the entire matrix onto first processor. */ 723 Mat A; 724 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 725 PetscInt *cols; 726 PetscScalar *vals; 727 728 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 729 if (!rank) { 730 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 731 } else { 732 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 733 } 734 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 735 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 736 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 737 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 738 739 /* Copy the matrix ... This isn't the most efficient means, 740 but it's quick for now */ 741 A->insertmode = INSERT_VALUES; 742 row = mat->rmap->rstart; m = mdn->A->rmap->n; 743 for (i=0; i<m; i++) { 744 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 745 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 746 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 747 row++; 748 } 749 750 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 752 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 753 if (!rank) { 754 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 755 } 756 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 757 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 758 ierr = MatDestroy(A);CHKERRQ(ierr); 759 } 760 PetscFunctionReturn(0); 761 } 762 763 #undef __FUNCT__ 764 #define __FUNCT__ "MatView_MPIDense" 765 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 766 { 767 PetscErrorCode ierr; 768 PetscTruth iascii,isbinary,isdraw,issocket; 769 770 PetscFunctionBegin; 771 772 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 773 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 774 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 775 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 776 777 if (iascii || issocket || isdraw) { 778 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 779 } else if (isbinary) { 780 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 781 } else { 782 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 783 } 784 PetscFunctionReturn(0); 785 } 786 787 #undef __FUNCT__ 788 #define __FUNCT__ "MatGetInfo_MPIDense" 789 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 790 { 791 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 792 Mat mdn = mat->A; 793 PetscErrorCode ierr; 794 PetscReal isend[5],irecv[5]; 795 796 PetscFunctionBegin; 797 info->block_size = 1.0; 798 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 799 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 800 isend[3] = info->memory; isend[4] = info->mallocs; 801 if (flag == MAT_LOCAL) { 802 info->nz_used = isend[0]; 803 info->nz_allocated = isend[1]; 804 info->nz_unneeded = isend[2]; 805 info->memory = isend[3]; 806 info->mallocs = isend[4]; 807 } else if (flag == MAT_GLOBAL_MAX) { 808 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 809 info->nz_used = irecv[0]; 810 info->nz_allocated = irecv[1]; 811 info->nz_unneeded = irecv[2]; 812 info->memory = irecv[3]; 813 info->mallocs = irecv[4]; 814 } else if (flag == MAT_GLOBAL_SUM) { 815 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 816 info->nz_used = irecv[0]; 817 info->nz_allocated = irecv[1]; 818 info->nz_unneeded = irecv[2]; 819 info->memory = irecv[3]; 820 info->mallocs = irecv[4]; 821 } 822 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 823 info->fill_ratio_needed = 0; 824 info->factor_mallocs = 0; 825 PetscFunctionReturn(0); 826 } 827 828 #undef __FUNCT__ 829 #define __FUNCT__ "MatSetOption_MPIDense" 830 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg) 831 { 832 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 833 PetscErrorCode ierr; 834 835 PetscFunctionBegin; 836 switch (op) { 837 case MAT_NEW_NONZERO_LOCATIONS: 838 case MAT_NEW_NONZERO_LOCATION_ERR: 839 case MAT_NEW_NONZERO_ALLOCATION_ERR: 840 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 841 break; 842 case MAT_ROW_ORIENTED: 843 a->roworiented = flg; 844 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 845 break; 846 case MAT_NEW_DIAGONALS: 847 case MAT_USE_HASH_TABLE: 848 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 849 break; 850 case MAT_IGNORE_OFF_PROC_ENTRIES: 851 a->donotstash = flg; 852 break; 853 case MAT_SYMMETRIC: 854 case MAT_STRUCTURALLY_SYMMETRIC: 855 case MAT_HERMITIAN: 856 case MAT_SYMMETRY_ETERNAL: 857 case MAT_IGNORE_LOWER_TRIANGULAR: 858 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 859 break; 860 default: 861 SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 862 } 863 PetscFunctionReturn(0); 864 } 865 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatDiagonalScale_MPIDense" 869 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 870 { 871 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 872 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 873 PetscScalar *l,*r,x,*v; 874 PetscErrorCode ierr; 875 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 876 877 PetscFunctionBegin; 878 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 879 if (ll) { 880 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 881 if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 882 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 883 for (i=0; i<m; i++) { 884 x = l[i]; 885 v = mat->v + i; 886 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 887 } 888 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 889 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 890 } 891 if (rr) { 892 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 893 if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 894 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 895 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 897 for (i=0; i<n; i++) { 898 x = r[i]; 899 v = mat->v + i*m; 900 for (j=0; j<m; j++) { (*v++) *= x;} 901 } 902 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 903 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 904 } 905 PetscFunctionReturn(0); 906 } 907 908 #undef __FUNCT__ 909 #define __FUNCT__ "MatNorm_MPIDense" 910 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 911 { 912 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 913 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 914 PetscErrorCode ierr; 915 PetscInt i,j; 916 PetscReal sum = 0.0; 917 PetscScalar *v = mat->v; 918 919 PetscFunctionBegin; 920 if (mdn->size == 1) { 921 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 922 } else { 923 if (type == NORM_FROBENIUS) { 924 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 925 #if defined(PETSC_USE_COMPLEX) 926 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 927 #else 928 sum += (*v)*(*v); v++; 929 #endif 930 } 931 ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 932 *nrm = sqrt(*nrm); 933 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 934 } else if (type == NORM_1) { 935 PetscReal *tmp,*tmp2; 936 ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr); 937 ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 938 ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 939 *nrm = 0.0; 940 v = mat->v; 941 for (j=0; j<mdn->A->cmap->n; j++) { 942 for (i=0; i<mdn->A->rmap->n; i++) { 943 tmp[j] += PetscAbsScalar(*v); v++; 944 } 945 } 946 ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 947 for (j=0; j<A->cmap->N; j++) { 948 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 949 } 950 ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr); 951 ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 952 } else if (type == NORM_INFINITY) { /* max row norm */ 953 PetscReal ntemp; 954 ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 955 ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 956 } else { 957 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 958 } 959 } 960 PetscFunctionReturn(0); 961 } 962 963 #undef __FUNCT__ 964 #define __FUNCT__ "MatTranspose_MPIDense" 965 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 966 { 967 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 968 Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 969 Mat B; 970 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 971 PetscErrorCode ierr; 972 PetscInt j,i; 973 PetscScalar *v; 974 975 PetscFunctionBegin; 976 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place"); 977 if (reuse == MAT_INITIAL_MATRIX || A == *matout) { 978 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 979 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 980 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 981 ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr); 982 } else { 983 B = *matout; 984 } 985 986 m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 987 ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); 988 for (i=0; i<m; i++) rwork[i] = rstart + i; 989 for (j=0; j<n; j++) { 990 ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 991 v += m; 992 } 993 ierr = PetscFree(rwork);CHKERRQ(ierr); 994 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 995 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 996 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 997 *matout = B; 998 } else { 999 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1000 } 1001 PetscFunctionReturn(0); 1002 } 1003 1004 1005 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *); 1006 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 1007 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatSetUpPreallocation_MPIDense" 1010 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A) 1011 { 1012 PetscErrorCode ierr; 1013 1014 PetscFunctionBegin; 1015 ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 1016 PetscFunctionReturn(0); 1017 } 1018 1019 #if defined(PETSC_HAVE_PLAPACK) 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "MatMPIDenseCopyToPlapack" 1023 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F) 1024 { 1025 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1026 PetscErrorCode ierr; 1027 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1028 PetscScalar *array; 1029 PetscReal one = 1.0; 1030 1031 PetscFunctionBegin; 1032 /* Copy A into F->lu->A */ 1033 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1034 ierr = PLA_API_begin();CHKERRQ(ierr); 1035 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1036 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1037 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1038 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1039 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1040 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1041 ierr = PLA_API_end();CHKERRQ(ierr); 1042 lu->rstart = rstart; 1043 PetscFunctionReturn(0); 1044 } 1045 1046 #undef __FUNCT__ 1047 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1048 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1049 { 1050 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1051 PetscErrorCode ierr; 1052 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1053 PetscScalar *array; 1054 PetscReal one = 1.0; 1055 1056 PetscFunctionBegin; 1057 /* Copy F into A->lu->A */ 1058 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1059 ierr = PLA_API_begin();CHKERRQ(ierr); 1060 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1061 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1062 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1063 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1064 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1065 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1066 ierr = PLA_API_end();CHKERRQ(ierr); 1067 lu->rstart = rstart; 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1073 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1074 { 1075 PetscErrorCode ierr; 1076 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1077 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1078 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1079 PLA_Obj alpha = NULL,beta = NULL; 1080 1081 PetscFunctionBegin; 1082 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1083 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1084 1085 /* 1086 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1087 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1088 */ 1089 1090 /* do the multiply in PLA */ 1091 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1092 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1093 CHKMEMQ; 1094 1095 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1096 CHKMEMQ; 1097 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1098 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1099 1100 /* 1101 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1102 */ 1103 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1109 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1110 { 1111 PetscErrorCode ierr; 1112 PetscInt m=A->rmap->n,n=B->cmap->n; 1113 Mat Cmat; 1114 1115 PetscFunctionBegin; 1116 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1117 SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported"); 1118 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1119 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1120 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1121 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1122 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1123 1124 *C = Cmat; 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1130 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1131 { 1132 PetscErrorCode ierr; 1133 1134 PetscFunctionBegin; 1135 if (scall == MAT_INITIAL_MATRIX){ 1136 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1137 } 1138 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1139 PetscFunctionReturn(0); 1140 } 1141 1142 #undef __FUNCT__ 1143 #define __FUNCT__ "MatSolve_MPIDense" 1144 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1145 { 1146 MPI_Comm comm = ((PetscObject)A)->comm; 1147 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1148 PetscErrorCode ierr; 1149 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1150 PetscScalar *array; 1151 PetscReal one = 1.0; 1152 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1153 PLA_Obj v_pla = NULL; 1154 PetscScalar *loc_buf; 1155 Vec loc_x; 1156 1157 PetscFunctionBegin; 1158 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1159 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1160 1161 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1162 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1163 PLA_Obj_set_to_zero(v_pla); 1164 1165 /* Copy b into rhs_pla */ 1166 PLA_API_begin(); 1167 PLA_Obj_API_open(v_pla); 1168 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1169 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1170 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1171 PLA_Obj_API_close(v_pla); 1172 PLA_API_end(); 1173 1174 if (A->factortype == MAT_FACTOR_LU){ 1175 /* Apply the permutations to the right hand sides */ 1176 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1177 1178 /* Solve L y = b, overwriting b with y */ 1179 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1180 1181 /* Solve U x = y (=b), overwriting b with x */ 1182 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1183 } else { /* MAT_FACTOR_CHOLESKY */ 1184 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1185 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1186 PLA_NONUNIT_DIAG,lu->A,v_pla); 1187 } 1188 1189 /* Copy PLAPACK x into Petsc vector x */ 1190 PLA_Obj_local_length(v_pla, &loc_m); 1191 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1192 PLA_Obj_local_stride(v_pla, &loc_stride); 1193 /* 1194 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1195 */ 1196 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1197 if (!lu->pla_solved){ 1198 1199 PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc); 1200 PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc); 1201 1202 /* Create IS and cts for VecScatterring */ 1203 PLA_Obj_local_length(v_pla, &loc_m); 1204 PLA_Obj_local_stride(v_pla, &loc_stride); 1205 ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr); 1206 1207 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1208 for (i=0; i<loc_m; i+=lu->nb){ 1209 j = 0; 1210 while (j < lu->nb && i+j < loc_m){ 1211 idx_petsc[i+j] = rstart + j; j++; 1212 } 1213 rstart += size*lu->nb; 1214 } 1215 1216 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1217 1218 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr); 1219 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr); 1220 ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr); 1221 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1222 } 1223 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1224 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1225 1226 /* Free data */ 1227 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1228 PLA_Obj_free(&v_pla); 1229 1230 lu->pla_solved = PETSC_TRUE; 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1236 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1237 { 1238 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1239 PetscErrorCode ierr; 1240 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1241 PetscInt info_pla=0; 1242 PetscScalar *array,one = 1.0; 1243 1244 PetscFunctionBegin; 1245 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1246 PLA_Obj_free(&lu->A); 1247 PLA_Obj_free (&lu->pivots); 1248 } 1249 /* Create PLAPACK matrix object */ 1250 lu->A = NULL; lu->pivots = NULL; 1251 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1252 PLA_Obj_set_to_zero(lu->A); 1253 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1254 1255 /* Copy A into lu->A */ 1256 PLA_API_begin(); 1257 PLA_Obj_API_open(lu->A); 1258 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1259 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1260 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1261 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1262 PLA_Obj_API_close(lu->A); 1263 PLA_API_end(); 1264 1265 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1266 info_pla = PLA_LU(lu->A,lu->pivots); 1267 if (info_pla != 0) 1268 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1269 1270 lu->rstart = rstart; 1271 lu->mstruct = SAME_NONZERO_PATTERN; 1272 F->ops->solve = MatSolve_MPIDense; 1273 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1279 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1280 { 1281 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1282 PetscErrorCode ierr; 1283 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1284 PetscInt info_pla=0; 1285 PetscScalar *array,one = 1.0; 1286 1287 PetscFunctionBegin; 1288 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1289 PLA_Obj_free(&lu->A); 1290 } 1291 /* Create PLAPACK matrix object */ 1292 lu->A = NULL; 1293 lu->pivots = NULL; 1294 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1295 1296 /* Copy A into lu->A */ 1297 PLA_API_begin(); 1298 PLA_Obj_API_open(lu->A); 1299 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1300 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1301 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1302 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1303 PLA_Obj_API_close(lu->A); 1304 PLA_API_end(); 1305 1306 /* Factor P A -> Chol */ 1307 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1308 if (info_pla != 0) 1309 SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1310 1311 lu->rstart = rstart; 1312 lu->mstruct = SAME_NONZERO_PATTERN; 1313 F->ops->solve = MatSolve_MPIDense; 1314 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1315 PetscFunctionReturn(0); 1316 } 1317 1318 /* Note the Petsc perm permutation is ignored */ 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1321 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1322 { 1323 PetscErrorCode ierr; 1324 PetscTruth issymmetric,set; 1325 1326 PetscFunctionBegin; 1327 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1328 if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1329 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1330 PetscFunctionReturn(0); 1331 } 1332 1333 /* Note the Petsc r and c permutations are ignored */ 1334 #undef __FUNCT__ 1335 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1336 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1337 { 1338 PetscErrorCode ierr; 1339 PetscInt M = A->rmap->N; 1340 Mat_Plapack *lu; 1341 1342 PetscFunctionBegin; 1343 lu = (Mat_Plapack*)F->spptr; 1344 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1345 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1346 PetscFunctionReturn(0); 1347 } 1348 1349 EXTERN_C_BEGIN 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1352 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1353 { 1354 PetscFunctionBegin; 1355 *type = MAT_SOLVER_PLAPACK; 1356 PetscFunctionReturn(0); 1357 } 1358 EXTERN_C_END 1359 1360 EXTERN_C_BEGIN 1361 #undef __FUNCT__ 1362 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1363 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1364 { 1365 PetscErrorCode ierr; 1366 Mat_Plapack *lu; 1367 PetscMPIInt size; 1368 PetscInt M=A->rmap->N; 1369 1370 PetscFunctionBegin; 1371 /* Create the factorization matrix */ 1372 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1373 ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1374 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1375 ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr); 1376 (*F)->spptr = (void*)lu; 1377 1378 /* Set default Plapack parameters */ 1379 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1380 lu->nb = M/size; 1381 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1382 1383 /* Set runtime options */ 1384 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1385 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1386 PetscOptionsEnd(); 1387 1388 /* Create object distribution template */ 1389 lu->templ = NULL; 1390 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1391 1392 /* Set the datatype */ 1393 #if defined(PETSC_USE_COMPLEX) 1394 lu->datatype = MPI_DOUBLE_COMPLEX; 1395 #else 1396 lu->datatype = MPI_DOUBLE; 1397 #endif 1398 1399 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1400 1401 1402 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1403 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1404 1405 if (ftype == MAT_FACTOR_LU) { 1406 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1407 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1408 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1409 } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1410 (*F)->factortype = ftype; 1411 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr); 1412 PetscFunctionReturn(0); 1413 } 1414 EXTERN_C_END 1415 #endif 1416 1417 #undef __FUNCT__ 1418 #define __FUNCT__ "MatGetFactor_mpidense_petsc" 1419 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F) 1420 { 1421 #if defined(PETSC_HAVE_PLAPACK) 1422 PetscErrorCode ierr; 1423 #endif 1424 1425 PetscFunctionBegin; 1426 #if defined(PETSC_HAVE_PLAPACK) 1427 ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr); 1428 #else 1429 SETERRQ1(PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name); 1430 #endif 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatAXPY_MPIDense" 1436 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1437 { 1438 PetscErrorCode ierr; 1439 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1440 1441 PetscFunctionBegin; 1442 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatConjugate_MPIDense" 1448 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIDense(Mat mat) 1449 { 1450 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1451 PetscErrorCode ierr; 1452 1453 PetscFunctionBegin; 1454 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNCT__ 1459 #define __FUNCT__ "MatRealPart_MPIDense" 1460 PetscErrorCode MatRealPart_MPIDense(Mat A) 1461 { 1462 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1463 PetscErrorCode ierr; 1464 1465 PetscFunctionBegin; 1466 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1467 PetscFunctionReturn(0); 1468 } 1469 1470 #undef __FUNCT__ 1471 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1472 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1473 { 1474 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1475 PetscErrorCode ierr; 1476 1477 PetscFunctionBegin; 1478 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1479 PetscFunctionReturn(0); 1480 } 1481 1482 /* -------------------------------------------------------------------*/ 1483 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1484 MatGetRow_MPIDense, 1485 MatRestoreRow_MPIDense, 1486 MatMult_MPIDense, 1487 /* 4*/ MatMultAdd_MPIDense, 1488 MatMultTranspose_MPIDense, 1489 MatMultTransposeAdd_MPIDense, 1490 0, 1491 0, 1492 0, 1493 /*10*/ 0, 1494 0, 1495 0, 1496 0, 1497 MatTranspose_MPIDense, 1498 /*15*/ MatGetInfo_MPIDense, 1499 MatEqual_MPIDense, 1500 MatGetDiagonal_MPIDense, 1501 MatDiagonalScale_MPIDense, 1502 MatNorm_MPIDense, 1503 /*20*/ MatAssemblyBegin_MPIDense, 1504 MatAssemblyEnd_MPIDense, 1505 MatSetOption_MPIDense, 1506 MatZeroEntries_MPIDense, 1507 /*24*/ MatZeroRows_MPIDense, 1508 0, 1509 0, 1510 0, 1511 0, 1512 /*29*/ MatSetUpPreallocation_MPIDense, 1513 0, 1514 0, 1515 MatGetArray_MPIDense, 1516 MatRestoreArray_MPIDense, 1517 /*34*/ MatDuplicate_MPIDense, 1518 0, 1519 0, 1520 0, 1521 0, 1522 /*39*/ MatAXPY_MPIDense, 1523 MatGetSubMatrices_MPIDense, 1524 0, 1525 MatGetValues_MPIDense, 1526 0, 1527 /*44*/ 0, 1528 MatScale_MPIDense, 1529 0, 1530 0, 1531 0, 1532 /*49*/ 0, 1533 0, 1534 0, 1535 0, 1536 0, 1537 /*54*/ 0, 1538 0, 1539 0, 1540 0, 1541 0, 1542 /*59*/ MatGetSubMatrix_MPIDense, 1543 MatDestroy_MPIDense, 1544 MatView_MPIDense, 1545 0, 1546 0, 1547 /*64*/ 0, 1548 0, 1549 0, 1550 0, 1551 0, 1552 /*69*/ 0, 1553 0, 1554 0, 1555 0, 1556 0, 1557 /*74*/ 0, 1558 0, 1559 0, 1560 0, 1561 0, 1562 /*79*/ 0, 1563 0, 1564 0, 1565 0, 1566 /*83*/ MatLoad_MPIDense, 1567 0, 1568 0, 1569 0, 1570 0, 1571 0, 1572 /*89*/ 1573 #if defined(PETSC_HAVE_PLAPACK) 1574 MatMatMult_MPIDense_MPIDense, 1575 MatMatMultSymbolic_MPIDense_MPIDense, 1576 MatMatMultNumeric_MPIDense_MPIDense, 1577 #else 1578 0, 1579 0, 1580 0, 1581 #endif 1582 0, 1583 0, 1584 /*94*/ 0, 1585 0, 1586 0, 1587 0, 1588 0, 1589 /*99*/ 0, 1590 0, 1591 0, 1592 MatConjugate_MPIDense, 1593 0, 1594 /*104*/0, 1595 MatRealPart_MPIDense, 1596 MatImaginaryPart_MPIDense, 1597 0 1598 }; 1599 1600 EXTERN_C_BEGIN 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense" 1603 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1604 { 1605 Mat_MPIDense *a; 1606 PetscErrorCode ierr; 1607 1608 PetscFunctionBegin; 1609 mat->preallocated = PETSC_TRUE; 1610 /* Note: For now, when data is specified above, this assumes the user correctly 1611 allocates the local dense storage space. We should add error checking. */ 1612 1613 a = (Mat_MPIDense*)mat->data; 1614 ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1615 ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1616 ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 1617 ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1618 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1619 PetscFunctionReturn(0); 1620 } 1621 EXTERN_C_END 1622 1623 /*MC 1624 MAT_SOLVER_PLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices 1625 1626 run config/configure.py with the option --download-plapack 1627 1628 1629 Options Database Keys: 1630 . -mat_plapack_nprows <n> - number of rows in processor partition 1631 . -mat_plapack_npcols <n> - number of columns in processor partition 1632 . -mat_plapack_nb <n> - block size of template vector 1633 . -mat_plapack_nb_alg <n> - algorithmic block size 1634 - -mat_plapack_ckerror <n> - error checking flag 1635 1636 Level: intermediate 1637 1638 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage 1639 1640 M*/ 1641 1642 EXTERN_C_BEGIN 1643 #undef __FUNCT__ 1644 #define __FUNCT__ "MatCreate_MPIDense" 1645 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat) 1646 { 1647 Mat_MPIDense *a; 1648 PetscErrorCode ierr; 1649 1650 PetscFunctionBegin; 1651 ierr = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr); 1652 mat->data = (void*)a; 1653 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1654 mat->mapping = 0; 1655 1656 mat->insertmode = NOT_SET_VALUES; 1657 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr); 1658 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr); 1659 1660 ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr); 1661 ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr); 1662 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 1663 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1664 a->nvec = mat->cmap->n; 1665 1666 /* build cache for off array entries formed */ 1667 a->donotstash = PETSC_FALSE; 1668 ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr); 1669 1670 /* stuff used for matrix vector multiply */ 1671 a->lvec = 0; 1672 a->Mvctx = 0; 1673 a->roworiented = PETSC_TRUE; 1674 1675 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C", 1676 "MatGetDiagonalBlock_MPIDense", 1677 MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr); 1678 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C", 1679 "MatMPIDenseSetPreallocation_MPIDense", 1680 MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1681 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C", 1682 "MatMatMult_MPIAIJ_MPIDense", 1683 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1684 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C", 1685 "MatMatMultSymbolic_MPIAIJ_MPIDense", 1686 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1687 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1688 "MatMatMultNumeric_MPIAIJ_MPIDense", 1689 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1690 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C", 1691 "MatGetFactor_mpidense_petsc", 1692 MatGetFactor_mpidense_petsc);CHKERRQ(ierr); 1693 #if defined(PETSC_HAVE_PLAPACK) 1694 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C", 1695 "MatGetFactor_mpidense_plapack", 1696 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1697 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1698 #endif 1699 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1700 1701 PetscFunctionReturn(0); 1702 } 1703 EXTERN_C_END 1704 1705 /*MC 1706 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1707 1708 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1709 and MATMPIDENSE otherwise. 1710 1711 Options Database Keys: 1712 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1713 1714 Level: beginner 1715 1716 1717 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1718 M*/ 1719 1720 EXTERN_C_BEGIN 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "MatCreate_Dense" 1723 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1724 { 1725 PetscErrorCode ierr; 1726 PetscMPIInt size; 1727 1728 PetscFunctionBegin; 1729 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1730 if (size == 1) { 1731 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1732 } else { 1733 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1734 } 1735 PetscFunctionReturn(0); 1736 } 1737 EXTERN_C_END 1738 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1741 /*@C 1742 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1743 1744 Not collective 1745 1746 Input Parameters: 1747 . A - the matrix 1748 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1749 to control all matrix memory allocation. 1750 1751 Notes: 1752 The dense format is fully compatible with standard Fortran 77 1753 storage by columns. 1754 1755 The data input variable is intended primarily for Fortran programmers 1756 who wish to allocate their own matrix memory space. Most users should 1757 set data=PETSC_NULL. 1758 1759 Level: intermediate 1760 1761 .keywords: matrix,dense, parallel 1762 1763 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1764 @*/ 1765 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1766 { 1767 PetscErrorCode ierr,(*f)(Mat,PetscScalar *); 1768 1769 PetscFunctionBegin; 1770 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1771 if (f) { 1772 ierr = (*f)(mat,data);CHKERRQ(ierr); 1773 } 1774 PetscFunctionReturn(0); 1775 } 1776 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatCreateMPIDense" 1779 /*@C 1780 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1781 1782 Collective on MPI_Comm 1783 1784 Input Parameters: 1785 + comm - MPI communicator 1786 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1787 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1788 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1789 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1790 - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1791 to control all matrix memory allocation. 1792 1793 Output Parameter: 1794 . A - the matrix 1795 1796 Notes: 1797 The dense format is fully compatible with standard Fortran 77 1798 storage by columns. 1799 1800 The data input variable is intended primarily for Fortran programmers 1801 who wish to allocate their own matrix memory space. Most users should 1802 set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users). 1803 1804 The user MUST specify either the local or global matrix dimensions 1805 (possibly both). 1806 1807 Level: intermediate 1808 1809 .keywords: matrix,dense, parallel 1810 1811 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1812 @*/ 1813 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 1814 { 1815 PetscErrorCode ierr; 1816 PetscMPIInt size; 1817 1818 PetscFunctionBegin; 1819 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1820 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1821 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1822 if (size > 1) { 1823 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1824 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1825 } else { 1826 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1827 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 1828 } 1829 PetscFunctionReturn(0); 1830 } 1831 1832 #undef __FUNCT__ 1833 #define __FUNCT__ "MatDuplicate_MPIDense" 1834 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 1835 { 1836 Mat mat; 1837 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1838 PetscErrorCode ierr; 1839 1840 PetscFunctionBegin; 1841 *newmat = 0; 1842 ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr); 1843 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1844 ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1845 a = (Mat_MPIDense*)mat->data; 1846 ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1847 mat->factortype = A->factortype; 1848 mat->assembled = PETSC_TRUE; 1849 mat->preallocated = PETSC_TRUE; 1850 1851 mat->rmap->rstart = A->rmap->rstart; 1852 mat->rmap->rend = A->rmap->rend; 1853 a->size = oldmat->size; 1854 a->rank = oldmat->rank; 1855 mat->insertmode = NOT_SET_VALUES; 1856 a->nvec = oldmat->nvec; 1857 a->donotstash = oldmat->donotstash; 1858 1859 ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1860 ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 1861 1862 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 1863 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1864 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1865 1866 *newmat = mat; 1867 PetscFunctionReturn(0); 1868 } 1869 1870 #undef __FUNCT__ 1871 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile" 1872 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat) 1873 { 1874 PetscErrorCode ierr; 1875 PetscMPIInt rank,size; 1876 PetscInt *rowners,i,m,nz,j; 1877 PetscScalar *array,*vals,*vals_ptr; 1878 MPI_Status status; 1879 1880 PetscFunctionBegin; 1881 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1882 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1883 1884 /* determine ownership of all rows */ 1885 m = M/size + ((M % size) > rank); 1886 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1887 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1888 rowners[0] = 0; 1889 for (i=2; i<=size; i++) { 1890 rowners[i] += rowners[i-1]; 1891 } 1892 1893 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 1894 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1895 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 1896 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 1897 ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr); 1898 1899 if (!rank) { 1900 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1901 1902 /* read in my part of the matrix numerical values */ 1903 ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 1904 1905 /* insert into matrix-by row (this is why cannot directly read into array */ 1906 vals_ptr = vals; 1907 for (i=0; i<m; i++) { 1908 for (j=0; j<N; j++) { 1909 array[i + j*m] = *vals_ptr++; 1910 } 1911 } 1912 1913 /* read in other processors and ship out */ 1914 for (i=1; i<size; i++) { 1915 nz = (rowners[i+1] - rowners[i])*N; 1916 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1917 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr); 1918 } 1919 } else { 1920 /* receive numeric values */ 1921 ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1922 1923 /* receive message of values*/ 1924 ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr); 1925 1926 /* insert into matrix-by row (this is why cannot directly read into array */ 1927 vals_ptr = vals; 1928 for (i=0; i<m; i++) { 1929 for (j=0; j<N; j++) { 1930 array[i + j*m] = *vals_ptr++; 1931 } 1932 } 1933 } 1934 ierr = PetscFree(rowners);CHKERRQ(ierr); 1935 ierr = PetscFree(vals);CHKERRQ(ierr); 1936 ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1937 ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1938 PetscFunctionReturn(0); 1939 } 1940 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatLoad_MPIDense" 1943 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat) 1944 { 1945 Mat A; 1946 PetscScalar *vals,*svals; 1947 MPI_Comm comm = ((PetscObject)viewer)->comm; 1948 MPI_Status status; 1949 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz; 1950 PetscInt header[4],*rowlengths = 0,M,N,*cols; 1951 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1952 PetscInt i,nz,j,rstart,rend; 1953 int fd; 1954 PetscErrorCode ierr; 1955 1956 PetscFunctionBegin; 1957 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1958 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1959 if (!rank) { 1960 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1961 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1962 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1963 } 1964 1965 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1966 M = header[1]; N = header[2]; nz = header[3]; 1967 1968 /* 1969 Handle case where matrix is stored on disk as a dense matrix 1970 */ 1971 if (nz == MATRIX_BINARY_FORMAT_DENSE) { 1972 ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr); 1973 PetscFunctionReturn(0); 1974 } 1975 1976 /* determine ownership of all rows */ 1977 m = PetscMPIIntCast(M/size + ((M % size) > rank)); 1978 ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr); 1979 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1980 rowners[0] = 0; 1981 for (i=2; i<=size; i++) { 1982 rowners[i] += rowners[i-1]; 1983 } 1984 rstart = rowners[rank]; 1985 rend = rowners[rank+1]; 1986 1987 /* distribute row lengths to all processors */ 1988 ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr); 1989 if (!rank) { 1990 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1991 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1992 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1993 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1994 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1995 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1996 } else { 1997 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1998 } 1999 2000 if (!rank) { 2001 /* calculate the number of nonzeros on each processor */ 2002 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2003 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2004 for (i=0; i<size; i++) { 2005 for (j=rowners[i]; j< rowners[i+1]; j++) { 2006 procsnz[i] += rowlengths[j]; 2007 } 2008 } 2009 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2010 2011 /* determine max buffer needed and allocate it */ 2012 maxnz = 0; 2013 for (i=0; i<size; i++) { 2014 maxnz = PetscMax(maxnz,procsnz[i]); 2015 } 2016 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2017 2018 /* read in my part of the matrix column indices */ 2019 nz = procsnz[0]; 2020 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2021 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2022 2023 /* read in every one elses and ship off */ 2024 for (i=1; i<size; i++) { 2025 nz = procsnz[i]; 2026 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2027 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2028 } 2029 ierr = PetscFree(cols);CHKERRQ(ierr); 2030 } else { 2031 /* determine buffer space needed for message */ 2032 nz = 0; 2033 for (i=0; i<m; i++) { 2034 nz += ourlens[i]; 2035 } 2036 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2037 2038 /* receive message of column indices*/ 2039 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2040 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2041 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2042 } 2043 2044 /* loop over local rows, determining number of off diagonal entries */ 2045 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2046 jj = 0; 2047 for (i=0; i<m; i++) { 2048 for (j=0; j<ourlens[i]; j++) { 2049 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 2050 jj++; 2051 } 2052 } 2053 2054 /* create our matrix */ 2055 for (i=0; i<m; i++) { 2056 ourlens[i] -= offlens[i]; 2057 } 2058 ierr = MatCreate(comm,newmat);CHKERRQ(ierr); 2059 ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2060 ierr = MatSetType(*newmat,type);CHKERRQ(ierr); 2061 ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr); 2062 A = *newmat; 2063 for (i=0; i<m; i++) { 2064 ourlens[i] += offlens[i]; 2065 } 2066 2067 if (!rank) { 2068 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2069 2070 /* read in my part of the matrix numerical values */ 2071 nz = procsnz[0]; 2072 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2073 2074 /* insert into matrix */ 2075 jj = rstart; 2076 smycols = mycols; 2077 svals = vals; 2078 for (i=0; i<m; i++) { 2079 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2080 smycols += ourlens[i]; 2081 svals += ourlens[i]; 2082 jj++; 2083 } 2084 2085 /* read in other processors and ship out */ 2086 for (i=1; i<size; i++) { 2087 nz = procsnz[i]; 2088 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2089 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr); 2090 } 2091 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2092 } else { 2093 /* receive numeric values */ 2094 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2095 2096 /* receive message of values*/ 2097 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr); 2098 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2099 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2100 2101 /* insert into matrix */ 2102 jj = rstart; 2103 smycols = mycols; 2104 svals = vals; 2105 for (i=0; i<m; i++) { 2106 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2107 smycols += ourlens[i]; 2108 svals += ourlens[i]; 2109 jj++; 2110 } 2111 } 2112 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2113 ierr = PetscFree(vals);CHKERRQ(ierr); 2114 ierr = PetscFree(mycols);CHKERRQ(ierr); 2115 ierr = PetscFree(rowners);CHKERRQ(ierr); 2116 2117 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2118 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2119 PetscFunctionReturn(0); 2120 } 2121 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "MatEqual_MPIDense" 2124 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag) 2125 { 2126 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2127 Mat a,b; 2128 PetscTruth flg; 2129 PetscErrorCode ierr; 2130 2131 PetscFunctionBegin; 2132 a = matA->A; 2133 b = matB->A; 2134 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2135 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2136 PetscFunctionReturn(0); 2137 } 2138 2139 #if defined(PETSC_HAVE_PLAPACK) 2140 2141 #undef __FUNCT__ 2142 #define __FUNCT__ "PetscPLAPACKFinalizePackage" 2143 /*@C 2144 PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK. 2145 Level: developer 2146 2147 .keywords: Petsc, destroy, package, PLAPACK 2148 .seealso: PetscFinalize() 2149 @*/ 2150 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void) 2151 { 2152 PetscErrorCode ierr; 2153 2154 PetscFunctionBegin; 2155 ierr = PLA_Finalize();CHKERRQ(ierr); 2156 PetscFunctionReturn(0); 2157 } 2158 2159 #undef __FUNCT__ 2160 #define __FUNCT__ "PetscPLAPACKInitializePackage" 2161 /*@C 2162 PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is 2163 called from MatCreate_MPIDense() the first time an MPI dense matrix is called. 2164 2165 Input Parameter: 2166 . comm - the communicator the matrix lives on 2167 2168 Level: developer 2169 2170 Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the 2171 same communicator (because there is some global state that is initialized and used for all matrices). In addition if 2172 PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators 2173 cannot overlap. 2174 2175 .keywords: Petsc, initialize, package, PLAPACK 2176 .seealso: PetscInitializePackage(), PetscInitialize() 2177 @*/ 2178 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm) 2179 { 2180 PetscMPIInt size; 2181 PetscErrorCode ierr; 2182 2183 PetscFunctionBegin; 2184 if (!PLA_Initialized(PETSC_NULL)) { 2185 2186 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2187 Plapack_nprows = 1; 2188 Plapack_npcols = size; 2189 2190 ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr); 2191 ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr); 2192 ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr); 2193 #if defined(PETSC_USE_DEBUG) 2194 Plapack_ierror = 3; 2195 #else 2196 Plapack_ierror = 0; 2197 #endif 2198 ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr); 2199 if (Plapack_ierror){ 2200 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr); 2201 } else { 2202 ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr); 2203 } 2204 2205 Plapack_nb_alg = 0; 2206 ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr); 2207 if (Plapack_nb_alg) { 2208 ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr); 2209 } 2210 PetscOptionsEnd(); 2211 2212 ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr); 2213 ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr); 2214 ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr); 2215 } 2216 PetscFunctionReturn(0); 2217 } 2218 2219 #endif 2220