1 #define PETSCMAT_DLL 2 3 /* 4 Basic functions for basic parallel dense matrices. 5 */ 6 7 8 #include "../src/mat/impls/dense/mpi/mpidense.h" /*I "petscmat.h" I*/ 9 #if defined(PETSC_HAVE_PLAPACK) 10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg; 11 static MPI_Comm Plapack_comm_2d; 12 EXTERN_C_BEGIN 13 #include "PLA.h" 14 EXTERN_C_END 15 16 typedef struct { 17 PLA_Obj A,pivots; 18 PLA_Template templ; 19 MPI_Datatype datatype; 20 PetscInt nb,rstart; 21 VecScatter ctx; 22 IS is_pla,is_petsc; 23 PetscBool pla_solved; 24 MatStructure mstruct; 25 } Mat_Plapack; 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatDenseGetLocalMatrix" 30 /*@ 31 32 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 33 matrix that represents the operator. For sequential matrices it returns itself. 34 35 Input Parameter: 36 . A - the Seq or MPI dense matrix 37 38 Output Parameter: 39 . B - the inner matrix 40 41 Level: intermediate 42 43 @*/ 44 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 45 { 46 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 47 PetscErrorCode ierr; 48 PetscBool flg; 49 50 PetscFunctionBegin; 51 ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 52 if (flg) { 53 *B = mat->A; 54 } else { 55 *B = A; 56 } 57 PetscFunctionReturn(0); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatGetRow_MPIDense" 62 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 63 { 64 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 65 PetscErrorCode ierr; 66 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 67 68 PetscFunctionBegin; 69 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 70 lrow = row - rstart; 71 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr); 72 PetscFunctionReturn(0); 73 } 74 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatRestoreRow_MPIDense" 77 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 78 { 79 PetscErrorCode ierr; 80 81 PetscFunctionBegin; 82 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 83 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 84 PetscFunctionReturn(0); 85 } 86 87 EXTERN_C_BEGIN 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense" 90 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscBool *iscopy,MatReuse reuse,Mat *B) 91 { 92 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 93 PetscErrorCode ierr; 94 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 95 PetscScalar *array; 96 MPI_Comm comm; 97 98 PetscFunctionBegin; 99 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 100 101 /* The reuse aspect is not implemented efficiently */ 102 if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);} 103 104 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 105 ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr); 106 ierr = MatCreate(comm,B);CHKERRQ(ierr); 107 ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr); 108 ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 109 ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr); 110 ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr); 111 ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 112 ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 113 114 *iscopy = PETSC_TRUE; 115 PetscFunctionReturn(0); 116 } 117 EXTERN_C_END 
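/*
   Usage sketch (illustrative only, not part of the library source): how a MATMPIDENSE
   matrix is typically created, filled, and then queried with MatDenseGetLocalMatrix()
   for the sequential block that holds this process's rows.  The size N and the fill
   formula are arbitrary, and the fragment assumes a declared "PetscErrorCode ierr".

      Mat         A,Aloc;
      PetscInt    i,j,rstart,rend,N = 100;
      PetscScalar val;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (i=rstart; i<rend; i++) {
        for (j=0; j<N; j++) {
          val  = (PetscScalar)(i + j);
          ierr = MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
      ... work with Aloc, a MATSEQDENSE matrix owning the local rows ...
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/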
118 119 #undef __FUNCT__ 120 #define __FUNCT__ "MatSetValues_MPIDense" 121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 122 { 123 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 124 PetscErrorCode ierr; 125 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 126 PetscBool roworiented = A->roworiented; 127 128 PetscFunctionBegin; 129 if (v) PetscValidScalarPointer(v,6); 130 for (i=0; i<m; i++) { 131 if (idxm[i] < 0) continue; 132 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 133 if (idxm[i] >= rstart && idxm[i] < rend) { 134 row = idxm[i] - rstart; 135 if (roworiented) { 136 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 137 } else { 138 for (j=0; j<n; j++) { 139 if (idxn[j] < 0) continue; 140 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 141 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 142 } 143 } 144 } else { 145 if (!A->donotstash) { 146 if (roworiented) { 147 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 148 } else { 149 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 150 } 151 } 152 } 153 } 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatGetValues_MPIDense" 159 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 160 { 161 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 162 PetscErrorCode ierr; 163 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 164 165 PetscFunctionBegin; 166 for (i=0; i<m; i++) { 167 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 168 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 169 if (idxm[i] >= rstart && idxm[i] < rend) { 170 row = idxm[i] - rstart; 171 for (j=0; j<n; j++) { 172 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 173 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 174 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 175 } 176 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 177 } 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatGetArray_MPIDense" 183 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[]) 184 { 185 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 186 PetscErrorCode ierr; 187 188 PetscFunctionBegin; 189 ierr = MatGetArray(a->A,array);CHKERRQ(ierr); 190 PetscFunctionReturn(0); 191 } 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatGetSubMatrix_MPIDense" 195 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 196 { 197 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 198 Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 199 PetscErrorCode ierr; 200 PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 201 const PetscInt *irow,*icol; 202 PetscScalar *av,*bv,*v = lmat->v; 203 Mat newmat; 204 IS iscol_local; 205 206 PetscFunctionBegin; 207 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 208 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 209 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 210 ierr = 
ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrices with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt n;
  PetscScalar *val;
  InsertMode addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr =
MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 301 if (!flg) break; 302 303 for (i=0; i<n;) { 304 /* Now identify the consecutive vals belonging to the same row */ 305 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 306 if (j < n) ncols = j-i; 307 else ncols = n-i; 308 /* Now assemble all these values with a single function call */ 309 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 310 i = j; 311 } 312 } 313 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 314 315 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 316 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 317 318 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 319 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 320 } 321 PetscFunctionReturn(0); 322 } 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "MatZeroEntries_MPIDense" 326 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 327 { 328 PetscErrorCode ierr; 329 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 330 331 PetscFunctionBegin; 332 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 333 PetscFunctionReturn(0); 334 } 335 336 /* the code does not do the diagonal entries correctly unless the 337 matrix is square and the column and row owerships are identical. 338 This is a BUG. The only way to fix it seems to be to access 339 mdn->A and mdn->B directly and not through the MatZeroRows() 340 routine. 341 */ 342 #undef __FUNCT__ 343 #define __FUNCT__ "MatZeroRows_MPIDense" 344 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 345 { 346 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 347 PetscErrorCode ierr; 348 PetscInt i,*owners = A->rmap->range; 349 PetscInt *nprocs,j,idx,nsends; 350 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 351 PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 352 PetscInt *lens,*lrows,*values; 353 PetscMPIInt n,imdex,rank = l->rank,size = l->size; 354 MPI_Comm comm = ((PetscObject)A)->comm; 355 MPI_Request *send_waits,*recv_waits; 356 MPI_Status recv_status,*send_status; 357 PetscBool found; 358 359 PetscFunctionBegin; 360 /* first count number of contributors to each processor */ 361 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 362 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 363 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 364 for (i=0; i<N; i++) { 365 idx = rows[i]; 366 found = PETSC_FALSE; 367 for (j=0; j<size; j++) { 368 if (idx >= owners[j] && idx < owners[j+1]) { 369 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 370 } 371 } 372 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 373 } 374 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 375 376 /* inform other processors of number of messages and max length*/ 377 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 378 379 /* post receives: */ 380 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 381 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 382 for (i=0; i<nrecvs; i++) { 383 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 384 } 385 386 /* do sends: 387 1) starts[i] gives the starting index in svalues for stuff going to 388 the ith processor 389 */ 390 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 391 ierr = 
PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 392 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 393 starts[0] = 0; 394 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 395 for (i=0; i<N; i++) { 396 svalues[starts[owner[i]]++] = rows[i]; 397 } 398 399 starts[0] = 0; 400 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 401 count = 0; 402 for (i=0; i<size; i++) { 403 if (nprocs[2*i+1]) { 404 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 405 } 406 } 407 ierr = PetscFree(starts);CHKERRQ(ierr); 408 409 base = owners[rank]; 410 411 /* wait on receives */ 412 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 413 count = nrecvs; 414 slen = 0; 415 while (count) { 416 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 417 /* unpack receives into our local space */ 418 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 419 source[imdex] = recv_status.MPI_SOURCE; 420 lens[imdex] = n; 421 slen += n; 422 count--; 423 } 424 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 425 426 /* move the data into the send scatter */ 427 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 428 count = 0; 429 for (i=0; i<nrecvs; i++) { 430 values = rvalues + i*nmax; 431 for (j=0; j<lens[i]; j++) { 432 lrows[count++] = values[j] - base; 433 } 434 } 435 ierr = PetscFree(rvalues);CHKERRQ(ierr); 436 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 437 ierr = PetscFree(owner);CHKERRQ(ierr); 438 ierr = PetscFree(nprocs);CHKERRQ(ierr); 439 440 /* actually zap the local rows */ 441 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 442 ierr = PetscFree(lrows);CHKERRQ(ierr); 443 444 /* wait on sends */ 445 if (nsends) { 446 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 447 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 448 ierr = PetscFree(send_status);CHKERRQ(ierr); 449 } 450 ierr = PetscFree(send_waits);CHKERRQ(ierr); 451 ierr = PetscFree(svalues);CHKERRQ(ierr); 452 453 PetscFunctionReturn(0); 454 } 455 456 #undef __FUNCT__ 457 #define __FUNCT__ "MatMult_MPIDense" 458 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 459 { 460 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 465 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 466 ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 467 PetscFunctionReturn(0); 468 } 469 470 #undef __FUNCT__ 471 #define __FUNCT__ "MatMultAdd_MPIDense" 472 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 473 { 474 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 475 PetscErrorCode ierr; 476 477 PetscFunctionBegin; 478 ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 480 ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "MatMultTranspose_MPIDense" 486 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 487 { 488 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 489 PetscErrorCode ierr; 490 PetscScalar zero = 0.0; 491 492 PetscFunctionBegin; 493 ierr = VecSet(yy,zero);CHKERRQ(ierr); 494 ierr = 
MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 495 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 496 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatMultTransposeAdd_MPIDense" 502 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 503 { 504 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 505 PetscErrorCode ierr; 506 507 PetscFunctionBegin; 508 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 509 ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 510 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 511 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 512 PetscFunctionReturn(0); 513 } 514 515 #undef __FUNCT__ 516 #define __FUNCT__ "MatGetDiagonal_MPIDense" 517 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 518 { 519 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 520 Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 521 PetscErrorCode ierr; 522 PetscInt len,i,n,m = A->rmap->n,radd; 523 PetscScalar *x,zero = 0.0; 524 525 PetscFunctionBegin; 526 ierr = VecSet(v,zero);CHKERRQ(ierr); 527 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 528 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 529 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 530 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 531 radd = A->rmap->rstart*m; 532 for (i=0; i<len; i++) { 533 x[i] = aloc->v[radd + i*m + i]; 534 } 535 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatDestroy_MPIDense" 541 PetscErrorCode MatDestroy_MPIDense(Mat mat) 542 { 543 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 544 PetscErrorCode ierr; 545 #if defined(PETSC_HAVE_PLAPACK) 546 Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr); 547 #endif 548 549 PetscFunctionBegin; 550 551 #if defined(PETSC_USE_LOG) 552 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 553 #endif 554 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 555 ierr = MatDestroy(mdn->A);CHKERRQ(ierr); 556 if (mdn->lvec) {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);} 557 if (mdn->Mvctx) {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);} 558 #if defined(PETSC_HAVE_PLAPACK) 559 if (lu) { 560 ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr); 561 ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr); 562 ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr); 563 564 if (lu->is_pla) { 565 ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr); 566 ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr); 567 ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr); 568 } 569 } 570 #endif 571 572 ierr = PetscFree(mdn);CHKERRQ(ierr); 573 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 574 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 576 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 577 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 578 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 
#define __FUNCT__ "MatView_MPIDense_Binary" 584 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 585 { 586 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 587 PetscErrorCode ierr; 588 PetscViewerFormat format; 589 int fd; 590 PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 591 PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 592 PetscScalar *work,*v,*vv; 593 Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 594 MPI_Status status; 595 596 PetscFunctionBegin; 597 if (mdn->size == 1) { 598 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 599 } else { 600 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 601 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 602 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 603 604 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 605 if (format == PETSC_VIEWER_NATIVE) { 606 607 if (!rank) { 608 /* store the matrix as a dense matrix */ 609 header[0] = MAT_FILE_CLASSID; 610 header[1] = mat->rmap->N; 611 header[2] = N; 612 header[3] = MATRIX_BINARY_FORMAT_DENSE; 613 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 614 615 /* get largest work array needed for transposing array */ 616 mmax = mat->rmap->n; 617 for (i=1; i<size; i++) { 618 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 619 } 620 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr); 621 622 /* write out local array, by rows */ 623 m = mat->rmap->n; 624 v = a->v; 625 for (j=0; j<N; j++) { 626 for (i=0; i<m; i++) { 627 work[j + i*N] = *v++; 628 } 629 } 630 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 631 /* get largest work array to receive messages from other processes, excludes process zero */ 632 mmax = 0; 633 for (i=1; i<size; i++) { 634 mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 635 } 636 ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr); 637 for(k = 1; k < size; k++) { 638 v = vv; 639 m = mat->rmap->range[k+1] - mat->rmap->range[k]; 640 ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 641 642 for(j = 0; j < N; j++) { 643 for(i = 0; i < m; i++) { 644 work[j + i*N] = *v++; 645 } 646 } 647 ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 648 } 649 ierr = PetscFree(work);CHKERRQ(ierr); 650 ierr = PetscFree(vv);CHKERRQ(ierr); 651 } else { 652 ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 653 } 654 } else { 655 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)"); 656 } 657 } 658 PetscFunctionReturn(0); 659 } 660 661 #undef __FUNCT__ 662 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket" 663 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 664 { 665 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 666 PetscErrorCode ierr; 667 PetscMPIInt size = mdn->size,rank = mdn->rank; 668 const PetscViewerType vtype; 669 PetscBool iascii,isdraw; 670 PetscViewer sviewer; 671 PetscViewerFormat format; 672 #if defined(PETSC_HAVE_PLAPACK) 673 Mat_Plapack *lu; 674 #endif 675 676 PetscFunctionBegin; 677 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 678 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 679 if (iascii) { 680 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 681 
ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 682 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 683 MatInfo info; 684 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 685 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n, 686 (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 687 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 688 #if defined(PETSC_HAVE_PLAPACK) 689 ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr); 690 ierr = PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr); 691 ierr = PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr); 692 ierr = PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr); 693 if (mat->factortype){ 694 lu=(Mat_Plapack*)(mat->spptr); 695 ierr = PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr); 696 } 697 #else 698 ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 699 #endif 700 PetscFunctionReturn(0); 701 } else if (format == PETSC_VIEWER_ASCII_INFO) { 702 PetscFunctionReturn(0); 703 } 704 } else if (isdraw) { 705 PetscDraw draw; 706 PetscBool isnull; 707 708 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 709 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 710 if (isnull) PetscFunctionReturn(0); 711 } 712 713 if (size == 1) { 714 ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 715 } else { 716 /* assemble the entire matrix onto first processor. */ 717 Mat A; 718 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 719 PetscInt *cols; 720 PetscScalar *vals; 721 722 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 723 if (!rank) { 724 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 725 } else { 726 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 727 } 728 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 729 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 730 ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL); 731 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 732 733 /* Copy the matrix ... 
This isn't the most efficient means, 734 but it's quick for now */ 735 A->insertmode = INSERT_VALUES; 736 row = mat->rmap->rstart; m = mdn->A->rmap->n; 737 for (i=0; i<m; i++) { 738 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 739 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 740 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 741 row++; 742 } 743 744 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 745 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 746 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 747 if (!rank) { 748 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 749 /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/ 750 PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE); 751 ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 752 } 753 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 754 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 755 ierr = MatDestroy(A);CHKERRQ(ierr); 756 } 757 PetscFunctionReturn(0); 758 } 759 760 #undef __FUNCT__ 761 #define __FUNCT__ "MatView_MPIDense" 762 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 763 { 764 PetscErrorCode ierr; 765 PetscBool iascii,isbinary,isdraw,issocket; 766 767 PetscFunctionBegin; 768 769 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 770 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 771 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 772 ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 773 774 if (iascii || issocket || isdraw) { 775 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 776 } else if (isbinary) { 777 ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 778 } else { 779 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name); 780 } 781 PetscFunctionReturn(0); 782 } 783 784 #undef __FUNCT__ 785 #define __FUNCT__ "MatGetInfo_MPIDense" 786 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 787 { 788 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 789 Mat mdn = mat->A; 790 PetscErrorCode ierr; 791 PetscReal isend[5],irecv[5]; 792 793 PetscFunctionBegin; 794 info->block_size = 1.0; 795 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 796 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 797 isend[3] = info->memory; isend[4] = info->mallocs; 798 if (flag == MAT_LOCAL) { 799 info->nz_used = isend[0]; 800 info->nz_allocated = isend[1]; 801 info->nz_unneeded = isend[2]; 802 info->memory = isend[3]; 803 info->mallocs = isend[4]; 804 } else if (flag == MAT_GLOBAL_MAX) { 805 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr); 806 info->nz_used = irecv[0]; 807 info->nz_allocated = irecv[1]; 808 info->nz_unneeded = irecv[2]; 809 info->memory = irecv[3]; 810 info->mallocs = irecv[4]; 811 } else if (flag == MAT_GLOBAL_SUM) { 812 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr); 813 info->nz_used = irecv[0]; 814 info->nz_allocated = irecv[1]; 815 info->nz_unneeded = irecv[2]; 816 info->memory = irecv[3]; 
817 info->mallocs = irecv[4]; 818 } 819 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 820 info->fill_ratio_needed = 0; 821 info->factor_mallocs = 0; 822 PetscFunctionReturn(0); 823 } 824 825 #undef __FUNCT__ 826 #define __FUNCT__ "MatSetOption_MPIDense" 827 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 828 { 829 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 830 PetscErrorCode ierr; 831 832 PetscFunctionBegin; 833 switch (op) { 834 case MAT_NEW_NONZERO_LOCATIONS: 835 case MAT_NEW_NONZERO_LOCATION_ERR: 836 case MAT_NEW_NONZERO_ALLOCATION_ERR: 837 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 838 break; 839 case MAT_ROW_ORIENTED: 840 a->roworiented = flg; 841 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 842 break; 843 case MAT_NEW_DIAGONALS: 844 case MAT_USE_HASH_TABLE: 845 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 846 break; 847 case MAT_IGNORE_OFF_PROC_ENTRIES: 848 a->donotstash = flg; 849 break; 850 case MAT_SYMMETRIC: 851 case MAT_STRUCTURALLY_SYMMETRIC: 852 case MAT_HERMITIAN: 853 case MAT_SYMMETRY_ETERNAL: 854 case MAT_IGNORE_LOWER_TRIANGULAR: 855 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 856 break; 857 default: 858 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 859 } 860 PetscFunctionReturn(0); 861 } 862 863 864 #undef __FUNCT__ 865 #define __FUNCT__ "MatDiagonalScale_MPIDense" 866 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 867 { 868 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 869 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 870 PetscScalar *l,*r,x,*v; 871 PetscErrorCode ierr; 872 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 873 874 PetscFunctionBegin; 875 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 876 if (ll) { 877 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 878 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 879 ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 880 for (i=0; i<m; i++) { 881 x = l[i]; 882 v = mat->v + i; 883 for (j=0; j<n; j++) { (*v) *= x; v+= m;} 884 } 885 ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 886 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 887 } 888 if (rr) { 889 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 890 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 891 ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 892 ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 893 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 894 for (i=0; i<n; i++) { 895 x = r[i]; 896 v = mat->v + i*m; 897 for (j=0; j<m; j++) { (*v++) *= x;} 898 } 899 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 900 ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 901 } 902 PetscFunctionReturn(0); 903 } 904 905 #undef __FUNCT__ 906 #define __FUNCT__ "MatNorm_MPIDense" 907 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 908 { 909 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 910 Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 911 PetscErrorCode ierr; 912 PetscInt i,j; 913 PetscReal sum = 0.0; 914 PetscScalar *v = mat->v; 915 916 PetscFunctionBegin; 917 if (mdn->size == 1) { 918 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 919 } else { 920 if (type == NORM_FROBENIUS) { 921 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 922 #if defined(PETSC_USE_COMPLEX) 923 sum += 
PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
  Mat B;
  PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt j,i;
  PetscScalar *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyToPlapack"
PetscErrorCode
MatMPIDenseCopyToPlapack(Mat A,Mat F) 1019 { 1020 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1021 PetscErrorCode ierr; 1022 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1023 PetscScalar *array; 1024 PetscReal one = 1.0; 1025 1026 PetscFunctionBegin; 1027 /* Copy A into F->lu->A */ 1028 ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr); 1029 ierr = PLA_API_begin();CHKERRQ(ierr); 1030 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1031 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1032 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1033 ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr); 1034 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1035 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1036 ierr = PLA_API_end();CHKERRQ(ierr); 1037 lu->rstart = rstart; 1038 PetscFunctionReturn(0); 1039 } 1040 1041 #undef __FUNCT__ 1042 #define __FUNCT__ "MatMPIDenseCopyFromPlapack" 1043 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A) 1044 { 1045 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1046 PetscErrorCode ierr; 1047 PetscInt M=A->cmap->N,m=A->rmap->n,rstart; 1048 PetscScalar *array; 1049 PetscReal one = 1.0; 1050 1051 PetscFunctionBegin; 1052 /* Copy F into A->lu->A */ 1053 ierr = MatZeroEntries(A);CHKERRQ(ierr); 1054 ierr = PLA_API_begin();CHKERRQ(ierr); 1055 ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr); 1056 ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr); 1057 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1058 ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr); 1059 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1060 ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr); 1061 ierr = PLA_API_end();CHKERRQ(ierr); 1062 lu->rstart = rstart; 1063 PetscFunctionReturn(0); 1064 } 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense" 1068 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1069 { 1070 PetscErrorCode ierr; 1071 Mat_Plapack *luA = (Mat_Plapack*)A->spptr; 1072 Mat_Plapack *luB = (Mat_Plapack*)B->spptr; 1073 Mat_Plapack *luC = (Mat_Plapack*)C->spptr; 1074 PLA_Obj alpha = NULL,beta = NULL; 1075 1076 PetscFunctionBegin; 1077 ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr); 1078 ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr); 1079 1080 /* 1081 ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr); 1082 ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr); 1083 */ 1084 1085 /* do the multiply in PLA */ 1086 ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr); 1087 ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr); 1088 CHKMEMQ; 1089 1090 ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */ 1091 CHKMEMQ; 1092 ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr); 1093 ierr = PLA_Obj_free(&beta);CHKERRQ(ierr); 1094 1095 /* 1096 ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr); 1097 */ 1098 ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense" 1104 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1105 { 1106 PetscErrorCode ierr; 1107 PetscInt m=A->rmap->n,n=B->cmap->n; 1108 Mat Cmat; 1109 1110 PetscFunctionBegin; 1111 if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 1112 
SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported"); 1113 ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr); 1114 ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 1115 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 1116 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1117 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1118 1119 *C = Cmat; 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense" 1125 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1126 { 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 if (scall == MAT_INITIAL_MATRIX){ 1131 ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1132 } 1133 ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "MatSolve_MPIDense" 1139 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x) 1140 { 1141 MPI_Comm comm = ((PetscObject)A)->comm; 1142 Mat_Plapack *lu = (Mat_Plapack*)A->spptr; 1143 PetscErrorCode ierr; 1144 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride; 1145 PetscScalar *array; 1146 PetscReal one = 1.0; 1147 PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;; 1148 PLA_Obj v_pla = NULL; 1149 PetscScalar *loc_buf; 1150 Vec loc_x; 1151 1152 PetscFunctionBegin; 1153 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1154 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1155 1156 /* Create PLAPACK vector objects, then copy b into PLAPACK b */ 1157 PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla); 1158 PLA_Obj_set_to_zero(v_pla); 1159 1160 /* Copy b into rhs_pla */ 1161 PLA_API_begin(); 1162 PLA_Obj_API_open(v_pla); 1163 ierr = VecGetArray(b,&array);CHKERRQ(ierr); 1164 PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart); 1165 ierr = VecRestoreArray(b,&array);CHKERRQ(ierr); 1166 PLA_Obj_API_close(v_pla); 1167 PLA_API_end(); 1168 1169 if (A->factortype == MAT_FACTOR_LU){ 1170 /* Apply the permutations to the right hand sides */ 1171 PLA_Apply_pivots_to_rows (v_pla,lu->pivots); 1172 1173 /* Solve L y = b, overwriting b with y */ 1174 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla ); 1175 1176 /* Solve U x = y (=b), overwriting b with x */ 1177 PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla ); 1178 } else { /* MAT_FACTOR_CHOLESKY */ 1179 PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla); 1180 PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? 
PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE), 1181 PLA_NONUNIT_DIAG,lu->A,v_pla); 1182 } 1183 1184 /* Copy PLAPACK x into Petsc vector x */ 1185 PLA_Obj_local_length(v_pla, &loc_m); 1186 PLA_Obj_local_buffer(v_pla, (void**)&loc_buf); 1187 PLA_Obj_local_stride(v_pla, &loc_stride); 1188 /* 1189 PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 1190 */ 1191 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr); 1192 if (!lu->pla_solved){ 1193 1194 PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc); 1195 PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc); 1196 1197 /* Create IS and cts for VecScatterring */ 1198 PLA_Obj_local_length(v_pla, &loc_m); 1199 PLA_Obj_local_stride(v_pla, &loc_stride); 1200 ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr); 1201 1202 rstart = (r_rank*c_nproc+c_rank)*lu->nb; 1203 for (i=0; i<loc_m; i+=lu->nb){ 1204 j = 0; 1205 while (j < lu->nb && i+j < loc_m){ 1206 idx_petsc[i+j] = rstart + j; j++; 1207 } 1208 rstart += size*lu->nb; 1209 } 1210 1211 for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride; 1212 1213 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,PETSC_COPY_VALUES,&lu->is_pla);CHKERRQ(ierr); 1214 ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,PETSC_COPY_VALUES,&lu->is_petsc);CHKERRQ(ierr); 1215 ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr); 1216 ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr); 1217 } 1218 ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1219 ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1220 1221 /* Free data */ 1222 ierr = VecDestroy(loc_x);CHKERRQ(ierr); 1223 PLA_Obj_free(&v_pla); 1224 1225 lu->pla_solved = PETSC_TRUE; 1226 PetscFunctionReturn(0); 1227 } 1228 1229 #undef __FUNCT__ 1230 #define __FUNCT__ "MatLUFactorNumeric_MPIDense" 1231 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1232 { 1233 Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr; 1234 PetscErrorCode ierr; 1235 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1236 PetscInt info_pla=0; 1237 PetscScalar *array,one = 1.0; 1238 1239 PetscFunctionBegin; 1240 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1241 PLA_Obj_free(&lu->A); 1242 PLA_Obj_free (&lu->pivots); 1243 } 1244 /* Create PLAPACK matrix object */ 1245 lu->A = NULL; lu->pivots = NULL; 1246 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1247 PLA_Obj_set_to_zero(lu->A); 1248 PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots); 1249 1250 /* Copy A into lu->A */ 1251 PLA_API_begin(); 1252 PLA_Obj_API_open(lu->A); 1253 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1254 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1255 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1256 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1257 PLA_Obj_API_close(lu->A); 1258 PLA_API_end(); 1259 1260 /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */ 1261 info_pla = PLA_LU(lu->A,lu->pivots); 1262 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla); 1263 1264 lu->rstart = rstart; 1265 lu->mstruct = SAME_NONZERO_PATTERN; 1266 F->ops->solve = MatSolve_MPIDense; 1267 F->assembled = PETSC_TRUE; /* 
required by -ksp_view */ 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense" 1273 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info) 1274 { 1275 Mat_Plapack *lu = (Mat_Plapack*)F->spptr; 1276 PetscErrorCode ierr; 1277 PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend; 1278 PetscInt info_pla=0; 1279 PetscScalar *array,one = 1.0; 1280 1281 PetscFunctionBegin; 1282 if (lu->mstruct == SAME_NONZERO_PATTERN){ 1283 PLA_Obj_free(&lu->A); 1284 } 1285 /* Create PLAPACK matrix object */ 1286 lu->A = NULL; 1287 lu->pivots = NULL; 1288 PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A); 1289 1290 /* Copy A into lu->A */ 1291 PLA_API_begin(); 1292 PLA_Obj_API_open(lu->A); 1293 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 1294 ierr = MatGetArray(A,&array);CHKERRQ(ierr); 1295 PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0); 1296 ierr = MatRestoreArray(A,&array);CHKERRQ(ierr); 1297 PLA_Obj_API_close(lu->A); 1298 PLA_API_end(); 1299 1300 /* Factor P A -> Chol */ 1301 info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A); 1302 if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla); 1303 1304 lu->rstart = rstart; 1305 lu->mstruct = SAME_NONZERO_PATTERN; 1306 F->ops->solve = MatSolve_MPIDense; 1307 F->assembled = PETSC_TRUE; /* required by -ksp_view */ 1308 PetscFunctionReturn(0); 1309 } 1310 1311 /* Note the Petsc perm permutation is ignored */ 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense" 1314 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info) 1315 { 1316 PetscErrorCode ierr; 1317 PetscBool issymmetric,set; 1318 1319 PetscFunctionBegin; 1320 ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr); 1321 if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()"); 1322 F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense; 1323 PetscFunctionReturn(0); 1324 } 1325 1326 /* Note the Petsc r and c permutations are ignored */ 1327 #undef __FUNCT__ 1328 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense" 1329 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 1330 { 1331 PetscErrorCode ierr; 1332 PetscInt M = A->rmap->N; 1333 Mat_Plapack *lu; 1334 1335 PetscFunctionBegin; 1336 lu = (Mat_Plapack*)F->spptr; 1337 ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr); 1338 F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense; 1339 PetscFunctionReturn(0); 1340 } 1341 1342 EXTERN_C_BEGIN 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack" 1345 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type) 1346 { 1347 PetscFunctionBegin; 1348 *type = MATSOLVERPLAPACK; 1349 PetscFunctionReturn(0); 1350 } 1351 EXTERN_C_END 1352 1353 EXTERN_C_BEGIN 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatGetFactor_mpidense_plapack" 1356 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F) 1357 { 1358 PetscErrorCode ierr; 1359 Mat_Plapack *lu; 1360 PetscMPIInt size; 1361 PetscInt M=A->rmap->N; 1362 1363 PetscFunctionBegin; 1364 /* Create the factorization matrix */ 1365 ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr); 1366 ierr = 
MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1367 ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr); 1368 ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr); 1369 (*F)->spptr = (void*)lu; 1370 1371 /* Set default Plapack parameters */ 1372 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1373 lu->nb = M/size; 1374 if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */ 1375 1376 /* Set runtime options */ 1377 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr); 1378 ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr); 1379 PetscOptionsEnd(); 1380 1381 /* Create object distribution template */ 1382 lu->templ = NULL; 1383 ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr); 1384 1385 /* Set the datatype */ 1386 #if defined(PETSC_USE_COMPLEX) 1387 lu->datatype = MPI_DOUBLE_COMPLEX; 1388 #else 1389 lu->datatype = MPI_DOUBLE; 1390 #endif 1391 1392 ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr); 1393 1394 1395 lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() is called yet */ 1396 lu->mstruct = DIFFERENT_NONZERO_PATTERN; 1397 1398 if (ftype == MAT_FACTOR_LU) { 1399 (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense; 1400 } else if (ftype == MAT_FACTOR_CHOLESKY) { 1401 (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense; 1402 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices"); 1403 (*F)->factortype = ftype; 1404 ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr); 1405 PetscFunctionReturn(0); 1406 } 1407 EXTERN_C_END 1408 #endif 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MatGetFactor_mpidense_petsc" 1412 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F) 1413 { 1414 #if defined(PETSC_HAVE_PLAPACK) 1415 PetscErrorCode ierr; 1416 #endif 1417 1418 PetscFunctionBegin; 1419 #if defined(PETSC_HAVE_PLAPACK) 1420 ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr); 1421 #else 1422 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. 
Install PLAPACK",((PetscObject)A)->type_name); 1423 #endif 1424 PetscFunctionReturn(0); 1425 } 1426 1427 #undef __FUNCT__ 1428 #define __FUNCT__ "MatAXPY_MPIDense" 1429 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 1430 { 1431 PetscErrorCode ierr; 1432 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1433 1434 PetscFunctionBegin; 1435 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatConjugate_MPIDense" 1441 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIDense(Mat mat) 1442 { 1443 Mat_MPIDense *a = (Mat_MPIDense *)mat->data; 1444 PetscErrorCode ierr; 1445 1446 PetscFunctionBegin; 1447 ierr = MatConjugate(a->A);CHKERRQ(ierr); 1448 PetscFunctionReturn(0); 1449 } 1450 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "MatRealPart_MPIDense" 1453 PetscErrorCode MatRealPart_MPIDense(Mat A) 1454 { 1455 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1456 PetscErrorCode ierr; 1457 1458 PetscFunctionBegin; 1459 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1460 PetscFunctionReturn(0); 1461 } 1462 1463 #undef __FUNCT__ 1464 #define __FUNCT__ "MatImaginaryPart_MPIDense" 1465 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1466 { 1467 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1468 PetscErrorCode ierr; 1469 1470 PetscFunctionBegin; 1471 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1472 PetscFunctionReturn(0); 1473 } 1474 1475 /* -------------------------------------------------------------------*/ 1476 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense, 1477 MatGetRow_MPIDense, 1478 MatRestoreRow_MPIDense, 1479 MatMult_MPIDense, 1480 /* 4*/ MatMultAdd_MPIDense, 1481 MatMultTranspose_MPIDense, 1482 MatMultTransposeAdd_MPIDense, 1483 0, 1484 0, 1485 0, 1486 /*10*/ 0, 1487 0, 1488 0, 1489 0, 1490 MatTranspose_MPIDense, 1491 /*15*/ MatGetInfo_MPIDense, 1492 MatEqual_MPIDense, 1493 MatGetDiagonal_MPIDense, 1494 MatDiagonalScale_MPIDense, 1495 MatNorm_MPIDense, 1496 /*20*/ MatAssemblyBegin_MPIDense, 1497 MatAssemblyEnd_MPIDense, 1498 MatSetOption_MPIDense, 1499 MatZeroEntries_MPIDense, 1500 /*24*/ MatZeroRows_MPIDense, 1501 0, 1502 0, 1503 0, 1504 0, 1505 /*29*/ MatSetUpPreallocation_MPIDense, 1506 0, 1507 0, 1508 MatGetArray_MPIDense, 1509 MatRestoreArray_MPIDense, 1510 /*34*/ MatDuplicate_MPIDense, 1511 0, 1512 0, 1513 0, 1514 0, 1515 /*39*/ MatAXPY_MPIDense, 1516 MatGetSubMatrices_MPIDense, 1517 0, 1518 MatGetValues_MPIDense, 1519 0, 1520 /*44*/ 0, 1521 MatScale_MPIDense, 1522 0, 1523 0, 1524 0, 1525 /*49*/ 0, 1526 0, 1527 0, 1528 0, 1529 0, 1530 /*54*/ 0, 1531 0, 1532 0, 1533 0, 1534 0, 1535 /*59*/ MatGetSubMatrix_MPIDense, 1536 MatDestroy_MPIDense, 1537 MatView_MPIDense, 1538 0, 1539 0, 1540 /*64*/ 0, 1541 0, 1542 0, 1543 0, 1544 0, 1545 /*69*/ 0, 1546 0, 1547 0, 1548 0, 1549 0, 1550 /*74*/ 0, 1551 0, 1552 0, 1553 0, 1554 0, 1555 /*79*/ 0, 1556 0, 1557 0, 1558 0, 1559 /*83*/ MatLoad_MPIDense, 1560 0, 1561 0, 1562 0, 1563 0, 1564 0, 1565 /*89*/ 1566 #if defined(PETSC_HAVE_PLAPACK) 1567 MatMatMult_MPIDense_MPIDense, 1568 MatMatMultSymbolic_MPIDense_MPIDense, 1569 MatMatMultNumeric_MPIDense_MPIDense, 1570 #else 1571 0, 1572 0, 1573 0, 1574 #endif 1575 0, 1576 0, 1577 /*94*/ 0, 1578 0, 1579 0, 1580 0, 1581 0, 1582 /*99*/ 0, 1583 0, 1584 0, 1585 MatConjugate_MPIDense, 1586 0, 1587 /*104*/0, 1588 MatRealPart_MPIDense, 1589 MatImaginaryPart_MPIDense, 1590 0, 1591 0, 1592 /*109*/0, 1593 0, 1594 0, 1595 0, 1596 0, 1597 /*114*/0, 1598 0, 1599 0, 1600 0, 1601 0, 1602 

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified, this assumes the user has correctly
     allocated the local dense storage space. We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATSOLVERPLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices

   Run ./configure with the option --download-plapack to install PLAPACK.

   Options Database Keys:
+  -mat_plapack_nprows <n> - number of rows in the processor partition
.  -mat_plapack_npcols <n> - number of columns in the processor partition
.  -mat_plapack_nb <n> - block size of the template vector
.  -mat_plapack_nb_alg <n> - algorithmic block size
-  -mat_plapack_ckerror <n> - error checking flag

   Level: intermediate

.seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
M*/
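
/*
   Illustrative runtime usage (a sketch; the executable name and process counts are
   examples, and the options are the ones documented above):

      mpiexec -n 4 ./ex1 -mat_type mpidense -pc_type lu -pc_factor_mat_solver_package plapack \
          -mat_plapack_nprows 2 -mat_plapack_npcols 2 -mat_plapack_nb 64
*/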
"MatMatMultSymbolic_MPIAIJ_MPIDense", 1694 MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1695 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C", 1696 "MatMatMultNumeric_MPIAIJ_MPIDense", 1697 MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1698 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C", 1699 "MatGetFactor_mpidense_petsc", 1700 MatGetFactor_mpidense_petsc);CHKERRQ(ierr); 1701 #if defined(PETSC_HAVE_PLAPACK) 1702 ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C", 1703 "MatGetFactor_mpidense_plapack", 1704 MatGetFactor_mpidense_plapack);CHKERRQ(ierr); 1705 ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr); 1706 #endif 1707 ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1708 1709 PetscFunctionReturn(0); 1710 } 1711 EXTERN_C_END 1712 1713 /*MC 1714 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1715 1716 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1717 and MATMPIDENSE otherwise. 1718 1719 Options Database Keys: 1720 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1721 1722 Level: beginner 1723 1724 1725 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1726 M*/ 1727 1728 EXTERN_C_BEGIN 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatCreate_Dense" 1731 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A) 1732 { 1733 PetscErrorCode ierr; 1734 PetscMPIInt size; 1735 1736 PetscFunctionBegin; 1737 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 1738 if (size == 1) { 1739 ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); 1740 } else { 1741 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 1742 } 1743 PetscFunctionReturn(0); 1744 } 1745 EXTERN_C_END 1746 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatMPIDenseSetPreallocation" 1749 /*@C 1750 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1751 1752 Not collective 1753 1754 Input Parameters: 1755 . A - the matrix 1756 - data - optional location of matrix data. Set data=PETSC_NULL for PETSc 1757 to control all matrix memory allocation. 1758 1759 Notes: 1760 The dense format is fully compatible with standard Fortran 77 1761 storage by columns. 1762 1763 The data input variable is intended primarily for Fortran programmers 1764 who wish to allocate their own matrix memory space. Most users should 1765 set data=PETSC_NULL. 1766 1767 Level: intermediate 1768 1769 .keywords: matrix,dense, parallel 1770 1771 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1772 @*/ 1773 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data) 1774 { 1775 PetscErrorCode ierr; 1776 1777 PetscFunctionBegin; 1778 ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr); 1779 PetscFunctionReturn(0); 1780 } 1781 1782 #undef __FUNCT__ 1783 #define __FUNCT__ "MatCreateMPIDense" 1784 /*@C 1785 MatCreateMPIDense - Creates a sparse parallel matrix in dense format. 1786 1787 Collective on MPI_Comm 1788 1789 Input Parameters: 1790 + comm - MPI communicator 1791 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1792 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 1793 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1794 . 

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users)
          for PETSc to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
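
/*
   A minimal usage sketch for MatCreateMPIDense() (illustrative; error checking is omitted
   and the global sizes M,N are assumed to be defined elsewhere):

      Mat A;
      MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
      ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use A ...
      MatDestroy(A);
*/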

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutCopy(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
  else                     m = newmat->rmap->n;
  ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
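
/*
   MatLoad_MPIDense() below reads the standard PETSc binary matrix format: a header of four
   PetscInt values (MAT_FILE_CLASSID, M, N, nz) followed, for sparse files, by the M row
   lengths, the column indices, and finally the numerical values. When nz equals
   MATRIX_BINARY_FORMAT_DENSE the values are instead stored as a dense array and the file
   is handed to MatLoad_MPIDense_DenseInFile() above.
*/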

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /*
     Handle the case where the matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank));
  else                     m = PetscMPIIntCast(newmat->rmap->n);
  ierr = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
  }
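
  /*
     Note: the matrix being built is dense, so the row lengths read from the file are not
     needed for preallocation; ourlens/offlens are kept only so that we can step through
     the file's column-index and value streams row by row when calling MatSetValues() below.
  */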

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
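
/*
   A minimal loading sketch for the MatLoad path above (illustrative; "matrix.dat" is an
   example file name, error checking is omitted, and the destroy calls follow the
   non-pointer style used elsewhere in this file):

      Mat         A;
      PetscViewer viewer;
      PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetType(A,MATMPIDENSE);
      MatLoad(A,viewer);
      PetscViewerDestroy(viewer);
*/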

#if defined(PETSC_HAVE_PLAPACK)

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
  PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.

  Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
  PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
  called from MatCreate_MPIDense() the first time an MPI dense matrix is created.

  Input Parameter:
.  comm - the communicator the matrix lives on

  Level: developer

  Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the
  same communicator (because there is some global state that is initialized and used for all matrices). In addition,
  if PLAPACK is initialized on (that is, the initial matrices are created on) subcommunicators of MPI_COMM_WORLD,
  these subcommunicators cannot overlap.

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscSysInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
{
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
    Plapack_ierror = 3;
#else
    Plapack_ierror = 0;
#endif
    ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_ierror) {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
    } else {
      ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
    }

    Plapack_nb_alg = 0;
    ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
    if (Plapack_nb_alg) {
      ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
    }
    PetscOptionsEnd();

    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#endif
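
/*
   Note on the defaults above: the 2D process mesh defaults to 1 x size. When
   -mat_plapack_nprows and -mat_plapack_npcols are used to change it, the product of the
   two values should (presumably, as required by PLA_Comm_1D_to_2D()) equal the number of
   processes in comm.
*/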